]> gcc.gnu.org Git - gcc.git/blob - gcc/config/rs6000/rs6000.cc
c++: trait as typename scope [PR116052]
[gcc.git] / gcc / config / rs6000 / rs6000.cc
1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2024 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "ira.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
45 #include "flags.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "attribs.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "print-tree.h"
52 #include "varasm.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "output.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "gimple-iterator.h"
62 #include "gimple-fold.h"
63 #include "gimple-walk.h"
64 #include "ssa.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
67 #include "intl.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
70 #include "builtins.h"
71 #include "tree-vector-builder.h"
72 #include "context.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
75 #include "sreal.h"
76 #include "ipa-cp.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "except.h"
80 #include "case-cfn-macros.h"
81 #include "ppc-auxv.h"
82 #include "rs6000-internal.h"
83 #include "opts.h"
84
85 /* This file should be included last. */
86 #include "target-def.h"
87
88 extern tree rs6000_builtin_mask_for_load (void);
89 extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
90 extern tree rs6000_builtin_reciprocal (tree);
91
92 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
93 systems will also set long double to be IEEE 128-bit. AIX and Darwin
94 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
95 those systems will not pick up this default. This needs to be after all
96 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
97 properly defined. */
98 #ifndef TARGET_IEEEQUAD_DEFAULT
99 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
100 #define TARGET_IEEEQUAD_DEFAULT 1
101 #else
102 #define TARGET_IEEEQUAD_DEFAULT 0
103 #endif
104 #endif
105
106 /* Don't enable PC-relative addressing if the target does not support it. */
107 #ifndef PCREL_SUPPORTED_BY_OS
108 #define PCREL_SUPPORTED_BY_OS 0
109 #endif
110
111 #ifdef USING_ELFOS_H
112 /* Counter for labels which are to be placed in .fixup. */
113 int fixuplabelno = 0;
114 #endif
115
116 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
117 int dot_symbols;
118
119 /* Specify the machine mode that pointers have. After generation of rtl, the
120 compiler makes no further distinction between pointers and any other objects
121 of this machine mode. */
122 scalar_int_mode rs6000_pmode;
123
124 /* Track use of r13 in 64bit AIX TLS. */
125 static bool xcoff_tls_exec_model_detected = false;
126
127 /* Width in bits of a pointer. */
128 unsigned rs6000_pointer_size;
129
130 #ifdef HAVE_AS_GNU_ATTRIBUTE
131 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
132 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
133 # endif
134 /* Flag whether floating point values have been passed/returned.
135 Note that this doesn't say whether fprs are used, since the
136 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
137 should be set for soft-float values passed in gprs and ieee128
138 values passed in vsx registers. */
139 bool rs6000_passes_float = false;
140 bool rs6000_passes_long_double = false;
141 /* Flag whether vector values have been passed/returned. */
142 bool rs6000_passes_vector = false;
143 /* Flag whether small (<= 8 byte) structures have been returned. */
144 bool rs6000_returns_struct = false;
145 #endif
146
147 /* Value is TRUE if register/mode pair is acceptable. */
148 static bool rs6000_hard_regno_mode_ok_p
149 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
150
151 /* Maximum number of registers needed for a given register class and mode. */
152 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
153
154 /* How many registers are needed for a given register and mode. */
155 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
156
157 /* Map register number to register class. */
158 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
159
160 static int dbg_cost_ctrl;
161
162 /* Flag to say the TOC is initialized */
163 int toc_initialized, need_toc_init;
164 char toc_label_name[10];
165
166 /* Cached value of rs6000_variable_issue. This is cached in
167 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
168 static short cached_can_issue_more;
169
170 static GTY(()) section *read_only_data_section;
171 static GTY(()) section *private_data_section;
172 static GTY(()) section *tls_data_section;
173 static GTY(()) section *tls_private_data_section;
174 static GTY(()) section *read_only_private_data_section;
175 static GTY(()) section *sdata2_section;
176
177 section *toc_section = 0;
178
179 /* Describe the vector unit used for modes. */
180 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
181 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
182
183 /* Register classes for various constraints that are based on the target
184 switches. */
185 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
186
187 /* Describe the alignment of a vector. */
188 int rs6000_vector_align[NUM_MACHINE_MODES];
189
190 /* What modes to automatically generate reciprocal divide estimate (fre) and
191 reciprocal sqrt (frsqrte) for. */
192 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
193
194 /* Masks to determine which reciprocal esitmate instructions to generate
195 automatically. */
196 enum rs6000_recip_mask {
197 RECIP_SF_DIV = 0x001, /* Use divide estimate */
198 RECIP_DF_DIV = 0x002,
199 RECIP_V4SF_DIV = 0x004,
200 RECIP_V2DF_DIV = 0x008,
201
202 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
203 RECIP_DF_RSQRT = 0x020,
204 RECIP_V4SF_RSQRT = 0x040,
205 RECIP_V2DF_RSQRT = 0x080,
206
207 /* Various combination of flags for -mrecip=xxx. */
208 RECIP_NONE = 0,
209 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
210 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
211 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
212
213 RECIP_HIGH_PRECISION = RECIP_ALL,
214
215 /* On low precision machines like the power5, don't enable double precision
216 reciprocal square root estimate, since it isn't accurate enough. */
217 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
218 };
219
220 /* -mrecip options. */
221 static struct
222 {
223 const char *string; /* option name */
224 unsigned int mask; /* mask bits to set */
225 } recip_options[] = {
226 { "all", RECIP_ALL },
227 { "none", RECIP_NONE },
228 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
229 | RECIP_V2DF_DIV) },
230 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
231 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
232 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
233 | RECIP_V2DF_RSQRT) },
234 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
235 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
236 };
237
238 /* On PowerPC, we have a limited number of target clones that we care about
239 which means we can use an array to hold the options, rather than having more
240 elaborate data structures to identify each possible variation. Order the
241 clones from the default to the highest ISA. */
242 enum {
243 CLONE_DEFAULT = 0, /* default clone. */
244 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
245 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
246 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
247 CLONE_ISA_3_00, /* ISA 3.0 (power9). */
248 CLONE_ISA_3_1, /* ISA 3.1 (power10). */
249 CLONE_MAX
250 };
251
252 /* Map compiler ISA bits into HWCAP names. */
253 struct clone_map {
254 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
255 const char *name; /* name to use in __builtin_cpu_supports. */
256 };
257
258 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
259 { 0, "" }, /* Default options. */
260 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
261 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
262 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
263 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
264 { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
265 };
266
267
268 /* Newer LIBCs explicitly export this symbol to declare that they provide
269 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
270 reference to this symbol whenever we expand a CPU builtin, so that
271 we never link against an old LIBC. */
272 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
273
274 /* True if we have expanded a CPU builtin. */
275 bool cpu_builtin_p = false;
276
277 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
278 macros that have changed. Languages that don't support the preprocessor
279 don't link in rs6000-c.cc, so we can't call it directly. */
280 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);
281
282 /* Simplfy register classes into simpler classifications. We assume
283 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
284 check for standard register classes (gpr/floating/altivec/vsx) and
285 floating/vector classes (float/altivec/vsx). */
286
287 enum rs6000_reg_type {
288 NO_REG_TYPE,
289 PSEUDO_REG_TYPE,
290 GPR_REG_TYPE,
291 VSX_REG_TYPE,
292 ALTIVEC_REG_TYPE,
293 FPR_REG_TYPE,
294 SPR_REG_TYPE,
295 CR_REG_TYPE
296 };
297
298 /* Map register class to register type. */
299 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
300
301 /* First/last register type for the 'normal' register types (i.e. general
302 purpose, floating point, altivec, and VSX registers). */
303 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
304
305 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
306
307
308 /* Register classes we care about in secondary reload or go if legitimate
309 address. We only need to worry about GPR, FPR, and Altivec registers here,
310 along an ANY field that is the OR of the 3 register classes. */
311
312 enum rs6000_reload_reg_type {
313 RELOAD_REG_GPR, /* General purpose registers. */
314 RELOAD_REG_FPR, /* Traditional floating point regs. */
315 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
316 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
317 N_RELOAD_REG
318 };
319
320 /* For setting up register classes, loop through the 3 register classes mapping
321 into real registers, and skip the ANY class, which is just an OR of the
322 bits. */
323 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
324 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
325
326 /* Map reload register type to a register in the register class. */
327 struct reload_reg_map_type {
328 const char *name; /* Register class name. */
329 int reg; /* Register in the register class. */
330 };
331
332 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
333 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
334 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
335 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
336 { "Any", -1 }, /* RELOAD_REG_ANY. */
337 };
338
339 /* Mask bits for each register class, indexed per mode. Historically the
340 compiler has been more restrictive which types can do PRE_MODIFY instead of
341 PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */
342 typedef unsigned char addr_mask_type;
343
344 #define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */
345 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
346 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
347 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
348 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
349 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
350 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
351 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
352
353 /* Register type masks based on the type, of valid addressing modes. */
354 struct rs6000_reg_addr {
355 enum insn_code reload_load; /* INSN to reload for loading. */
356 enum insn_code reload_store; /* INSN to reload for storing. */
357 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
358 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
359 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
360 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
361 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
362 };
363
364 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
365
366 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
367 static inline bool
368 mode_supports_pre_incdec_p (machine_mode mode)
369 {
370 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
371 != 0);
372 }
373
374 /* Helper function to say whether a mode supports PRE_MODIFY. */
375 static inline bool
376 mode_supports_pre_modify_p (machine_mode mode)
377 {
378 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
379 != 0);
380 }
381
382 /* Return true if we have D-form addressing in altivec registers. */
383 static inline bool
384 mode_supports_vmx_dform (machine_mode mode)
385 {
386 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
387 }
388
389 /* Return true if we have D-form addressing in VSX registers. This addressing
390 is more limited than normal d-form addressing in that the offset must be
391 aligned on a 16-byte boundary. */
392 static inline bool
393 mode_supports_dq_form (machine_mode mode)
394 {
395 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
396 != 0);
397 }
398
399 /* Given that there exists at least one variable that is set (produced)
400 by OUT_INSN and read (consumed) by IN_INSN, return true iff
401 IN_INSN represents one or more memory store operations and none of
402 the variables set by OUT_INSN is used by IN_INSN as the address of a
403 store operation. If either IN_INSN or OUT_INSN does not represent
404 a "single" RTL SET expression (as loosely defined by the
405 implementation of the single_set function) or a PARALLEL with only
406 SETs, CLOBBERs, and USEs inside, this function returns false.
407
408 This rs6000-specific version of store_data_bypass_p checks for
409 certain conditions that result in assertion failures (and internal
410 compiler errors) in the generic store_data_bypass_p function and
411 returns false rather than calling store_data_bypass_p if one of the
412 problematic conditions is detected. */
413
414 int
415 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
416 {
417 rtx out_set, in_set;
418 rtx out_pat, in_pat;
419 rtx out_exp, in_exp;
420 int i, j;
421
422 in_set = single_set (in_insn);
423 if (in_set)
424 {
425 if (MEM_P (SET_DEST (in_set)))
426 {
427 out_set = single_set (out_insn);
428 if (!out_set)
429 {
430 out_pat = PATTERN (out_insn);
431 if (GET_CODE (out_pat) == PARALLEL)
432 {
433 for (i = 0; i < XVECLEN (out_pat, 0); i++)
434 {
435 out_exp = XVECEXP (out_pat, 0, i);
436 if ((GET_CODE (out_exp) == CLOBBER)
437 || (GET_CODE (out_exp) == USE))
438 continue;
439 else if (GET_CODE (out_exp) != SET)
440 return false;
441 }
442 }
443 }
444 }
445 }
446 else
447 {
448 in_pat = PATTERN (in_insn);
449 if (GET_CODE (in_pat) != PARALLEL)
450 return false;
451
452 for (i = 0; i < XVECLEN (in_pat, 0); i++)
453 {
454 in_exp = XVECEXP (in_pat, 0, i);
455 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
456 continue;
457 else if (GET_CODE (in_exp) != SET)
458 return false;
459
460 if (MEM_P (SET_DEST (in_exp)))
461 {
462 out_set = single_set (out_insn);
463 if (!out_set)
464 {
465 out_pat = PATTERN (out_insn);
466 if (GET_CODE (out_pat) != PARALLEL)
467 return false;
468 for (j = 0; j < XVECLEN (out_pat, 0); j++)
469 {
470 out_exp = XVECEXP (out_pat, 0, j);
471 if ((GET_CODE (out_exp) == CLOBBER)
472 || (GET_CODE (out_exp) == USE))
473 continue;
474 else if (GET_CODE (out_exp) != SET)
475 return false;
476 }
477 }
478 }
479 }
480 }
481 return store_data_bypass_p (out_insn, in_insn);
482 }
483
484 \f
485 /* Processor costs (relative to an add) */
486
487 const struct processor_costs *rs6000_cost;
488
489 /* Instruction size costs on 32bit processors. */
490 static const
491 struct processor_costs size32_cost = {
492 COSTS_N_INSNS (1), /* mulsi */
493 COSTS_N_INSNS (1), /* mulsi_const */
494 COSTS_N_INSNS (1), /* mulsi_const9 */
495 COSTS_N_INSNS (1), /* muldi */
496 COSTS_N_INSNS (1), /* divsi */
497 COSTS_N_INSNS (1), /* divdi */
498 COSTS_N_INSNS (1), /* fp */
499 COSTS_N_INSNS (1), /* dmul */
500 COSTS_N_INSNS (1), /* sdiv */
501 COSTS_N_INSNS (1), /* ddiv */
502 32, /* cache line size */
503 0, /* l1 cache */
504 0, /* l2 cache */
505 0, /* streams */
506 0, /* SF->DF convert */
507 };
508
509 /* Instruction size costs on 64bit processors. */
510 static const
511 struct processor_costs size64_cost = {
512 COSTS_N_INSNS (1), /* mulsi */
513 COSTS_N_INSNS (1), /* mulsi_const */
514 COSTS_N_INSNS (1), /* mulsi_const9 */
515 COSTS_N_INSNS (1), /* muldi */
516 COSTS_N_INSNS (1), /* divsi */
517 COSTS_N_INSNS (1), /* divdi */
518 COSTS_N_INSNS (1), /* fp */
519 COSTS_N_INSNS (1), /* dmul */
520 COSTS_N_INSNS (1), /* sdiv */
521 COSTS_N_INSNS (1), /* ddiv */
522 128, /* cache line size */
523 0, /* l1 cache */
524 0, /* l2 cache */
525 0, /* streams */
526 0, /* SF->DF convert */
527 };
528
529 /* Instruction costs on RS64A processors. */
530 static const
531 struct processor_costs rs64a_cost = {
532 COSTS_N_INSNS (20), /* mulsi */
533 COSTS_N_INSNS (12), /* mulsi_const */
534 COSTS_N_INSNS (8), /* mulsi_const9 */
535 COSTS_N_INSNS (34), /* muldi */
536 COSTS_N_INSNS (65), /* divsi */
537 COSTS_N_INSNS (67), /* divdi */
538 COSTS_N_INSNS (4), /* fp */
539 COSTS_N_INSNS (4), /* dmul */
540 COSTS_N_INSNS (31), /* sdiv */
541 COSTS_N_INSNS (31), /* ddiv */
542 128, /* cache line size */
543 128, /* l1 cache */
544 2048, /* l2 cache */
545 1, /* streams */
546 0, /* SF->DF convert */
547 };
548
549 /* Instruction costs on MPCCORE processors. */
550 static const
551 struct processor_costs mpccore_cost = {
552 COSTS_N_INSNS (2), /* mulsi */
553 COSTS_N_INSNS (2), /* mulsi_const */
554 COSTS_N_INSNS (2), /* mulsi_const9 */
555 COSTS_N_INSNS (2), /* muldi */
556 COSTS_N_INSNS (6), /* divsi */
557 COSTS_N_INSNS (6), /* divdi */
558 COSTS_N_INSNS (4), /* fp */
559 COSTS_N_INSNS (5), /* dmul */
560 COSTS_N_INSNS (10), /* sdiv */
561 COSTS_N_INSNS (17), /* ddiv */
562 32, /* cache line size */
563 4, /* l1 cache */
564 16, /* l2 cache */
565 1, /* streams */
566 0, /* SF->DF convert */
567 };
568
569 /* Instruction costs on PPC403 processors. */
570 static const
571 struct processor_costs ppc403_cost = {
572 COSTS_N_INSNS (4), /* mulsi */
573 COSTS_N_INSNS (4), /* mulsi_const */
574 COSTS_N_INSNS (4), /* mulsi_const9 */
575 COSTS_N_INSNS (4), /* muldi */
576 COSTS_N_INSNS (33), /* divsi */
577 COSTS_N_INSNS (33), /* divdi */
578 COSTS_N_INSNS (11), /* fp */
579 COSTS_N_INSNS (11), /* dmul */
580 COSTS_N_INSNS (11), /* sdiv */
581 COSTS_N_INSNS (11), /* ddiv */
582 32, /* cache line size */
583 4, /* l1 cache */
584 16, /* l2 cache */
585 1, /* streams */
586 0, /* SF->DF convert */
587 };
588
589 /* Instruction costs on PPC405 processors. */
590 static const
591 struct processor_costs ppc405_cost = {
592 COSTS_N_INSNS (5), /* mulsi */
593 COSTS_N_INSNS (4), /* mulsi_const */
594 COSTS_N_INSNS (3), /* mulsi_const9 */
595 COSTS_N_INSNS (5), /* muldi */
596 COSTS_N_INSNS (35), /* divsi */
597 COSTS_N_INSNS (35), /* divdi */
598 COSTS_N_INSNS (11), /* fp */
599 COSTS_N_INSNS (11), /* dmul */
600 COSTS_N_INSNS (11), /* sdiv */
601 COSTS_N_INSNS (11), /* ddiv */
602 32, /* cache line size */
603 16, /* l1 cache */
604 128, /* l2 cache */
605 1, /* streams */
606 0, /* SF->DF convert */
607 };
608
609 /* Instruction costs on PPC440 processors. */
610 static const
611 struct processor_costs ppc440_cost = {
612 COSTS_N_INSNS (3), /* mulsi */
613 COSTS_N_INSNS (2), /* mulsi_const */
614 COSTS_N_INSNS (2), /* mulsi_const9 */
615 COSTS_N_INSNS (3), /* muldi */
616 COSTS_N_INSNS (34), /* divsi */
617 COSTS_N_INSNS (34), /* divdi */
618 COSTS_N_INSNS (5), /* fp */
619 COSTS_N_INSNS (5), /* dmul */
620 COSTS_N_INSNS (19), /* sdiv */
621 COSTS_N_INSNS (33), /* ddiv */
622 32, /* cache line size */
623 32, /* l1 cache */
624 256, /* l2 cache */
625 1, /* streams */
626 0, /* SF->DF convert */
627 };
628
629 /* Instruction costs on PPC476 processors. */
630 static const
631 struct processor_costs ppc476_cost = {
632 COSTS_N_INSNS (4), /* mulsi */
633 COSTS_N_INSNS (4), /* mulsi_const */
634 COSTS_N_INSNS (4), /* mulsi_const9 */
635 COSTS_N_INSNS (4), /* muldi */
636 COSTS_N_INSNS (11), /* divsi */
637 COSTS_N_INSNS (11), /* divdi */
638 COSTS_N_INSNS (6), /* fp */
639 COSTS_N_INSNS (6), /* dmul */
640 COSTS_N_INSNS (19), /* sdiv */
641 COSTS_N_INSNS (33), /* ddiv */
642 32, /* l1 cache line size */
643 32, /* l1 cache */
644 512, /* l2 cache */
645 1, /* streams */
646 0, /* SF->DF convert */
647 };
648
649 /* Instruction costs on PPC601 processors. */
650 static const
651 struct processor_costs ppc601_cost = {
652 COSTS_N_INSNS (5), /* mulsi */
653 COSTS_N_INSNS (5), /* mulsi_const */
654 COSTS_N_INSNS (5), /* mulsi_const9 */
655 COSTS_N_INSNS (5), /* muldi */
656 COSTS_N_INSNS (36), /* divsi */
657 COSTS_N_INSNS (36), /* divdi */
658 COSTS_N_INSNS (4), /* fp */
659 COSTS_N_INSNS (5), /* dmul */
660 COSTS_N_INSNS (17), /* sdiv */
661 COSTS_N_INSNS (31), /* ddiv */
662 32, /* cache line size */
663 32, /* l1 cache */
664 256, /* l2 cache */
665 1, /* streams */
666 0, /* SF->DF convert */
667 };
668
669 /* Instruction costs on PPC603 processors. */
670 static const
671 struct processor_costs ppc603_cost = {
672 COSTS_N_INSNS (5), /* mulsi */
673 COSTS_N_INSNS (3), /* mulsi_const */
674 COSTS_N_INSNS (2), /* mulsi_const9 */
675 COSTS_N_INSNS (5), /* muldi */
676 COSTS_N_INSNS (37), /* divsi */
677 COSTS_N_INSNS (37), /* divdi */
678 COSTS_N_INSNS (3), /* fp */
679 COSTS_N_INSNS (4), /* dmul */
680 COSTS_N_INSNS (18), /* sdiv */
681 COSTS_N_INSNS (33), /* ddiv */
682 32, /* cache line size */
683 8, /* l1 cache */
684 64, /* l2 cache */
685 1, /* streams */
686 0, /* SF->DF convert */
687 };
688
689 /* Instruction costs on PPC604 processors. */
690 static const
691 struct processor_costs ppc604_cost = {
692 COSTS_N_INSNS (4), /* mulsi */
693 COSTS_N_INSNS (4), /* mulsi_const */
694 COSTS_N_INSNS (4), /* mulsi_const9 */
695 COSTS_N_INSNS (4), /* muldi */
696 COSTS_N_INSNS (20), /* divsi */
697 COSTS_N_INSNS (20), /* divdi */
698 COSTS_N_INSNS (3), /* fp */
699 COSTS_N_INSNS (3), /* dmul */
700 COSTS_N_INSNS (18), /* sdiv */
701 COSTS_N_INSNS (32), /* ddiv */
702 32, /* cache line size */
703 16, /* l1 cache */
704 512, /* l2 cache */
705 1, /* streams */
706 0, /* SF->DF convert */
707 };
708
709 /* Instruction costs on PPC604e processors. */
710 static const
711 struct processor_costs ppc604e_cost = {
712 COSTS_N_INSNS (2), /* mulsi */
713 COSTS_N_INSNS (2), /* mulsi_const */
714 COSTS_N_INSNS (2), /* mulsi_const9 */
715 COSTS_N_INSNS (2), /* muldi */
716 COSTS_N_INSNS (20), /* divsi */
717 COSTS_N_INSNS (20), /* divdi */
718 COSTS_N_INSNS (3), /* fp */
719 COSTS_N_INSNS (3), /* dmul */
720 COSTS_N_INSNS (18), /* sdiv */
721 COSTS_N_INSNS (32), /* ddiv */
722 32, /* cache line size */
723 32, /* l1 cache */
724 1024, /* l2 cache */
725 1, /* streams */
726 0, /* SF->DF convert */
727 };
728
729 /* Instruction costs on PPC620 processors. */
730 static const
731 struct processor_costs ppc620_cost = {
732 COSTS_N_INSNS (5), /* mulsi */
733 COSTS_N_INSNS (4), /* mulsi_const */
734 COSTS_N_INSNS (3), /* mulsi_const9 */
735 COSTS_N_INSNS (7), /* muldi */
736 COSTS_N_INSNS (21), /* divsi */
737 COSTS_N_INSNS (37), /* divdi */
738 COSTS_N_INSNS (3), /* fp */
739 COSTS_N_INSNS (3), /* dmul */
740 COSTS_N_INSNS (18), /* sdiv */
741 COSTS_N_INSNS (32), /* ddiv */
742 128, /* cache line size */
743 32, /* l1 cache */
744 1024, /* l2 cache */
745 1, /* streams */
746 0, /* SF->DF convert */
747 };
748
749 /* Instruction costs on PPC630 processors. */
750 static const
751 struct processor_costs ppc630_cost = {
752 COSTS_N_INSNS (5), /* mulsi */
753 COSTS_N_INSNS (4), /* mulsi_const */
754 COSTS_N_INSNS (3), /* mulsi_const9 */
755 COSTS_N_INSNS (7), /* muldi */
756 COSTS_N_INSNS (21), /* divsi */
757 COSTS_N_INSNS (37), /* divdi */
758 COSTS_N_INSNS (3), /* fp */
759 COSTS_N_INSNS (3), /* dmul */
760 COSTS_N_INSNS (17), /* sdiv */
761 COSTS_N_INSNS (21), /* ddiv */
762 128, /* cache line size */
763 64, /* l1 cache */
764 1024, /* l2 cache */
765 1, /* streams */
766 0, /* SF->DF convert */
767 };
768
769 /* Instruction costs on Cell processor. */
770 /* COSTS_N_INSNS (1) ~ one add. */
771 static const
772 struct processor_costs ppccell_cost = {
773 COSTS_N_INSNS (9/2)+2, /* mulsi */
774 COSTS_N_INSNS (6/2), /* mulsi_const */
775 COSTS_N_INSNS (6/2), /* mulsi_const9 */
776 COSTS_N_INSNS (15/2)+2, /* muldi */
777 COSTS_N_INSNS (38/2), /* divsi */
778 COSTS_N_INSNS (70/2), /* divdi */
779 COSTS_N_INSNS (10/2), /* fp */
780 COSTS_N_INSNS (10/2), /* dmul */
781 COSTS_N_INSNS (74/2), /* sdiv */
782 COSTS_N_INSNS (74/2), /* ddiv */
783 128, /* cache line size */
784 32, /* l1 cache */
785 512, /* l2 cache */
786 6, /* streams */
787 0, /* SF->DF convert */
788 };
789
790 /* Instruction costs on PPC750 and PPC7400 processors. */
791 static const
792 struct processor_costs ppc750_cost = {
793 COSTS_N_INSNS (5), /* mulsi */
794 COSTS_N_INSNS (3), /* mulsi_const */
795 COSTS_N_INSNS (2), /* mulsi_const9 */
796 COSTS_N_INSNS (5), /* muldi */
797 COSTS_N_INSNS (17), /* divsi */
798 COSTS_N_INSNS (17), /* divdi */
799 COSTS_N_INSNS (3), /* fp */
800 COSTS_N_INSNS (3), /* dmul */
801 COSTS_N_INSNS (17), /* sdiv */
802 COSTS_N_INSNS (31), /* ddiv */
803 32, /* cache line size */
804 32, /* l1 cache */
805 512, /* l2 cache */
806 1, /* streams */
807 0, /* SF->DF convert */
808 };
809
810 /* Instruction costs on PPC7450 processors. */
811 static const
812 struct processor_costs ppc7450_cost = {
813 COSTS_N_INSNS (4), /* mulsi */
814 COSTS_N_INSNS (3), /* mulsi_const */
815 COSTS_N_INSNS (3), /* mulsi_const9 */
816 COSTS_N_INSNS (4), /* muldi */
817 COSTS_N_INSNS (23), /* divsi */
818 COSTS_N_INSNS (23), /* divdi */
819 COSTS_N_INSNS (5), /* fp */
820 COSTS_N_INSNS (5), /* dmul */
821 COSTS_N_INSNS (21), /* sdiv */
822 COSTS_N_INSNS (35), /* ddiv */
823 32, /* cache line size */
824 32, /* l1 cache */
825 1024, /* l2 cache */
826 1, /* streams */
827 0, /* SF->DF convert */
828 };
829
830 /* Instruction costs on PPC8540 processors. */
831 static const
832 struct processor_costs ppc8540_cost = {
833 COSTS_N_INSNS (4), /* mulsi */
834 COSTS_N_INSNS (4), /* mulsi_const */
835 COSTS_N_INSNS (4), /* mulsi_const9 */
836 COSTS_N_INSNS (4), /* muldi */
837 COSTS_N_INSNS (19), /* divsi */
838 COSTS_N_INSNS (19), /* divdi */
839 COSTS_N_INSNS (4), /* fp */
840 COSTS_N_INSNS (4), /* dmul */
841 COSTS_N_INSNS (29), /* sdiv */
842 COSTS_N_INSNS (29), /* ddiv */
843 32, /* cache line size */
844 32, /* l1 cache */
845 256, /* l2 cache */
846 1, /* prefetch streams /*/
847 0, /* SF->DF convert */
848 };
849
850 /* Instruction costs on E300C2 and E300C3 cores. */
851 static const
852 struct processor_costs ppce300c2c3_cost = {
853 COSTS_N_INSNS (4), /* mulsi */
854 COSTS_N_INSNS (4), /* mulsi_const */
855 COSTS_N_INSNS (4), /* mulsi_const9 */
856 COSTS_N_INSNS (4), /* muldi */
857 COSTS_N_INSNS (19), /* divsi */
858 COSTS_N_INSNS (19), /* divdi */
859 COSTS_N_INSNS (3), /* fp */
860 COSTS_N_INSNS (4), /* dmul */
861 COSTS_N_INSNS (18), /* sdiv */
862 COSTS_N_INSNS (33), /* ddiv */
863 32,
864 16, /* l1 cache */
865 16, /* l2 cache */
866 1, /* prefetch streams /*/
867 0, /* SF->DF convert */
868 };
869
870 /* Instruction costs on PPCE500MC processors. */
871 static const
872 struct processor_costs ppce500mc_cost = {
873 COSTS_N_INSNS (4), /* mulsi */
874 COSTS_N_INSNS (4), /* mulsi_const */
875 COSTS_N_INSNS (4), /* mulsi_const9 */
876 COSTS_N_INSNS (4), /* muldi */
877 COSTS_N_INSNS (14), /* divsi */
878 COSTS_N_INSNS (14), /* divdi */
879 COSTS_N_INSNS (8), /* fp */
880 COSTS_N_INSNS (10), /* dmul */
881 COSTS_N_INSNS (36), /* sdiv */
882 COSTS_N_INSNS (66), /* ddiv */
883 64, /* cache line size */
884 32, /* l1 cache */
885 128, /* l2 cache */
886 1, /* prefetch streams /*/
887 0, /* SF->DF convert */
888 };
889
890 /* Instruction costs on PPCE500MC64 processors. */
891 static const
892 struct processor_costs ppce500mc64_cost = {
893 COSTS_N_INSNS (4), /* mulsi */
894 COSTS_N_INSNS (4), /* mulsi_const */
895 COSTS_N_INSNS (4), /* mulsi_const9 */
896 COSTS_N_INSNS (4), /* muldi */
897 COSTS_N_INSNS (14), /* divsi */
898 COSTS_N_INSNS (14), /* divdi */
899 COSTS_N_INSNS (4), /* fp */
900 COSTS_N_INSNS (10), /* dmul */
901 COSTS_N_INSNS (36), /* sdiv */
902 COSTS_N_INSNS (66), /* ddiv */
903 64, /* cache line size */
904 32, /* l1 cache */
905 128, /* l2 cache */
906 1, /* prefetch streams /*/
907 0, /* SF->DF convert */
908 };
909
910 /* Instruction costs on PPCE5500 processors. */
911 static const
912 struct processor_costs ppce5500_cost = {
913 COSTS_N_INSNS (5), /* mulsi */
914 COSTS_N_INSNS (5), /* mulsi_const */
915 COSTS_N_INSNS (4), /* mulsi_const9 */
916 COSTS_N_INSNS (5), /* muldi */
917 COSTS_N_INSNS (14), /* divsi */
918 COSTS_N_INSNS (14), /* divdi */
919 COSTS_N_INSNS (7), /* fp */
920 COSTS_N_INSNS (10), /* dmul */
921 COSTS_N_INSNS (36), /* sdiv */
922 COSTS_N_INSNS (66), /* ddiv */
923 64, /* cache line size */
924 32, /* l1 cache */
925 128, /* l2 cache */
926 1, /* prefetch streams /*/
927 0, /* SF->DF convert */
928 };
929
930 /* Instruction costs on PPCE6500 processors. */
931 static const
932 struct processor_costs ppce6500_cost = {
933 COSTS_N_INSNS (5), /* mulsi */
934 COSTS_N_INSNS (5), /* mulsi_const */
935 COSTS_N_INSNS (4), /* mulsi_const9 */
936 COSTS_N_INSNS (5), /* muldi */
937 COSTS_N_INSNS (14), /* divsi */
938 COSTS_N_INSNS (14), /* divdi */
939 COSTS_N_INSNS (7), /* fp */
940 COSTS_N_INSNS (10), /* dmul */
941 COSTS_N_INSNS (36), /* sdiv */
942 COSTS_N_INSNS (66), /* ddiv */
943 64, /* cache line size */
944 32, /* l1 cache */
945 128, /* l2 cache */
946 1, /* prefetch streams /*/
947 0, /* SF->DF convert */
948 };
949
950 /* Instruction costs on AppliedMicro Titan processors. */
951 static const
952 struct processor_costs titan_cost = {
953 COSTS_N_INSNS (5), /* mulsi */
954 COSTS_N_INSNS (5), /* mulsi_const */
955 COSTS_N_INSNS (5), /* mulsi_const9 */
956 COSTS_N_INSNS (5), /* muldi */
957 COSTS_N_INSNS (18), /* divsi */
958 COSTS_N_INSNS (18), /* divdi */
959 COSTS_N_INSNS (10), /* fp */
960 COSTS_N_INSNS (10), /* dmul */
961 COSTS_N_INSNS (46), /* sdiv */
962 COSTS_N_INSNS (72), /* ddiv */
963 32, /* cache line size */
964 32, /* l1 cache */
965 512, /* l2 cache */
966 1, /* prefetch streams /*/
967 0, /* SF->DF convert */
968 };
969
970 /* Instruction costs on POWER4 and POWER5 processors. */
971 static const
972 struct processor_costs power4_cost = {
973 COSTS_N_INSNS (3), /* mulsi */
974 COSTS_N_INSNS (2), /* mulsi_const */
975 COSTS_N_INSNS (2), /* mulsi_const9 */
976 COSTS_N_INSNS (4), /* muldi */
977 COSTS_N_INSNS (18), /* divsi */
978 COSTS_N_INSNS (34), /* divdi */
979 COSTS_N_INSNS (3), /* fp */
980 COSTS_N_INSNS (3), /* dmul */
981 COSTS_N_INSNS (17), /* sdiv */
982 COSTS_N_INSNS (17), /* ddiv */
983 128, /* cache line size */
984 32, /* l1 cache */
985 1024, /* l2 cache */
986 8, /* prefetch streams /*/
987 0, /* SF->DF convert */
988 };
989
990 /* Instruction costs on POWER6 processors. */
991 static const
992 struct processor_costs power6_cost = {
993 COSTS_N_INSNS (8), /* mulsi */
994 COSTS_N_INSNS (8), /* mulsi_const */
995 COSTS_N_INSNS (8), /* mulsi_const9 */
996 COSTS_N_INSNS (8), /* muldi */
997 COSTS_N_INSNS (22), /* divsi */
998 COSTS_N_INSNS (28), /* divdi */
999 COSTS_N_INSNS (3), /* fp */
1000 COSTS_N_INSNS (3), /* dmul */
1001 COSTS_N_INSNS (13), /* sdiv */
1002 COSTS_N_INSNS (16), /* ddiv */
1003 128, /* cache line size */
1004 64, /* l1 cache */
1005 2048, /* l2 cache */
1006 16, /* prefetch streams */
1007 0, /* SF->DF convert */
1008 };
1009
1010 /* Instruction costs on POWER7 processors. */
1011 static const
1012 struct processor_costs power7_cost = {
1013 COSTS_N_INSNS (2), /* mulsi */
1014 COSTS_N_INSNS (2), /* mulsi_const */
1015 COSTS_N_INSNS (2), /* mulsi_const9 */
1016 COSTS_N_INSNS (2), /* muldi */
1017 COSTS_N_INSNS (18), /* divsi */
1018 COSTS_N_INSNS (34), /* divdi */
1019 COSTS_N_INSNS (3), /* fp */
1020 COSTS_N_INSNS (3), /* dmul */
1021 COSTS_N_INSNS (13), /* sdiv */
1022 COSTS_N_INSNS (16), /* ddiv */
1023 128, /* cache line size */
1024 32, /* l1 cache */
1025 256, /* l2 cache */
1026 12, /* prefetch streams */
1027 COSTS_N_INSNS (3), /* SF->DF convert */
1028 };
1029
1030 /* Instruction costs on POWER8 processors. */
1031 static const
1032 struct processor_costs power8_cost = {
1033 COSTS_N_INSNS (3), /* mulsi */
1034 COSTS_N_INSNS (3), /* mulsi_const */
1035 COSTS_N_INSNS (3), /* mulsi_const9 */
1036 COSTS_N_INSNS (3), /* muldi */
1037 COSTS_N_INSNS (19), /* divsi */
1038 COSTS_N_INSNS (35), /* divdi */
1039 COSTS_N_INSNS (3), /* fp */
1040 COSTS_N_INSNS (3), /* dmul */
1041 COSTS_N_INSNS (14), /* sdiv */
1042 COSTS_N_INSNS (17), /* ddiv */
1043 128, /* cache line size */
1044 32, /* l1 cache */
1045 512, /* l2 cache */
1046 12, /* prefetch streams */
1047 COSTS_N_INSNS (3), /* SF->DF convert */
1048 };
1049
1050 /* Instruction costs on POWER9 processors. */
1051 static const
1052 struct processor_costs power9_cost = {
1053 COSTS_N_INSNS (3), /* mulsi */
1054 COSTS_N_INSNS (3), /* mulsi_const */
1055 COSTS_N_INSNS (3), /* mulsi_const9 */
1056 COSTS_N_INSNS (3), /* muldi */
1057 COSTS_N_INSNS (8), /* divsi */
1058 COSTS_N_INSNS (12), /* divdi */
1059 COSTS_N_INSNS (3), /* fp */
1060 COSTS_N_INSNS (3), /* dmul */
1061 COSTS_N_INSNS (13), /* sdiv */
1062 COSTS_N_INSNS (18), /* ddiv */
1063 128, /* cache line size */
1064 32, /* l1 cache */
1065 512, /* l2 cache */
1066 8, /* prefetch streams */
1067 COSTS_N_INSNS (3), /* SF->DF convert */
1068 };
1069
1070 /* Instruction costs on Power10/Power11 processors. */
1071 static const
1072 struct processor_costs power10_cost = {
1073 COSTS_N_INSNS (2), /* mulsi */
1074 COSTS_N_INSNS (2), /* mulsi_const */
1075 COSTS_N_INSNS (2), /* mulsi_const9 */
1076 COSTS_N_INSNS (2), /* muldi */
1077 COSTS_N_INSNS (6), /* divsi */
1078 COSTS_N_INSNS (6), /* divdi */
1079 COSTS_N_INSNS (2), /* fp */
1080 COSTS_N_INSNS (2), /* dmul */
1081 COSTS_N_INSNS (11), /* sdiv */
1082 COSTS_N_INSNS (13), /* ddiv */
1083 128, /* cache line size */
1084 32, /* l1 cache */
1085 512, /* l2 cache */
1086 16, /* prefetch streams */
1087 COSTS_N_INSNS (2), /* SF->DF convert */
1088 };
1089
1090 /* Instruction costs on POWER A2 processors. */
1091 static const
1092 struct processor_costs ppca2_cost = {
1093 COSTS_N_INSNS (16), /* mulsi */
1094 COSTS_N_INSNS (16), /* mulsi_const */
1095 COSTS_N_INSNS (16), /* mulsi_const9 */
1096 COSTS_N_INSNS (16), /* muldi */
1097 COSTS_N_INSNS (22), /* divsi */
1098 COSTS_N_INSNS (28), /* divdi */
1099 COSTS_N_INSNS (3), /* fp */
1100 COSTS_N_INSNS (3), /* dmul */
1101 COSTS_N_INSNS (59), /* sdiv */
1102 COSTS_N_INSNS (72), /* ddiv */
1103 64,
1104 16, /* l1 cache */
1105 2048, /* l2 cache */
1106 16, /* prefetch streams */
1107 0, /* SF->DF convert */
1108 };
1109
1110 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1111 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1112
1113 \f
1114 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool,
1115 code_helper = ERROR_MARK);
1116 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1117 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1118 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1119 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1120 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr);
1121 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1122 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1123 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1124 bool);
1125 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1126 unsigned int);
1127 static bool is_microcoded_insn (rtx_insn *);
1128 static bool is_nonpipeline_insn (rtx_insn *);
1129 static bool is_cracked_insn (rtx_insn *);
1130 static bool is_load_insn (rtx, rtx *);
1131 static bool is_store_insn (rtx, rtx *);
1132 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1133 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1134 static bool insn_must_be_first_in_group (rtx_insn *);
1135 static bool insn_must_be_last_in_group (rtx_insn *);
1136 bool easy_vector_constant (rtx, machine_mode);
1137 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1138 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1139 #if TARGET_MACHO
1140 static tree get_prev_label (tree);
1141 #endif
1142 static bool rs6000_mode_dependent_address (const_rtx);
1143 static bool rs6000_debug_mode_dependent_address (const_rtx);
1144 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1145 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1146 machine_mode, rtx);
1147 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1148 machine_mode,
1149 rtx);
1150 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1151 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1152 enum reg_class);
1153 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1154 reg_class_t,
1155 reg_class_t);
1156 static bool rs6000_debug_can_change_mode_class (machine_mode,
1157 machine_mode,
1158 reg_class_t);
1159
1160 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1161 = rs6000_mode_dependent_address;
1162
1163 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1164 machine_mode, rtx)
1165 = rs6000_secondary_reload_class;
1166
1167 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1168 = rs6000_preferred_reload_class;
1169
1170 const int INSN_NOT_AVAILABLE = -1;
1171
1172 static void rs6000_print_isa_options (FILE *, int, const char *,
1173 HOST_WIDE_INT);
1174 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1175
1176 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1177 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1178 enum rs6000_reg_type,
1179 machine_mode,
1180 secondary_reload_info *,
1181 bool);
1182 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1183
1184 /* Hash table stuff for keeping track of TOC entries. */
1185
1186 struct GTY((for_user)) toc_hash_struct
1187 {
1188 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1189 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1190 rtx key;
1191 machine_mode key_mode;
1192 int labelno;
1193 };
1194
1195 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1196 {
1197 static hashval_t hash (toc_hash_struct *);
1198 static bool equal (toc_hash_struct *, toc_hash_struct *);
1199 };
1200
1201 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1202
1203
1204 \f
1205 /* Default register names. */
1206 char rs6000_reg_names[][8] =
1207 {
1208 /* GPRs */
1209 "0", "1", "2", "3", "4", "5", "6", "7",
1210 "8", "9", "10", "11", "12", "13", "14", "15",
1211 "16", "17", "18", "19", "20", "21", "22", "23",
1212 "24", "25", "26", "27", "28", "29", "30", "31",
1213 /* FPRs */
1214 "0", "1", "2", "3", "4", "5", "6", "7",
1215 "8", "9", "10", "11", "12", "13", "14", "15",
1216 "16", "17", "18", "19", "20", "21", "22", "23",
1217 "24", "25", "26", "27", "28", "29", "30", "31",
1218 /* VRs */
1219 "0", "1", "2", "3", "4", "5", "6", "7",
1220 "8", "9", "10", "11", "12", "13", "14", "15",
1221 "16", "17", "18", "19", "20", "21", "22", "23",
1222 "24", "25", "26", "27", "28", "29", "30", "31",
1223 /* lr ctr ca ap */
1224 "lr", "ctr", "ca", "ap",
1225 /* cr0..cr7 */
1226 "0", "1", "2", "3", "4", "5", "6", "7",
1227 /* vrsave vscr sfp */
1228 "vrsave", "vscr", "sfp",
1229 };
1230
1231 #ifdef TARGET_REGNAMES
1232 static const char alt_reg_names[][8] =
1233 {
1234 /* GPRs */
1235 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1236 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1237 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1238 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1239 /* FPRs */
1240 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1241 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1242 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1243 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1244 /* VRs */
1245 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1246 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1247 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1248 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1249 /* lr ctr ca ap */
1250 "lr", "ctr", "ca", "ap",
1251 /* cr0..cr7 */
1252 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1253 /* vrsave vscr sfp */
1254 "vrsave", "vscr", "sfp",
1255 };
1256 #endif
1257
1258 /* Table of valid machine attributes. */
1259
1260 static const attribute_spec rs6000_gnu_attributes[] =
1261 {
1262 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1263 affects_type_identity, handler, exclude } */
1264 { "altivec", 1, 1, false, true, false, false,
1265 rs6000_handle_altivec_attribute, NULL },
1266 { "longcall", 0, 0, false, true, true, false,
1267 rs6000_handle_longcall_attribute, NULL },
1268 { "shortcall", 0, 0, false, true, true, false,
1269 rs6000_handle_longcall_attribute, NULL },
1270 { "ms_struct", 0, 0, false, false, false, false,
1271 rs6000_handle_struct_attribute, NULL },
1272 { "gcc_struct", 0, 0, false, false, false, false,
1273 rs6000_handle_struct_attribute, NULL },
1274 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1275 SUBTARGET_ATTRIBUTE_TABLE,
1276 #endif
1277 };
1278
1279 static const scoped_attribute_specs rs6000_gnu_attribute_table =
1280 {
1281 "gnu", { rs6000_gnu_attributes }
1282 };
1283
1284 static const scoped_attribute_specs *const rs6000_attribute_table[] =
1285 {
1286 &rs6000_gnu_attribute_table
1287 };
1288 \f
1289 #ifndef TARGET_PROFILE_KERNEL
1290 #define TARGET_PROFILE_KERNEL 0
1291 #endif
1292 \f
1293 /* Initialize the GCC target structure. */
1294 #undef TARGET_ATTRIBUTE_TABLE
1295 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1296 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1297 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1298 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1299 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1300
1301 #undef TARGET_ASM_ALIGNED_DI_OP
1302 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1303
1304 /* Default unaligned ops are only provided for ELF. Find the ops needed
1305 for non-ELF systems. */
1306 #ifndef OBJECT_FORMAT_ELF
1307 #if TARGET_XCOFF
1308 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1309 64-bit targets. */
1310 #undef TARGET_ASM_UNALIGNED_HI_OP
1311 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1312 #undef TARGET_ASM_UNALIGNED_SI_OP
1313 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1314 #undef TARGET_ASM_UNALIGNED_DI_OP
1315 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1316 #else
1317 /* For Darwin. */
1318 #undef TARGET_ASM_UNALIGNED_HI_OP
1319 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1320 #undef TARGET_ASM_UNALIGNED_SI_OP
1321 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1322 #undef TARGET_ASM_UNALIGNED_DI_OP
1323 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1324 #undef TARGET_ASM_ALIGNED_DI_OP
1325 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1326 #endif
1327 #endif
1328
1329 /* This hook deals with fixups for relocatable code and DI-mode objects
1330 in 64-bit code. */
1331 #undef TARGET_ASM_INTEGER
1332 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1333
1334 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1335 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1336 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1337 #endif
1338
1339 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1340 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1341 rs6000_print_patchable_function_entry
1342
1343 #undef TARGET_SET_UP_BY_PROLOGUE
1344 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1345
1346 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1347 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1348 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1349 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1350 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1351 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1352 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1353 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1354 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1355 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1356 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1357 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1358
1359 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1360 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1361
1362 #undef TARGET_INTERNAL_ARG_POINTER
1363 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1364
1365 #undef TARGET_HAVE_TLS
1366 #define TARGET_HAVE_TLS HAVE_AS_TLS
1367
1368 #undef TARGET_CANNOT_FORCE_CONST_MEM
1369 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1370
1371 #undef TARGET_DELEGITIMIZE_ADDRESS
1372 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1373
1374 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1375 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1376
1377 #undef TARGET_LEGITIMATE_COMBINED_INSN
1378 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1379
1380 #undef TARGET_ASM_FUNCTION_PROLOGUE
1381 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1382 #undef TARGET_ASM_FUNCTION_EPILOGUE
1383 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1384
1385 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1386 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1387
1388 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1389 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1390
1391 #undef TARGET_LEGITIMIZE_ADDRESS
1392 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1393
1394 #undef TARGET_SCHED_VARIABLE_ISSUE
1395 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1396
1397 #undef TARGET_SCHED_ISSUE_RATE
1398 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1399 #undef TARGET_SCHED_ADJUST_COST
1400 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1401 #undef TARGET_SCHED_ADJUST_PRIORITY
1402 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1403 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1404 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1405 #undef TARGET_SCHED_INIT
1406 #define TARGET_SCHED_INIT rs6000_sched_init
1407 #undef TARGET_SCHED_FINISH
1408 #define TARGET_SCHED_FINISH rs6000_sched_finish
1409 #undef TARGET_SCHED_REORDER
1410 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1411 #undef TARGET_SCHED_REORDER2
1412 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1413
1414 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1415 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1416
1417 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1418 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1419
1420 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1421 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1422 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1423 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1424 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1425 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1426 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1427 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1428
1429 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1430 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1431
1432 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1433 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1434 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1435 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1436 rs6000_builtin_support_vector_misalignment
1437 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1438 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1439 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1440 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1441 rs6000_builtin_vectorization_cost
1442 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1443 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1444 rs6000_preferred_simd_mode
1445 #undef TARGET_VECTORIZE_CREATE_COSTS
1446 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1447
1448 #undef TARGET_LOOP_UNROLL_ADJUST
1449 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1450
1451 #undef TARGET_INIT_BUILTINS
1452 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1453 #undef TARGET_BUILTIN_DECL
1454 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1455
1456 #undef TARGET_FOLD_BUILTIN
1457 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1458 #undef TARGET_GIMPLE_FOLD_BUILTIN
1459 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1460
1461 #undef TARGET_EXPAND_BUILTIN
1462 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1463
1464 #undef TARGET_MANGLE_TYPE
1465 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1466
1467 #undef TARGET_INIT_LIBFUNCS
1468 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1469
1470 #if TARGET_MACHO
1471 #undef TARGET_BINDS_LOCAL_P
1472 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1473 #endif
1474
1475 #undef TARGET_MS_BITFIELD_LAYOUT_P
1476 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1477
1478 #undef TARGET_ASM_OUTPUT_MI_THUNK
1479 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1480
1481 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1482 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1483
1484 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1485 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1486
1487 #undef TARGET_REGISTER_MOVE_COST
1488 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1489 #undef TARGET_MEMORY_MOVE_COST
1490 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1491 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1492 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1493 rs6000_ira_change_pseudo_allocno_class
1494 #undef TARGET_CANNOT_COPY_INSN_P
1495 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1496 #undef TARGET_RTX_COSTS
1497 #define TARGET_RTX_COSTS rs6000_rtx_costs
1498 #undef TARGET_ADDRESS_COST
1499 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1500 #undef TARGET_INSN_COST
1501 #define TARGET_INSN_COST rs6000_insn_cost
1502
1503 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1504 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1505
1506 #undef TARGET_PROMOTE_FUNCTION_MODE
1507 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1508
1509 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1510 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1511
1512 #undef TARGET_RETURN_IN_MEMORY
1513 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1514
1515 #undef TARGET_RETURN_IN_MSB
1516 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1517
1518 #undef TARGET_SETUP_INCOMING_VARARGS
1519 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1520
1521 /* Always strict argument naming on rs6000. */
1522 #undef TARGET_STRICT_ARGUMENT_NAMING
1523 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1524 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1525 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1526 #undef TARGET_SPLIT_COMPLEX_ARG
1527 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1528 #undef TARGET_MUST_PASS_IN_STACK
1529 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1530 #undef TARGET_PASS_BY_REFERENCE
1531 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1532 #undef TARGET_ARG_PARTIAL_BYTES
1533 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1534 #undef TARGET_FUNCTION_ARG_ADVANCE
1535 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1536 #undef TARGET_FUNCTION_ARG
1537 #define TARGET_FUNCTION_ARG rs6000_function_arg
1538 #undef TARGET_FUNCTION_ARG_PADDING
1539 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1540 #undef TARGET_FUNCTION_ARG_BOUNDARY
1541 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1542
1543 #undef TARGET_BUILD_BUILTIN_VA_LIST
1544 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1545
1546 #undef TARGET_EXPAND_BUILTIN_VA_START
1547 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1548
1549 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1550 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1551
1552 #undef TARGET_EH_RETURN_FILTER_MODE
1553 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1554
1555 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1556 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1557
1558 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1559 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1560
1561 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1562 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1563 rs6000_libgcc_floating_mode_supported_p
1564
1565 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1566 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1567
1568 #undef TARGET_FLOATN_MODE
1569 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1570
1571 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1572 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1573
1574 #undef TARGET_MD_ASM_ADJUST
1575 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1576
1577 #undef TARGET_OPTION_OVERRIDE
1578 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1579
1580 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1581 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1582 rs6000_builtin_vectorized_function
1583
1584 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1585 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1586 rs6000_builtin_md_vectorized_function
1587
1588 #undef TARGET_STACK_PROTECT_GUARD
1589 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1590
1591 #if !TARGET_MACHO
1592 #undef TARGET_STACK_PROTECT_FAIL
1593 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1594 #endif
1595
1596 #ifdef HAVE_AS_TLS
1597 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1598 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1599 #endif
1600
1601 /* Use a 32-bit anchor range. This leads to sequences like:
1602
1603 addis tmp,anchor,high
1604 add dest,tmp,low
1605
1606 where tmp itself acts as an anchor, and can be shared between
1607 accesses to the same 64k page. */
1608 #undef TARGET_MIN_ANCHOR_OFFSET
1609 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1610 #undef TARGET_MAX_ANCHOR_OFFSET
1611 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1612 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1613 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1614 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1615 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1616
1617 #undef TARGET_BUILTIN_RECIPROCAL
1618 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1619
1620 #undef TARGET_SECONDARY_RELOAD
1621 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1622 #undef TARGET_SECONDARY_MEMORY_NEEDED
1623 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1624 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1625 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1626
1627 #undef TARGET_LEGITIMATE_ADDRESS_P
1628 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1629
1630 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1631 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1632
1633 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1634 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1635
1636 #undef TARGET_CAN_ELIMINATE
1637 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1638
1639 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1640 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1641
1642 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1643 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1644
1645 #undef TARGET_TRAMPOLINE_INIT
1646 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1647
1648 #undef TARGET_FUNCTION_VALUE
1649 #define TARGET_FUNCTION_VALUE rs6000_function_value
1650
1651 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1652 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1653
1654 #undef TARGET_OPTION_SAVE
1655 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1656
1657 #undef TARGET_OPTION_RESTORE
1658 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1659
1660 #undef TARGET_OPTION_PRINT
1661 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1662
1663 #undef TARGET_CAN_INLINE_P
1664 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1665
1666 #undef TARGET_SET_CURRENT_FUNCTION
1667 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1668
1669 #undef TARGET_LEGITIMATE_CONSTANT_P
1670 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1671
1672 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1673 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1674
1675 #undef TARGET_CAN_USE_DOLOOP_P
1676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1677
1678 #undef TARGET_PREDICT_DOLOOP_P
1679 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1680
1681 #undef TARGET_HAVE_COUNT_REG_DECR_P
1682 #define TARGET_HAVE_COUNT_REG_DECR_P true
1683
1684 /* 1000000000 is infinite cost in IVOPTs. */
1685 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1686 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1687
1688 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1689 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1690
1691 #undef TARGET_PREFERRED_DOLOOP_MODE
1692 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1693
1694 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1695 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1696
1697 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1698 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1699 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1700 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1701 #undef TARGET_UNWIND_WORD_MODE
1702 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1703
1704 #undef TARGET_OFFLOAD_OPTIONS
1705 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1706
1707 #undef TARGET_C_MODE_FOR_SUFFIX
1708 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1709
1710 #undef TARGET_C_MODE_FOR_FLOATING_TYPE
1711 #define TARGET_C_MODE_FOR_FLOATING_TYPE rs6000_c_mode_for_floating_type
1712
1713 #undef TARGET_INVALID_BINARY_OP
1714 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1715
1716 #undef TARGET_OPTAB_SUPPORTED_P
1717 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1718
1719 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1720 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1721
1722 #undef TARGET_COMPARE_VERSION_PRIORITY
1723 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1724
1725 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1726 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1727 rs6000_generate_version_dispatcher_body
1728
1729 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1730 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1731 rs6000_get_function_versions_dispatcher
1732
1733 #undef TARGET_OPTION_FUNCTION_VERSIONS
1734 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1735
1736 #undef TARGET_HARD_REGNO_NREGS
1737 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1738 #undef TARGET_HARD_REGNO_MODE_OK
1739 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1740
1741 #undef TARGET_MODES_TIEABLE_P
1742 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1743
1744 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1745 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1746 rs6000_hard_regno_call_part_clobbered
1747
1748 #undef TARGET_SLOW_UNALIGNED_ACCESS
1749 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1750
1751 #undef TARGET_CAN_CHANGE_MODE_CLASS
1752 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1753
1754 #undef TARGET_CONSTANT_ALIGNMENT
1755 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1756
1757 #undef TARGET_STARTING_FRAME_OFFSET
1758 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1759
1760 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1761 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1762
1763 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1764 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1765
1766 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1767 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1768 rs6000_cannot_substitute_mem_equiv_p
1769
1770 #undef TARGET_INVALID_CONVERSION
1771 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1772
1773 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1774 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1775
1776 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1777 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1778
1779 #undef TARGET_CONST_ANCHOR
1780 #define TARGET_CONST_ANCHOR 0x8000
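/* (Editor's illustration, not upstream text.)  The 0x8000 anchor tells CSE
   that two constants within a signed 16-bit "addi" offset of a common
   0x8000-aligned base can share one register.  For example, once
   0x12340000 is live in a register, 0x12345000 can be formed as

	addi dest,base,0x5000

   instead of being rebuilt from scratch.  */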
1781
1782 #undef TARGET_OVERLAP_OP_BY_PIECES_P
1783 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
1784
1785 \f
1786
1787 /* Processor table. */
1788 struct rs6000_ptt
1789 {
1790 const char *const name; /* Canonical processor name. */
1791 const enum processor_type processor; /* Processor type enum value. */
1792 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1793 };
1794
1795 static struct rs6000_ptt const processor_target_table[] =
1796 {
1797 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1798 #include "rs6000-cpus.def"
1799 #undef RS6000_CPU
1800 };
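/* (Editor's sketch.)  Each RS6000_CPU line in rs6000-cpus.def expands to one
   initializer above, so this table automatically tracks the single master
   list of supported CPUs.  Roughly (the exact flag names live in
   rs6000-cpus.def):

	RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64 | ...)
	  =>  { "power9", PROCESSOR_POWER9, MASK_POWERPC64 | ... },  */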
1801
1802 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1803 name is invalid. */
1804
1805 static int
1806 rs6000_cpu_name_lookup (const char *name)
1807 {
1808 size_t i;
1809
1810 if (name != NULL)
1811 {
1812 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1813 if (! strcmp (name, processor_target_table[i].name))
1814 return (int)i;
1815 }
1816
1817 return -1;
1818 }
1819
1820 \f
1821 /* Return number of consecutive hard regs needed starting at reg REGNO
1822 to hold something of mode MODE.
1823 This is ordinarily the length in words of a value of mode MODE
1824 but can be less for certain modes in special long registers.
1825
1826 POWER and PowerPC GPRs hold 32 bits worth;
1827 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1828
1829 static int
1830 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1831 {
1832 unsigned HOST_WIDE_INT reg_size;
1833
1834 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1835 128-bit floating point that can go in vector registers, which has VSX
1836 memory addressing. */
1837 if (FP_REGNO_P (regno))
1838 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1839 ? UNITS_PER_VSX_WORD
1840 : UNITS_PER_FP_WORD);
1841
1842 else if (ALTIVEC_REGNO_P (regno))
1843 reg_size = UNITS_PER_ALTIVEC_WORD;
1844
1845 else
1846 reg_size = UNITS_PER_WORD;
1847
1848 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1849 }
1850
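/* (Editor's worked example for the round-up division above.)  V4SImode is
   16 bytes: in an Altivec register (reg_size 16) it needs
   (16 + 16 - 1) / 16 = 1 register, while in 32-bit GPRs (reg_size 4) it
   needs (16 + 4 - 1) / 4 = 4 consecutive registers.  */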
1851 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1852 MODE. */
1853 static int
1854 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1855 {
1856 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1857
1858 if (COMPLEX_MODE_P (mode))
1859 mode = GET_MODE_INNER (mode);
1860
1861 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1862 registers. */
1863 if (mode == OOmode)
1864 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1865
1866 /* MMA accumulator modes need FPR registers divisible by 4. */
1867 if (mode == XOmode)
1868 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1869
1870 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1871 register pairs; we use PTImode where we need to deal with quad word
1872 memory operations. Don't allow quad words in the argument or frame
1873 pointer registers, just registers 0..31. */
1874 if (mode == PTImode)
1875 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1876 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1877 && ((regno & 1) == 0));
1878
1879 /* The VSX registers that overlap the FPRs are larger (128 bits) than FPRs
1880 on non-VSX implementations. Don't allow an item to be split between a FP
1881 register and an Altivec register. Allow TImode in all VSX registers if
1882 the user asked for it. */
1883 if (TARGET_VSX && VSX_REGNO_P (regno)
1884 && (VECTOR_MEM_VSX_P (mode)
1885 || VECTOR_ALIGNMENT_P (mode)
1886 || reg_addr[mode].scalar_in_vmx_p
1887 || mode == TImode
1888 || (TARGET_VADDUQM && mode == V1TImode)))
1889 {
1890 if (FP_REGNO_P (regno))
1891 return FP_REGNO_P (last_regno);
1892
1893 if (ALTIVEC_REGNO_P (regno))
1894 {
1895 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1896 return 0;
1897
1898 return ALTIVEC_REGNO_P (last_regno);
1899 }
1900 }
1901
1902 /* The GPRs can hold any mode, but values bigger than one register
1903 cannot go past R31. */
1904 if (INT_REGNO_P (regno))
1905 return INT_REGNO_P (last_regno);
1906
1907 /* The float registers (except for VSX vector modes) can only hold floating
1908 modes and DImode. */
1909 if (FP_REGNO_P (regno))
1910 {
1911 if (VECTOR_ALIGNMENT_P (mode))
1912 return false;
1913
1914 if (SCALAR_FLOAT_MODE_P (mode)
1915 && (mode != TDmode || (regno % 2) == 0)
1916 && FP_REGNO_P (last_regno))
1917 return 1;
1918
1919 if (GET_MODE_CLASS (mode) == MODE_INT)
1920 {
1921 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1922 return 1;
1923
1924 if (TARGET_POPCNTD && mode == SImode)
1925 return 1;
1926
1927 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1928 return 1;
1929 }
1930
1931 return 0;
1932 }
1933
1934 /* The CR register can only hold CC modes. */
1935 if (CR_REGNO_P (regno))
1936 return GET_MODE_CLASS (mode) == MODE_CC;
1937
1938 if (CA_REGNO_P (regno))
1939 return mode == Pmode || mode == SImode;
1940
1941 /* AltiVec modes can go only in AltiVec registers. */
1942 if (ALTIVEC_REGNO_P (regno))
1943 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1944 || mode == V1TImode);
1945
1946 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1947 registers, and the value must be able to fit within the register set. */
1948
1949 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1950 }
1951
1952 /* Implement TARGET_HARD_REGNO_NREGS. */
1953
1954 static unsigned int
1955 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1956 {
1957 return rs6000_hard_regno_nregs[mode][regno];
1958 }
1959
1960 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1961
1962 static bool
1963 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1964 {
1965 return rs6000_hard_regno_mode_ok_p[mode][regno];
1966 }
1967
1968 /* Implement TARGET_MODES_TIEABLE_P.
1969
1970 PTImode cannot tie with other modes because PTImode is restricted to even
1971 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1972 57744).
1973
1974 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1975 registers) or XOmode (vector quad, restricted to FPR registers divisible
1976 by 4) to tie with other modes.
1977
1978 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1979 128-bit floating point on VSX systems ties with other vectors. */
1980
1981 static bool
1982 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1983 {
1984 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1985 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1986 return mode1 == mode2;
1987
1988 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1989 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1990 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1991 return false;
1992
1993 if (SCALAR_FLOAT_MODE_P (mode1))
1994 return SCALAR_FLOAT_MODE_P (mode2);
1995 if (SCALAR_FLOAT_MODE_P (mode2))
1996 return false;
1997
1998 if (GET_MODE_CLASS (mode1) == MODE_CC)
1999 return GET_MODE_CLASS (mode2) == MODE_CC;
2000 if (GET_MODE_CLASS (mode2) == MODE_CC)
2001 return false;
2002
2003 return true;
2004 }
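/* (Editor's illustration of the rules above.)  TImode does not tie with
   PTImode; V4SImode and V2DImode tie with each other; SFmode ties with
   DFmode but not with V4SFmode, since the vector check rejects that pair
   before the scalar float check is reached.  */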
2005
2006 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2007
2008 static bool
2009 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
2010 machine_mode mode)
2011 {
2012 if (TARGET_32BIT
2013 && TARGET_POWERPC64
2014 && GET_MODE_SIZE (mode) > 4
2015 && INT_REGNO_P (regno))
2016 return true;
2017
2018 if (TARGET_VSX
2019 && FP_REGNO_P (regno)
2020 && GET_MODE_SIZE (mode) > 8
2021 && !FLOAT128_2REG_P (mode))
2022 return true;
2023
2024 return false;
2025 }
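/* (Editor's note.)  E.g. on a 32-bit ABI with -mpowerpc64, only the low
   32 bits of a nonvolatile GPR survive a call, so a DImode value there is
   partially clobbered; likewise with VSX only the low 64 bits of a
   traditional FPR are saved, so a 16-byte vector living there is
   partially clobbered as well.  */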
2026
2027 /* Print interesting facts about registers. */
2028 static void
2029 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2030 {
2031 int r, m;
2032
2033 for (r = first_regno; r <= last_regno; ++r)
2034 {
2035 const char *comma = "";
2036 int len;
2037
2038 if (first_regno == last_regno)
2039 fprintf (stderr, "%s:\t", reg_name);
2040 else
2041 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2042
2043 len = 8;
2044 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2045 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2046 {
2047 if (len > 70)
2048 {
2049 fprintf (stderr, ",\n\t");
2050 len = 8;
2051 comma = "";
2052 }
2053
2054 if (rs6000_hard_regno_nregs[m][r] > 1)
2055 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2056 rs6000_hard_regno_nregs[m][r]);
2057 else
2058 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2059
2060 comma = ", ";
2061 }
2062
2063 if (call_used_or_fixed_reg_p (r))
2064 {
2065 if (len > 70)
2066 {
2067 fprintf (stderr, ",\n\t");
2068 len = 8;
2069 comma = "";
2070 }
2071
2072 len += fprintf (stderr, "%s%s", comma, "call-used");
2073 comma = ", ";
2074 }
2075
2076 if (fixed_regs[r])
2077 {
2078 if (len > 70)
2079 {
2080 fprintf (stderr, ",\n\t");
2081 len = 8;
2082 comma = "";
2083 }
2084
2085 len += fprintf (stderr, "%s%s", comma, "fixed");
2086 comma = ", ";
2087 }
2088
2089 if (len > 70)
2090 {
2091 fprintf (stderr, ",\n\t");
2092 comma = "";
2093 }
2094
2095 len += fprintf (stderr, "%sreg-class = %s", comma,
2096 reg_class_names[(int)rs6000_regno_regclass[r]]);
2097 comma = ", ";
2098
2099 if (len > 70)
2100 {
2101 fprintf (stderr, ",\n\t");
2102 comma = "";
2103 }
2104
2105 fprintf (stderr, "%sregno = %d\n", comma, r);
2106 }
2107 }
2108
2109 static const char *
2110 rs6000_debug_vector_unit (enum rs6000_vector v)
2111 {
2112 const char *ret;
2113
2114 switch (v)
2115 {
2116 case VECTOR_NONE: ret = "none"; break;
2117 case VECTOR_ALTIVEC: ret = "altivec"; break;
2118 case VECTOR_VSX: ret = "vsx"; break;
2119 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2120 default: ret = "unknown"; break;
2121 }
2122
2123 return ret;
2124 }
2125
2126 /* Inner function printing just the address mask for a particular reload
2127 register class. */
2128 DEBUG_FUNCTION char *
2129 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2130 {
2131 static char ret[8];
2132 char *p = ret;
2133
2134 if ((mask & RELOAD_REG_VALID) != 0)
2135 *p++ = 'v';
2136 else if (keep_spaces)
2137 *p++ = ' ';
2138
2139 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2140 *p++ = 'm';
2141 else if (keep_spaces)
2142 *p++ = ' ';
2143
2144 if ((mask & RELOAD_REG_INDEXED) != 0)
2145 *p++ = 'i';
2146 else if (keep_spaces)
2147 *p++ = ' ';
2148
2149 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2150 *p++ = 'O';
2151 else if ((mask & RELOAD_REG_OFFSET) != 0)
2152 *p++ = 'o';
2153 else if (keep_spaces)
2154 *p++ = ' ';
2155
2156 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2157 *p++ = '+';
2158 else if (keep_spaces)
2159 *p++ = ' ';
2160
2161 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2162 *p++ = '+';
2163 else if (keep_spaces)
2164 *p++ = ' ';
2165
2166 if ((mask & RELOAD_REG_AND_M16) != 0)
2167 *p++ = '&';
2168 else if (keep_spaces)
2169 *p++ = ' ';
2170
2171 *p = '\0';
2172
2173 return ret;
2174 }
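/* (Editor's example.)  A mask of RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET prints as "vio" without keep_spaces, or as
   "v io   " with keep_spaces so that the columns line up across rows.  */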
2175
2176 /* Print the address masks in a human readable fashion. */
2177 DEBUG_FUNCTION void
2178 rs6000_debug_print_mode (ssize_t m)
2179 {
2180 ssize_t rc;
2181 int spaces = 0;
2182
2183 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2184 for (rc = 0; rc < N_RELOAD_REG; rc++)
2185 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2186 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2187
2188 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2189 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2190 {
2191 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2192 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2193 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2194 spaces = 0;
2195 }
2196 else
2197 spaces += strlen (" Reload=sl");
2198
2199 if (reg_addr[m].scalar_in_vmx_p)
2200 {
2201 fprintf (stderr, "%*s Upper=y", spaces, "");
2202 spaces = 0;
2203 }
2204 else
2205 spaces += strlen (" Upper=y");
2206
2207 if (rs6000_vector_unit[m] != VECTOR_NONE
2208 || rs6000_vector_mem[m] != VECTOR_NONE)
2209 {
2210 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2211 spaces, "",
2212 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2213 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2214 }
2215
2216 fputs ("\n", stderr);
2217 }
2218
2219 #define DEBUG_FMT_ID "%-32s= "
2220 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2221 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2222 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
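/* (Editor's example.)  DEBUG_FMT_D left-justifies the name in a 32-column
   field, so fprintf (stderr, DEBUG_FMT_D, "tls_size", 13) prints
   "tls_size" padded with spaces to column 32, followed by "= 13".  */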
2223
2224 /* Print various interesting information with -mdebug=reg. */
2225 static void
2226 rs6000_debug_reg_global (void)
2227 {
2228 static const char *const tf[2] = { "false", "true" };
2229 const char *nl = (const char *)0;
2230 int m;
2231 size_t m1, m2, v;
2232 char costly_num[20];
2233 char nop_num[20];
2234 char flags_buffer[40];
2235 const char *costly_str;
2236 const char *nop_str;
2237 const char *trace_str;
2238 const char *abi_str;
2239 const char *cmodel_str;
2240 struct cl_target_option cl_opts;
2241
2242 /* Modes we want tieable information on. */
2243 static const machine_mode print_tieable_modes[] = {
2244 QImode,
2245 HImode,
2246 SImode,
2247 DImode,
2248 TImode,
2249 PTImode,
2250 SFmode,
2251 DFmode,
2252 TFmode,
2253 IFmode,
2254 KFmode,
2255 SDmode,
2256 DDmode,
2257 TDmode,
2258 V2SImode,
2259 V2SFmode,
2260 V16QImode,
2261 V8HImode,
2262 V4SImode,
2263 V2DImode,
2264 V1TImode,
2265 V32QImode,
2266 V16HImode,
2267 V8SImode,
2268 V4DImode,
2269 V2TImode,
2270 V4SFmode,
2271 V2DFmode,
2272 V8SFmode,
2273 V4DFmode,
2274 OOmode,
2275 XOmode,
2276 CCmode,
2277 CCUNSmode,
2278 CCEQmode,
2279 CCFPmode,
2280 };
2281
2282 /* Virtual regs we are interested in. */
2283 static const struct {
2284 int regno; /* register number. */
2285 const char *name; /* register name. */
2286 } virtual_regs[] = {
2287 { STACK_POINTER_REGNUM, "stack pointer:" },
2288 { TOC_REGNUM, "toc: " },
2289 { STATIC_CHAIN_REGNUM, "static chain: " },
2290 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2291 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2292 { ARG_POINTER_REGNUM, "arg pointer: " },
2293 { FRAME_POINTER_REGNUM, "frame pointer:" },
2294 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2295 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2296 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2297 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2298 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2299 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2300 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2301 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2302 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2303 };
2304
2305 fputs ("\nHard register information:\n", stderr);
2306 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2307 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2308 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2309 LAST_ALTIVEC_REGNO,
2310 "vs");
2311 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2312 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2313 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2314 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2315 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2316 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2317
2318 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2319 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2320 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2321
2322 fprintf (stderr,
2323 "\n"
2324 "d reg_class = %s\n"
2325 "v reg_class = %s\n"
2326 "wa reg_class = %s\n"
2327 "we reg_class = %s\n"
2328 "wr reg_class = %s\n"
2329 "wx reg_class = %s\n"
2330 "wA reg_class = %s\n"
2331 "\n",
2332 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2333 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2334 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2335 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2336 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2337 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2338 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2339
2340 nl = "\n";
2341 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2342 rs6000_debug_print_mode (m);
2343
2344 fputs ("\n", stderr);
2345
2346 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2347 {
2348 machine_mode mode1 = print_tieable_modes[m1];
2349 bool first_time = true;
2350
2351 nl = (const char *)0;
2352 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2353 {
2354 machine_mode mode2 = print_tieable_modes[m2];
2355 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2356 {
2357 if (first_time)
2358 {
2359 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2360 nl = "\n";
2361 first_time = false;
2362 }
2363
2364 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2365 }
2366 }
2367
2368 if (!first_time)
2369 fputs ("\n", stderr);
2370 }
2371
2372 if (nl)
2373 fputs (nl, stderr);
2374
2375 if (rs6000_recip_control)
2376 {
2377 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2378
2379 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2380 if (rs6000_recip_bits[m])
2381 {
2382 fprintf (stderr,
2383 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2384 GET_MODE_NAME (m),
2385 (RS6000_RECIP_AUTO_RE_P (m)
2386 ? "auto"
2387 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2388 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2389 ? "auto"
2390 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2391 }
2392
2393 fputs ("\n", stderr);
2394 }
2395
2396 if (rs6000_cpu_index >= 0)
2397 {
2398 const char *name = processor_target_table[rs6000_cpu_index].name;
2399 HOST_WIDE_INT flags
2400 = processor_target_table[rs6000_cpu_index].target_enable;
2401
2402 sprintf (flags_buffer, "-mcpu=%s flags", name);
2403 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2404 }
2405 else
2406 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2407
2408 if (rs6000_tune_index >= 0)
2409 {
2410 const char *name = processor_target_table[rs6000_tune_index].name;
2411 HOST_WIDE_INT flags
2412 = processor_target_table[rs6000_tune_index].target_enable;
2413
2414 sprintf (flags_buffer, "-mtune=%s flags", name);
2415 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2416 }
2417 else
2418 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2419
2420 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2421 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2422 rs6000_isa_flags);
2423
2424 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2425 rs6000_isa_flags_explicit);
2426
2427 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2428
2429 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2430 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2431
2432 switch (rs6000_sched_costly_dep)
2433 {
2434 case max_dep_latency:
2435 costly_str = "max_dep_latency";
2436 break;
2437
2438 case no_dep_costly:
2439 costly_str = "no_dep_costly";
2440 break;
2441
2442 case all_deps_costly:
2443 costly_str = "all_deps_costly";
2444 break;
2445
2446 case true_store_to_load_dep_costly:
2447 costly_str = "true_store_to_load_dep_costly";
2448 break;
2449
2450 case store_to_load_dep_costly:
2451 costly_str = "store_to_load_dep_costly";
2452 break;
2453
2454 default:
2455 costly_str = costly_num;
2456 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2457 break;
2458 }
2459
2460 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2461
2462 switch (rs6000_sched_insert_nops)
2463 {
2464 case sched_finish_regroup_exact:
2465 nop_str = "sched_finish_regroup_exact";
2466 break;
2467
2468 case sched_finish_pad_groups:
2469 nop_str = "sched_finish_pad_groups";
2470 break;
2471
2472 case sched_finish_none:
2473 nop_str = "sched_finish_none";
2474 break;
2475
2476 default:
2477 nop_str = nop_num;
2478 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2479 break;
2480 }
2481
2482 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2483
2484 switch (rs6000_sdata)
2485 {
2486 default:
2487 case SDATA_NONE:
2488 break;
2489
2490 case SDATA_DATA:
2491 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2492 break;
2493
2494 case SDATA_SYSV:
2495 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2496 break;
2497
2498 case SDATA_EABI:
2499 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2500 break;
2501
2502 }
2503
2504 switch (rs6000_traceback)
2505 {
2506 case traceback_default: trace_str = "default"; break;
2507 case traceback_none: trace_str = "none"; break;
2508 case traceback_part: trace_str = "part"; break;
2509 case traceback_full: trace_str = "full"; break;
2510 default: trace_str = "unknown"; break;
2511 }
2512
2513 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2514
2515 switch (rs6000_current_cmodel)
2516 {
2517 case CMODEL_SMALL: cmodel_str = "small"; break;
2518 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2519 case CMODEL_LARGE: cmodel_str = "large"; break;
2520 default: cmodel_str = "unknown"; break;
2521 }
2522
2523 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2524
2525 switch (rs6000_current_abi)
2526 {
2527 case ABI_NONE: abi_str = "none"; break;
2528 case ABI_AIX: abi_str = "aix"; break;
2529 case ABI_ELFv2: abi_str = "ELFv2"; break;
2530 case ABI_V4: abi_str = "V4"; break;
2531 case ABI_DARWIN: abi_str = "darwin"; break;
2532 default: abi_str = "unknown"; break;
2533 }
2534
2535 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2536
2537 if (rs6000_altivec_abi)
2538 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2539
2540 if (rs6000_aix_extabi)
2541 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2542
2543 if (rs6000_darwin64_abi)
2544 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2545
2546 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2547 (TARGET_SOFT_FLOAT ? "true" : "false"));
2548
2549 if (TARGET_LINK_STACK)
2550 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2551
2552 if (TARGET_P8_FUSION)
2553 {
2554 char options[80];
2555
2556 strcpy (options, "power8");
2557 if (TARGET_P8_FUSION_SIGN)
2558 strcat (options, ", sign");
2559
2560 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2561 }
2562
2563 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2564 TARGET_SECURE_PLT ? "secure" : "bss");
2565 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2566 aix_struct_return ? "aix" : "sysv");
2567 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2568 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2569 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2570 tf[!!rs6000_align_branch_targets]);
2571 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2572 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2573 rs6000_long_double_type_size);
2574 if (rs6000_long_double_type_size > 64)
2575 {
2576 fprintf (stderr, DEBUG_FMT_S, "long double type",
2577 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2578 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2579 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2580 }
2581 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2582 (int)rs6000_sched_restricted_insns_priority);
2583 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2584 (int)END_BUILTINS);
2585
2586 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2587 (int)TARGET_FLOAT128_ENABLE_TYPE);
2588
2589 if (TARGET_VSX)
2590 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2591 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2592
2593 if (TARGET_DIRECT_MOVE_128)
2594 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2595 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2596 }
2597
2598 \f
2599 /* Update the addr mask bits in reg_addr to help secondary reload and the
2600 legitimate address support figure out the appropriate addressing to
2601 use. */
2602
2603 static void
2604 rs6000_setup_reg_addr_masks (void)
2605 {
2606 ssize_t rc, reg, m, nregs;
2607 addr_mask_type any_addr_mask, addr_mask;
2608
2609 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2610 {
2611 machine_mode m2 = (machine_mode) m;
2612 bool complex_p = false;
2613 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2614 size_t msize;
2615
2616 if (COMPLEX_MODE_P (m2))
2617 {
2618 complex_p = true;
2619 m2 = GET_MODE_INNER (m2);
2620 }
2621
2622 msize = GET_MODE_SIZE (m2);
2623
2624 /* SDmode is special in that we want to access it only via REG+REG
2625 addressing on power7 and above, since we want to use the LFIWZX and
2626 STFIWZX instructions to load it. */
2627 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2628
2629 any_addr_mask = 0;
2630 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2631 {
2632 addr_mask = 0;
2633 reg = reload_reg_map[rc].reg;
2634
2635 /* Can mode values go in the GPR/FPR/Altivec registers? */
2636 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2637 {
2638 bool small_int_vsx_p = (small_int_p
2639 && (rc == RELOAD_REG_FPR
2640 || rc == RELOAD_REG_VMX));
2641
2642 nregs = rs6000_hard_regno_nregs[m][reg];
2643 addr_mask |= RELOAD_REG_VALID;
2644
2645 /* Indicate if the mode takes more than 1 physical register. If
2646 it takes a single register, indicate it can do REG+REG
2647 addressing. Small integers in VSX registers can only do
2648 REG+REG addressing. */
2649 if (small_int_vsx_p)
2650 addr_mask |= RELOAD_REG_INDEXED;
2651 else if (nregs > 1 || m == BLKmode || complex_p)
2652 addr_mask |= RELOAD_REG_MULTIPLE;
2653 else
2654 addr_mask |= RELOAD_REG_INDEXED;
2655
2656 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2657 addressing. If we allow scalars into Altivec registers,
2658 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2659
2660 For VSX systems, we don't allow update addressing for
2661 DFmode/SFmode if those registers can go in both the
2662 traditional floating point registers and Altivec registers.
2663 The load/store instructions for the Altivec registers do not
2664 have update forms. If we allowed update addressing, it seems
2665 to break IV-OPT code using floating point if the index type is
2666 int instead of long (PR target/81550 and target/84042). */
2667
2668 if (TARGET_UPDATE
2669 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2670 && msize <= 8
2671 && !VECTOR_MODE_P (m2)
2672 && !VECTOR_ALIGNMENT_P (m2)
2673 && !complex_p
2674 && (m != E_DFmode || !TARGET_VSX)
2675 && (m != E_SFmode || !TARGET_P8_VECTOR)
2676 && !small_int_vsx_p)
2677 {
2678 addr_mask |= RELOAD_REG_PRE_INCDEC;
2679
2680 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2681 we don't allow PRE_MODIFY for some multi-register
2682 operations. */
2683 switch (m)
2684 {
2685 default:
2686 addr_mask |= RELOAD_REG_PRE_MODIFY;
2687 break;
2688
2689 case E_DImode:
2690 if (TARGET_POWERPC64)
2691 addr_mask |= RELOAD_REG_PRE_MODIFY;
2692 break;
2693
2694 case E_DFmode:
2695 case E_DDmode:
2696 if (TARGET_HARD_FLOAT)
2697 addr_mask |= RELOAD_REG_PRE_MODIFY;
2698 break;
2699 }
2700 }
2701 }
2702
2703 /* GPR and FPR registers can do REG+OFFSET addressing, except
2704 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2705 for 64-bit scalars and 32-bit SFmode to Altivec registers. */
2706 if ((addr_mask != 0) && !indexed_only_p
2707 && msize <= 8
2708 && (rc == RELOAD_REG_GPR
2709 || ((msize == 8 || m2 == SFmode)
2710 && (rc == RELOAD_REG_FPR
2711 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2712 addr_mask |= RELOAD_REG_OFFSET;
2713
2714 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2715 instructions are enabled. The offset for 128-bit VSX registers is
2716 only 12 bits. While GPRs can handle the full offset range, VSX
2717 registers can only handle the restricted range. */
2718 else if ((addr_mask != 0) && !indexed_only_p
2719 && msize == 16 && TARGET_P9_VECTOR
2720 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2721 || (m2 == TImode && TARGET_VSX)))
2722 {
2723 addr_mask |= RELOAD_REG_OFFSET;
2724 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2725 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2726 }
2727
2728 /* Vector pairs can do both indexed and offset loads if the
2729 instructions are enabled; otherwise they can only do offset loads,
2730 since the access will be broken into two vector moves. Vector quads
2731 can only do offset loads. */
2732 else if ((addr_mask != 0) && TARGET_MMA
2733 && (m2 == OOmode || m2 == XOmode))
2734 {
2735 addr_mask |= RELOAD_REG_OFFSET;
2736 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2737 {
2738 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2739 if (m2 == OOmode)
2740 addr_mask |= RELOAD_REG_INDEXED;
2741 }
2742 }
2743
2744 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2745 addressing on 128-bit types. */
2746 if (rc == RELOAD_REG_VMX && msize == 16
2747 && (addr_mask & RELOAD_REG_VALID) != 0)
2748 addr_mask |= RELOAD_REG_AND_M16;
2749
2750 reg_addr[m].addr_mask[rc] = addr_mask;
2751 any_addr_mask |= addr_mask;
2752 }
2753
2754 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2755 }
2756 }
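/* (Editor's sketch; the exact bits depend on the ISA flags in effect.)
   On a 64-bit power9 target, SImode in a GPR typically ends up with
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET plus the
   update bits, while SImode in an FPR/Altivec register gets only
   RELOAD_REG_VALID | RELOAD_REG_INDEXED, reflecting the small_int_vsx_p
   REG+REG-only restriction above.  */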
2757
2758 \f
2759 /* Initialize the various global tables that are based on register size. */
2760 static void
2761 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2762 {
2763 ssize_t r, m, c;
2764 int align64;
2765 int align32;
2766
2767 /* Precalculate REGNO_REG_CLASS. */
2768 rs6000_regno_regclass[0] = GENERAL_REGS;
2769 for (r = 1; r < 32; ++r)
2770 rs6000_regno_regclass[r] = BASE_REGS;
2771
2772 for (r = 32; r < 64; ++r)
2773 rs6000_regno_regclass[r] = FLOAT_REGS;
2774
2775 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2776 rs6000_regno_regclass[r] = NO_REGS;
2777
2778 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2779 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2780
2781 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2782 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2783 rs6000_regno_regclass[r] = CR_REGS;
2784
2785 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2786 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2787 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2788 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2789 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2790 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2791 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2792
2793 /* Precalculate the map from register class to the simpler reload register
2794 class. We don't need all of the register classes that are combinations of
2795 different classes, just the simple ones that have constraint letters. */
2796 for (c = 0; c < N_REG_CLASSES; c++)
2797 reg_class_to_reg_type[c] = NO_REG_TYPE;
2798
2799 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2800 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2801 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2802 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2803 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2804 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2805 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2806 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2807 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2808 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2809
2810 if (TARGET_VSX)
2811 {
2812 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2813 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2814 }
2815 else
2816 {
2817 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2818 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2819 }
2820
2821 /* Precalculate the valid memory formats as well as the vector information;
2822 this must be set up before the rs6000_hard_regno_nregs_internal calls
2823 below. */
2824 gcc_assert ((int)VECTOR_NONE == 0);
2825 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2826 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2827
2828 gcc_assert ((int)CODE_FOR_nothing == 0);
2829 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2830
2831 gcc_assert ((int)NO_REGS == 0);
2832 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2833
2834 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2835 controls whether the compiler can use native alignment or must still use 128-bit alignment. */
2836 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2837 {
2838 align64 = 64;
2839 align32 = 32;
2840 }
2841 else
2842 {
2843 align64 = 128;
2844 align32 = 128;
2845 }
2846
2847 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2848 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2849 if (TARGET_FLOAT128_TYPE)
2850 {
2851 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2852 rs6000_vector_align[KFmode] = 128;
2853
2854 if (FLOAT128_IEEE_P (TFmode))
2855 {
2856 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2857 rs6000_vector_align[TFmode] = 128;
2858 }
2859 }
2860
2861 /* V2DF mode, VSX only. */
2862 if (TARGET_VSX)
2863 {
2864 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2865 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2866 rs6000_vector_align[V2DFmode] = align64;
2867 }
2868
2869 /* V4SF mode, either VSX or Altivec. */
2870 if (TARGET_VSX)
2871 {
2872 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2873 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2874 rs6000_vector_align[V4SFmode] = align32;
2875 }
2876 else if (TARGET_ALTIVEC)
2877 {
2878 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2879 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2880 rs6000_vector_align[V4SFmode] = align32;
2881 }
2882
2883 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2884 and stores. */
2885 if (TARGET_ALTIVEC)
2886 {
2887 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2888 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2889 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2890 rs6000_vector_align[V4SImode] = align32;
2891 rs6000_vector_align[V8HImode] = align32;
2892 rs6000_vector_align[V16QImode] = align32;
2893
2894 if (TARGET_VSX)
2895 {
2896 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2897 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2898 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2899 }
2900 else
2901 {
2902 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2903 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2904 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2905 }
2906 }
2907
2908 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2909 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2910 if (TARGET_VSX)
2911 {
2912 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2913 rs6000_vector_unit[V2DImode]
2914 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2915 rs6000_vector_align[V2DImode] = align64;
2916
2917 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2918 rs6000_vector_unit[V1TImode]
2919 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2920 rs6000_vector_align[V1TImode] = 128;
2921 }
2922
2923 /* DFmode, see if we want to use the VSX unit. Memory is handled
2924 differently, so don't set rs6000_vector_mem. */
2925 if (TARGET_VSX)
2926 {
2927 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2928 rs6000_vector_align[DFmode] = 64;
2929 }
2930
2931 /* SFmode, see if we want to use the VSX unit. */
2932 if (TARGET_P8_VECTOR)
2933 {
2934 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2935 rs6000_vector_align[SFmode] = 32;
2936 }
2937
2938 /* Allow TImode in VSX register and set the VSX memory macros. */
2939 if (TARGET_VSX)
2940 {
2941 rs6000_vector_mem[TImode] = VECTOR_VSX;
2942 rs6000_vector_align[TImode] = align64;
2943 }
2944
2945 /* Add support for vector pairs and vector quad registers. */
2946 if (TARGET_MMA)
2947 {
2948 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2949 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2950 rs6000_vector_align[OOmode] = 256;
2951
2952 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2953 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2954 rs6000_vector_align[XOmode] = 512;
2955 }
2956
2957 /* Register class constraints for the constraints that depend on compile
2958 switches. When the VSX code was added, different constraints were added
2959 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2960 of the VSX registers are used. The register classes for scalar floating
2961 point types are set based on whether we allow that type into the upper
2962 (Altivec) registers.
2963 registers for load/store operations, to select using a VSX memory
2964 operation instead of the traditional floating point operation. The
2965 constraints are:
2966
2967 d - Register class to use with traditional DFmode instructions.
2968 v - Altivec register.
2969 wa - Any VSX register.
2970 wc - Reserved to represent individual CR bits (used in LLVM).
2971 wn - always NO_REGS.
2972 wr - GPR if 64-bit mode is permitted.
2973 wx - Float register if we can do 32-bit int stores. */
2974
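/* (Editor's note.)  A machine description pattern can then write, e.g.,
   (match_operand:V2DF 0 "vsx_register_operand" "=wa"): register
   allocation uses whatever class rs6000_constraints[RS6000_CONSTRAINT_wa]
   receives below -- VSX_REGS when VSX is enabled, NO_REGS otherwise
   (the memset above initialized every constraint to NO_REGS).  */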
2975 if (TARGET_HARD_FLOAT)
2976 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2977 if (TARGET_ALTIVEC)
2978 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2979 if (TARGET_VSX)
2980 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2981
2982 if (TARGET_POWERPC64)
2983 {
2984 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2985 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2986 }
2987
2988 if (TARGET_STFIWX)
2989 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2990
2991 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2992 if (TARGET_DIRECT_MOVE_128)
2993 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2994
2995 /* Set up the reload helper and direct move functions. */
2996 if (TARGET_VSX || TARGET_ALTIVEC)
2997 {
2998 if (TARGET_64BIT)
2999 {
3000 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3001 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3002 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3003 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3004 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3005 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3006 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3007 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3008 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3009 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3010 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3011 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3012 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3013 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3014 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3015 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3016 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3017 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3018 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3019 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3020
3021 if (FLOAT128_VECTOR_P (KFmode))
3022 {
3023 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3024 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3025 }
3026
3027 if (FLOAT128_VECTOR_P (TFmode))
3028 {
3029 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3030 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3031 }
3032
3033 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3034 available. */
3035 if (TARGET_NO_SDMODE_STACK)
3036 {
3037 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3038 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3039 }
3040
3041 if (TARGET_VSX)
3042 {
3043 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3044 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3045 }
3046
3047 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3048 {
3049 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3050 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3051 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3052 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3053 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3054 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3055 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3056 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3057 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3058
3059 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3060 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3061 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3062 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3063 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3064 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3065 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3066 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3067 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3068
3069 if (FLOAT128_VECTOR_P (KFmode))
3070 {
3071 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3072 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3073 }
3074
3075 if (FLOAT128_VECTOR_P (TFmode))
3076 {
3077 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3078 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3079 }
3080
3081 if (TARGET_MMA)
3082 {
3083 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3084 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3085 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3086 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3087 }
3088 }
3089 }
3090 else
3091 {
3092 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3093 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3094 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3095 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3096 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3097 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3098 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3099 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3100 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3101 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3102 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3103 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3104 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3105 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3106 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3107 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3108 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3109 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3110 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3111 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3112
3113 if (FLOAT128_VECTOR_P (KFmode))
3114 {
3115 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3116 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3117 }
3118
3119 if (FLOAT128_IEEE_P (TFmode))
3120 {
3121 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3122 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3123 }
3124
3125 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3126 available. */
3127 if (TARGET_NO_SDMODE_STACK)
3128 {
3129 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3130 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3131 }
3132
3133 if (TARGET_VSX)
3134 {
3135 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3136 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3137 }
3138
3139 if (TARGET_DIRECT_MOVE)
3140 {
3141 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3142 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3143 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3144 }
3145 }
3146
3147 reg_addr[DFmode].scalar_in_vmx_p = true;
3148 reg_addr[DImode].scalar_in_vmx_p = true;
3149
3150 if (TARGET_P8_VECTOR)
3151 {
3152 reg_addr[SFmode].scalar_in_vmx_p = true;
3153 reg_addr[SImode].scalar_in_vmx_p = true;
3154
3155 if (TARGET_P9_VECTOR)
3156 {
3157 reg_addr[HImode].scalar_in_vmx_p = true;
3158 reg_addr[QImode].scalar_in_vmx_p = true;
3159 }
3160 }
3161 }
3162
3163 /* Precalculate HARD_REGNO_NREGS. */
3164 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3165 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3166 rs6000_hard_regno_nregs[m][r]
3167 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3168
3169 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3170 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3171 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3172 rs6000_hard_regno_mode_ok_p[m][r]
3173 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3174
3175 /* Precalculate CLASS_MAX_NREGS sizes. */
3176 for (c = 0; c < LIM_REG_CLASSES; ++c)
3177 {
3178 int reg_size;
3179
3180 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3181 reg_size = UNITS_PER_VSX_WORD;
3182
3183 else if (c == ALTIVEC_REGS)
3184 reg_size = UNITS_PER_ALTIVEC_WORD;
3185
3186 else if (c == FLOAT_REGS)
3187 reg_size = UNITS_PER_FP_WORD;
3188
3189 else
3190 reg_size = UNITS_PER_WORD;
3191
3192 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3193 {
3194 machine_mode m2 = (machine_mode)m;
3195 int reg_size2 = reg_size;
3196
3197 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3198 in VSX. */
3199 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3200 reg_size2 = UNITS_PER_FP_WORD;
3201
3202 rs6000_class_max_nregs[m][c]
3203 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3204 }
3205 }
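/* (Editor's worked example.)  IFmode (IBM 128-bit float) is
   FLOAT128_2REG_P, so even for a VSX class reg_size2 is UNITS_PER_FP_WORD
   (8 with hard float) and rs6000_class_max_nregs[IFmode][VSX_REGS]
   = (16 + 8 - 1) / 8 = 2, whereas V2DFmode fits in
   (16 + 16 - 1) / 16 = 1 VSX register.  */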
3206
3207 /* Calculate for which modes to automatically generate code that uses the
3208 reciprocal divide and square root instructions. In the future, possibly
3209 automatically generate the instructions even if the user did not specify
3210 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3211 not accurate enough. */
3212 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3213 if (TARGET_FRES)
3214 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3215 if (TARGET_FRE)
3216 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3217 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3218 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3219 if (VECTOR_UNIT_VSX_P (V2DFmode))
3220 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3221
3222 if (TARGET_FRSQRTES)
3223 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3224 if (TARGET_FRSQRTE)
3225 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3226 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3227 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3228 if (VECTOR_UNIT_VSX_P (V2DFmode))
3229 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3230
3231 if (rs6000_recip_control)
3232 {
3233 if (!flag_finite_math_only)
3234 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3235 "-ffast-math");
3236 if (flag_trapping_math)
3237 warning (0, "%qs requires %qs or %qs", "-mrecip",
3238 "-fno-trapping-math", "-ffast-math");
3239 if (!flag_reciprocal_math)
3240 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3241 "-ffast-math");
3242 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3243 {
3244 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3245 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3246 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3247
3248 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3249 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3250 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3251
3252 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3253 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3254 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3255
3256 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3257 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3258 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3259
3260 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3261 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3262 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3263
3264 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3265 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3266 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3267
3268 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3269 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3270 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3271
3272 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3273 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3274 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3275 }
3276 }
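/* (Editor's sketch.)  So compiling with -mrecip and -ffast-math on a CPU
   that has fres/frsqrte upgrades the affected modes from "have" to
   "auto", which -mdebug=reg then reports as, e.g.,
   "divide: auto rsqrt: auto".  */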
3277
3278 /* Update the addr mask bits in reg_addr to help secondary reload and the
3279 legitimate address support figure out the appropriate addressing to
3280 use. */
3281 rs6000_setup_reg_addr_masks ();
3282
3283 if (global_init_p || TARGET_DEBUG_TARGET)
3284 {
3285 if (TARGET_DEBUG_REG)
3286 rs6000_debug_reg_global ();
3287
3288 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3289 fprintf (stderr,
3290 "SImode variable mult cost = %d\n"
3291 "SImode constant mult cost = %d\n"
3292 "SImode short constant mult cost = %d\n"
3293 "DImode multipliciation cost = %d\n"
3294 "SImode division cost = %d\n"
3295 "DImode division cost = %d\n"
3296 "Simple fp operation cost = %d\n"
3297 "DFmode multiplication cost = %d\n"
3298 "SFmode division cost = %d\n"
3299 "DFmode division cost = %d\n"
3300 "cache line size = %d\n"
3301 "l1 cache size = %d\n"
3302 "l2 cache size = %d\n"
3303 "simultaneous prefetches = %d\n"
3304 "\n",
3305 rs6000_cost->mulsi,
3306 rs6000_cost->mulsi_const,
3307 rs6000_cost->mulsi_const9,
3308 rs6000_cost->muldi,
3309 rs6000_cost->divsi,
3310 rs6000_cost->divdi,
3311 rs6000_cost->fp,
3312 rs6000_cost->dmul,
3313 rs6000_cost->sdiv,
3314 rs6000_cost->ddiv,
3315 rs6000_cost->cache_line_size,
3316 rs6000_cost->l1_cache_size,
3317 rs6000_cost->l2_cache_size,
3318 rs6000_cost->simultaneous_prefetches);
3319 }
3320 }
3321
3322 #if TARGET_MACHO
3323 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3324
3325 static void
3326 darwin_rs6000_override_options (void)
3327 {
3328 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3329 off. */
3330 rs6000_altivec_abi = 1;
3331 TARGET_ALTIVEC_VRSAVE = 1;
3332 rs6000_current_abi = ABI_DARWIN;
3333
3334 if (DEFAULT_ABI == ABI_DARWIN
3335 && TARGET_64BIT)
3336 darwin_one_byte_bool = 1;
3337
3338 if (TARGET_64BIT && ! TARGET_POWERPC64)
3339 {
3340 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3341 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3342 }
3343
3344 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3345 optimisation, and it will not work with the most generic case (where the
3346 symbol is an undefined external, but there is no symbol stub). */
3347 if (TARGET_64BIT)
3348 rs6000_default_long_calls = 0;
3349
3350 /* ld_classic is (so far) still used for kernel (static) code, and supports
3351 the JBSR longcall / branch islands. */
3352 if (flag_mkernel)
3353 {
3354 rs6000_default_long_calls = 1;
3355
3356 /* Allow a kext author to do -mkernel -mhard-float. */
3357 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3358 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3359 }
3360
3361 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3362 Altivec. */
3363 if (!flag_mkernel && !flag_apple_kext
3364 && TARGET_64BIT
3365 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3366 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3367
3368 /* Unless the user (not the configurer) has explicitly overridden
3369 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3370 G4 unless targeting the kernel. */
3371 if (!flag_mkernel
3372 && !flag_apple_kext
3373 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3374 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3375 && ! OPTION_SET_P (rs6000_cpu_index))
3376 {
3377 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3378 }
3379 }
3380 #endif
3381
3382 /* If not otherwise specified by a target, make 'long double' equivalent to
3383 'double'. */
3384
3385 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3386 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3387 #endif
3388
3389 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3390 to clobber the XER[CA] bit because clobbering that bit without telling
3391 the compiler worked just fine with versions of GCC before GCC 5, and
3392 breaking a lot of older code in ways that are hard to track down is
3393 not such a great idea. */
3394
3395 static rtx_insn *
3396 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3397 vec<machine_mode> & /*input_modes*/,
3398 vec<const char *> & /*constraints*/,
3399 vec<rtx> &/*uses*/, vec<rtx> &clobbers,
3400 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3401 {
3402 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3403 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3404 return NULL;
3405 }
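/* For example, an asm statement such as

     asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   sets XER[CA] as a side effect without declaring it, so the hook above
   conservatively records the clobber for every asm statement (x and y here
   are just illustrative operands).  */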
3406
3407 /* This target hook is similar to TARGET_OPTION_OVERRIDE
3408 but is called when the optimize level is changed via an attribute or
3409 pragma or when it is reset at the end of the code affected by the
3410 attribute or pragma. It is not called at the beginning of compilation
3411 when TARGET_OPTION_OVERRIDE is called, so if you want to perform these
3412 actions then as well, you should have TARGET_OPTION_OVERRIDE call
3413 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3414
3415 static void
3416 rs6000_override_options_after_change (void)
3417 {
3418 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3419 turns -frename-registers on. */
3420 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3421 || (OPTION_SET_P (flag_unroll_all_loops)
3422 && flag_unroll_all_loops))
3423 {
3424 if (!OPTION_SET_P (unroll_only_small_loops))
3425 unroll_only_small_loops = 0;
3426 if (!OPTION_SET_P (flag_rename_registers))
3427 flag_rename_registers = 1;
3428 if (!OPTION_SET_P (flag_cunroll_grow_size))
3429 flag_cunroll_grow_size = 1;
3430 }
3431 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3432 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3433
3434 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3435 if (rs6000_rop_protect)
3436 flag_shrink_wrap = 0;
3437
3438 /* One of the late-combine passes runs after register allocation
3439 and can match define_insn_and_splits that were previously used
3440 only before register allocation. Some of those define_insn_and_splits
3441 use gen_reg_rtx unconditionally. Disable late-combine by default
3442 until the define_insn_and_splits are fixed. */
3443 if (!OPTION_SET_P (flag_late_combine_instructions))
3444 flag_late_combine_instructions = 0;
3445 }
3446
3447 #ifdef TARGET_USES_LINUX64_OPT
3448 static void
3449 rs6000_linux64_override_options ()
3450 {
3451 if (!OPTION_SET_P (rs6000_alignment_flags))
3452 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3453 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3454 {
3455 if (DEFAULT_ABI != ABI_AIX)
3456 {
3457 rs6000_current_abi = ABI_AIX;
3458 error (INVALID_64BIT, "call");
3459 }
3460 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3461 if (ELFv2_ABI_CHECK)
3462 {
3463 rs6000_current_abi = ABI_ELFv2;
3464 if (dot_symbols)
3465 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3466 }
3467 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3468 {
3469 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3470 error (INVALID_64BIT, "relocatable");
3471 }
3472 if (rs6000_isa_flags & OPTION_MASK_EABI)
3473 {
3474 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3475 error (INVALID_64BIT, "eabi");
3476 }
3477 if (TARGET_PROTOTYPE)
3478 {
3479 target_prototype = 0;
3480 error (INVALID_64BIT, "prototype");
3481 }
3482 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3483 {
3484 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3485 error ("%<-m64%> requires a PowerPC64 cpu");
3486 }
3487 if (!OPTION_SET_P (rs6000_current_cmodel))
3488 SET_CMODEL (CMODEL_MEDIUM);
3489 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3490 {
3491 if (OPTION_SET_P (rs6000_current_cmodel)
3492 && rs6000_current_cmodel != CMODEL_SMALL)
3493 error ("%<-mcmodel%> incompatible with other toc options");
3494 if (TARGET_MINIMAL_TOC)
3495 SET_CMODEL (CMODEL_SMALL);
3496 else if (TARGET_PCREL
3497 || (PCREL_SUPPORTED_BY_OS
3498 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3499 /* Ignore -mno-minimal-toc. */
3500 ;
3501 else
3502 SET_CMODEL (CMODEL_SMALL);
3503 }
3504 if (rs6000_current_cmodel != CMODEL_SMALL)
3505 {
3506 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3507 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3508 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3509 TARGET_NO_SUM_IN_TOC = 0;
3510 }
3511 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3512 {
3513 if (OPTION_SET_P (rs6000_pltseq))
3514 warning (0, "%qs unsupported for this ABI",
3515 "-mpltseq");
3516 rs6000_pltseq = false;
3517 }
3518 }
3519 else if (TARGET_64BIT)
3520 error (INVALID_32BIT, "32");
3521 else
3522 {
3523 if (TARGET_PROFILE_KERNEL)
3524 {
3525 profile_kernel = 0;
3526 error (INVALID_32BIT, "profile-kernel");
3527 }
3528 if (OPTION_SET_P (rs6000_current_cmodel))
3529 {
3530 SET_CMODEL (CMODEL_SMALL);
3531 error (INVALID_32BIT, "cmodel");
3532 }
3533 }
3534 }
3535 #endif
3536
3537 /* Return true if we are using GLIBC and it supports the IEEE 128-bit long
3538 double. This support is present only in little-endian GLIBC 2.32 or newer. */
3539 static bool
3540 glibc_supports_ieee_128bit (void)
3541 {
3542 #ifdef OPTION_GLIBC
3543 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3544 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3545 return true;
3546 #endif /* OPTION_GLIBC. */
3547
3548 return false;
3549 }
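/* For example, GLIBC 2.32 encodes as 2 * 1000 + 32 = 2032, exactly the
   minimum accepted by the check above, while GLIBC 2.31 encodes as 2031
   and is rejected.  */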
3550
3551 /* Override command line options.
3552
3553 Combine build-specific configuration information with options
3554 specified on the command line to set various state variables which
3555 influence code generation, optimization, and expansion of built-in
3556 functions. Ensure that command-line configuration preferences are
3557 compatible with each other and with the build configuration; issue
3558 warnings while adjusting configuration or error messages while
3559 rejecting configuration.
3560
3561 Upon entry to this function:
3562
3563 This function is called once at the beginning of
3564 compilation, and then again at the start and end of compiling
3565 each section of code that has a different configuration, as
3566 indicated, for example, by adding the
3567
3568 __attribute__((__target__("cpu=power9")))
3569
3570 qualifier to a function definition or, for example, by bracketing
3571 code between
3572
3573 #pragma GCC target("altivec")
3574
3575 and
3576
3577 #pragma GCC reset_options
3578
3579 directives. Parameter global_init_p is true for the initial
3580 invocation, which initializes global variables, and false for all
3581 subsequent invocations.
3582
3583
3584 Various global state information is assumed to be valid. This
3585 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3586 default CPU specified at build configure time, TARGET_DEFAULT,
3587 representing the default set of option flags for the default
3588 target, and OPTION_SET_P (rs6000_isa_flags), representing
3589 which options were requested on the command line.
3590
3591 Upon return from this function:
3592
3593 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3594 was set by name on the command line. Additionally, if certain
3595 attributes are automatically enabled or disabled by this function
3596 in order to assure compatibility between options and
3597 configuration, the flags associated with those attributes are
3598 also set. By setting these "explicit bits", we avoid the risk
3599 that other code might accidentally overwrite these particular
3600 attributes with "default values".
3601
3602 The various bits of rs6000_isa_flags are set to indicate the
3603 target options that have been selected for the most current
3604 compilation efforts. This has the effect of also turning on the
3605 associated TARGET_XXX values since these are macros which are
3606 generally defined to test the corresponding bit of the
3607 rs6000_isa_flags variable.
3608
3609 Various other global variables and fields of global structures
3610 (over 50 in all) are initialized to reflect the desired options
3611 for the most current compilation efforts. */
3612
3613 static bool
3614 rs6000_option_override_internal (bool global_init_p)
3615 {
3616 bool ret = true;
3617
3618 HOST_WIDE_INT set_masks;
3619 HOST_WIDE_INT ignore_masks;
3620 int cpu_index = -1;
3621 int tune_index;
3622 struct cl_target_option *main_target_opt
3623 = ((global_init_p || target_option_default_node == NULL)
3624 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3625
3626 /* Print defaults. */
3627 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3628 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3629
3630 /* Remember the explicit arguments. */
3631 if (global_init_p)
3632 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3633
3634 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3635 library functions, so warn about it. The flag may be useful for
3636 performance studies from time to time though, so don't disable it
3637 entirely. */
3638 if (OPTION_SET_P (rs6000_alignment_flags)
3639 && rs6000_alignment_flags == MASK_ALIGN_POWER
3640 && DEFAULT_ABI == ABI_DARWIN
3641 && TARGET_64BIT)
3642 warning (0, "%qs is not supported for 64-bit Darwin;"
3643 " it is incompatible with the installed C and C++ libraries",
3644 "-malign-power");
3645
3646 /* Numerous experiments show that IRA-based loop pressure
3647 calculation works better for RTL loop invariant motion on targets
3648 with enough (>= 32) registers. It is an expensive optimization.
3649 So it is enabled only for peak performance. */
3650 if (optimize >= 3 && global_init_p
3651 && !OPTION_SET_P (flag_ira_loop_pressure))
3652 flag_ira_loop_pressure = 1;
3653
3654 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3655 for tracebacks to be complete, but not if the -fasynchronous-unwind-tables
3656 option was already specified explicitly. */
3657 if (flag_sanitize & SANITIZE_USER_ADDRESS
3658 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3659 flag_asynchronous_unwind_tables = 1;
3660
3661 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3662 loop unroller is active. It is only checked during unrolling, so
3663 we can just set it on by default. */
3664 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3665 flag_variable_expansion_in_unroller = 1;
3666
3667 /* Set the pointer size. */
3668 if (TARGET_64BIT)
3669 {
3670 rs6000_pmode = DImode;
3671 rs6000_pointer_size = 64;
3672 }
3673 else
3674 {
3675 rs6000_pmode = SImode;
3676 rs6000_pointer_size = 32;
3677 }
3678
3679 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3680 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3681 must explicitly specify it and we won't interfere with the user's
3682 specification. */
3683
3684 set_masks = POWERPC_MASKS;
3685 #ifdef OS_MISSING_ALTIVEC
3686 if (OS_MISSING_ALTIVEC)
3687 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3688 | OTHER_VSX_VECTOR_MASKS);
3689 #endif
3690
3691 /* Don't let the processor default override flags that were given explicitly. */
3692 set_masks &= ~rs6000_isa_flags_explicit;
3693
3694 /* Without the powerpc64 option specified explicitly, we need to ensure
3695 that powerpc64 is always enabled for 64-bit here; otherwise some later
3696 checks could use an unexpected TARGET_POWERPC64 value. */
3697 if (!(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64)
3698 && TARGET_64BIT)
3699 {
3700 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3701 /* Need to stop powerpc64 from being unset in later processing,
3702 so clear it in set_masks. But as PR108240 shows, to keep it
3703 consistent with prior behavior, we only want to do this if 64-bit
3704 was enabled explicitly. This is a hack; revisit this later. */
3705 if (rs6000_isa_flags_explicit & OPTION_MASK_64BIT)
3706 set_masks &= ~OPTION_MASK_POWERPC64;
3707 }
3708
3709 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3710 the cpu in a target attribute or pragma, but did not specify a tuning
3711 option, use the cpu for the tuning option rather than the option specified
3712 with -mtune on the command line. Process a '--with-cpu' configuration
3713 request as an implicit -mcpu. */
3714 if (rs6000_cpu_index >= 0)
3715 cpu_index = rs6000_cpu_index;
3716 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3717 cpu_index = main_target_opt->x_rs6000_cpu_index;
3718 else if (OPTION_TARGET_CPU_DEFAULT)
3719 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3720
3721 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3722 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3723 with those from the cpu, except for options that were explicitly set. If
3724 we don't have a cpu, do not override the target bits set in
3725 TARGET_DEFAULT. */
3726 if (cpu_index >= 0)
3727 {
3728 rs6000_cpu_index = cpu_index;
3729 rs6000_isa_flags &= ~set_masks;
3730 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3731 & set_masks);
3732 }
3733 else
3734 {
3735 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3736 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3737 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3738 to using rs6000_isa_flags, we need to do the initialization here.
3739
3740 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3741 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3742 HOST_WIDE_INT flags;
3743 if (TARGET_DEFAULT)
3744 flags = TARGET_DEFAULT;
3745 else
3746 {
3747 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3748 const char *default_cpu = (!TARGET_POWERPC64
3749 ? "powerpc"
3750 : (BYTES_BIG_ENDIAN
3751 ? "powerpc64"
3752 : "powerpc64le"));
3753 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3754 flags = processor_target_table[default_cpu_index].target_enable;
3755 }
3756 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3757 }
3758
3759 /* Don't expect powerpc64 enabled on those OSes with OS_MISSING_POWERPC64,
3760 since they do not save and restore the high half of the GPRs correctly
3761 in all cases. If the user explicitly specifies it, we won't interfere
3762 with the user's specification. */
3763 #ifdef OS_MISSING_POWERPC64
3764 if (OS_MISSING_POWERPC64
3765 && TARGET_32BIT
3766 && TARGET_POWERPC64
3767 && !(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64))
3768 rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
3769 #endif
3770
3771 if (rs6000_tune_index >= 0)
3772 tune_index = rs6000_tune_index;
3773 else if (cpu_index >= 0)
3774 rs6000_tune_index = tune_index = cpu_index;
3775 else
3776 {
3777 size_t i;
3778 enum processor_type tune_proc
3779 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3780
3781 tune_index = -1;
3782 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3783 if (processor_target_table[i].processor == tune_proc)
3784 {
3785 tune_index = i;
3786 break;
3787 }
3788 }
3789
3790 if (cpu_index >= 0)
3791 rs6000_cpu = processor_target_table[cpu_index].processor;
3792 else
3793 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3794
3795 gcc_assert (tune_index >= 0);
3796 rs6000_tune = processor_target_table[tune_index].processor;
3797
3798 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3799 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3800 || rs6000_cpu == PROCESSOR_PPCE5500)
3801 {
3802 if (TARGET_ALTIVEC)
3803 error ("AltiVec not supported in this target");
3804 }
3805
3806 /* If we are optimizing big endian systems for space, use the load/store
3807 multiple instructions. */
3808 if (BYTES_BIG_ENDIAN && optimize_size)
3809 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3810
3811 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3812 because the hardware doesn't support the instructions used in little
3813 endian mode, and they cause an alignment trap. The 750 does not cause an
3814 alignment trap (except when the target address is unaligned). */
3815
3816 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3817 {
3818 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3819 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3820 warning (0, "%qs is not supported on little endian systems",
3821 "-mmultiple");
3822 }
3823
3824 /* If little-endian, default to -mstrict-align on older processors. */
3825 if (!BYTES_BIG_ENDIAN
3826 && !(processor_target_table[tune_index].target_enable
3827 & OPTION_MASK_POWER8))
3828 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3829
3830 /* Add some warnings for VSX. */
3831 if (TARGET_VSX)
3832 {
3833 bool explicit_vsx_p = rs6000_isa_flags_explicit & OPTION_MASK_VSX;
3834 if (!TARGET_HARD_FLOAT)
3835 {
3836 if (explicit_vsx_p)
3837 {
3838 if (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT)
3839 error ("%<-mvsx%> and %<-msoft-float%> are incompatible");
3840 else
3841 warning (0, N_("%<-mvsx%> requires hardware floating-point"));
3842 }
3843 rs6000_isa_flags &= ~OPTION_MASK_VSX;
3844 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3845 }
3846 else if (TARGET_AVOID_XFORM > 0)
3847 {
3848 if (explicit_vsx_p && OPTION_SET_P (TARGET_AVOID_XFORM))
3849 error ("%<-mvsx%> and %<-mavoid-indexed-addresses%>"
3850 " are incompatible");
3851 else
3852 warning (0, N_("%<-mvsx%> needs indexed addressing"));
3853 rs6000_isa_flags &= ~OPTION_MASK_VSX;
3854 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3855 }
3856 else if (!TARGET_ALTIVEC
3857 && (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3858 {
3859 if (explicit_vsx_p)
3860 error ("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3861 else
3862 warning (0, N_("%<-mno-altivec%> disables vsx"));
3863 rs6000_isa_flags &= ~OPTION_MASK_VSX;
3864 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3865 }
3866 }
3867
3868 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3869 the -mcpu setting to enable options that conflict. */
3870 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3871 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3872 | OPTION_MASK_ALTIVEC
3873 | OPTION_MASK_VSX)) != 0)
3874 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO)
3875 & ~rs6000_isa_flags_explicit);
3876
3877 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3878 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3879
3880 #ifdef XCOFF_DEBUGGING_INFO
3881 /* For AIX default to 64-bit DWARF. */
3882 if (!OPTION_SET_P (dwarf_offset_size))
3883 dwarf_offset_size = POINTER_SIZE_UNITS;
3884 #endif
3885
3886 /* Handle explicit -mno-{altivec,vsx} and turn off all of
3887 the options that depend on those flags. */
3888 ignore_masks = rs6000_disable_incompatible_switches ();
3889
3890 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3891 unless the user explicitly used the -mno-<option> to disable the code. */
3892 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3893 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3894 else if (TARGET_P9_MINMAX)
3895 {
3896 if (cpu_index >= 0)
3897 {
3898 if (cpu_index == PROCESSOR_POWER9)
3899 {
3900 /* legacy behavior: allow -mcpu=power9 with certain
3901 capabilities explicitly disabled. */
3902 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3903 }
3904 else
3905 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3906 "for <xxx> less than power9", "-mcpu");
3907 }
3908 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3909 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3910 & rs6000_isa_flags_explicit))
3911 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3912 were explicitly cleared. */
3913 error ("%qs incompatible with explicitly disabled options",
3914 "-mpower9-minmax");
3915 else
3916 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3917 }
3918 else if (TARGET_P8_VECTOR || TARGET_POWER8 || TARGET_CRYPTO)
3919 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3920 else if (TARGET_VSX)
3921 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3922 else if (TARGET_POPCNTD)
3923 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3924 else if (TARGET_DFP)
3925 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3926 else if (TARGET_CMPB)
3927 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3928 else if (TARGET_FPRND)
3929 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3930 else if (TARGET_POPCNTB)
3931 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3932 else if (TARGET_ALTIVEC)
3933 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3934
3935 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3936 target attribute or pragma which automatically enables both options,
3937 unless the altivec ABI was set. This is set by default for 64-bit, but
3938 not for 32-bit. Don't move this before the above code using ignore_masks,
3939 since it can reset the cleared VSX/ALTIVEC flag again. */
3940 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
3941 {
3942 rs6000_isa_flags &= ~(OPTION_MASK_VSX & ~rs6000_isa_flags_explicit);
3943 /* Don't mask off ALTIVEC if it is enabled by an explicit VSX. */
3944 if (!TARGET_VSX)
3945 rs6000_isa_flags &= ~(OPTION_MASK_ALTIVEC & ~rs6000_isa_flags_explicit);
3946 }
3947
3948 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3949 {
3950 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3951 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3952 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3953 }
3954
3955 if (!TARGET_FPRND && TARGET_VSX)
3956 {
3957 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3958 /* TARGET_VSX = 1 implies Power 7 and newer */
3959 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3960 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3961 }
3962
3963 /* Assert !TARGET_VSX if !TARGET_ALTIVEC; this keeps the adjustments
3964 made below based on either !TARGET_VSX or !TARGET_ALTIVEC concise. */
3965 gcc_assert (TARGET_ALTIVEC || !TARGET_VSX);
3966
3967 if (TARGET_P8_VECTOR && !TARGET_VSX)
3968 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3969
3970 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3971 {
3972 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3973 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3974 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3975 }
3976
3977 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3978 silently turn off quad memory mode. */
3979 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3980 {
3981 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3982 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3983
3984 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3985 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3986
3987 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3988 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3989 }
3990
3991 /* Non-atomic quad memory load/store are disabled for little endian, since
3992 the words are reversed, but atomic operations can still be done by
3993 swapping the words. */
3994 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3995 {
3996 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3997 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3998 "mode"));
3999
4000 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4001 }
4002
4003 /* Assume that if the user asked for normal quad memory instructions, they
4004 want the atomic versions as well, unless they explicitly told us not to
4005 use quad word atomic instructions. */
4006 if (TARGET_QUAD_MEMORY
4007 && !TARGET_QUAD_MEMORY_ATOMIC
4008 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4009 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4010
4011 /* If we can shrink-wrap the TOC register save separately, then use
4012 -msave-toc-indirect unless explicitly disabled. */
4013 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4014 && flag_shrink_wrap_separate
4015 && optimize_function_for_speed_p (cfun))
4016 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4017
4018 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4019 generating power8 instructions. Power9 does not optimize power8 fusion
4020 cases. */
4021 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4022 {
4023 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4024 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4025 else
4026 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4027 }
4028
4029 /* Setting additional fusion flags turns on base fusion. */
4030 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4031 {
4032 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4033 {
4034 if (TARGET_P8_FUSION_SIGN)
4035 error ("%qs requires %qs", "-mpower8-fusion-sign",
4036 "-mpower8-fusion");
4037
4038 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4039 }
4040 else
4041 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4042 }
4043
4044 /* Power8 does not fuse sign-extended loads with the addis. If we are
4045 optimizing at high levels for speed, convert a sign-extended load into a
4046 zero-extending load plus an explicit sign extension. */
4047 if (TARGET_P8_FUSION
4048 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4049 && optimize_function_for_speed_p (cfun)
4050 && optimize >= 3)
4051 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4052
4053 /* ISA 3.0 vector instructions include ISA 2.07. */
4054 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4055 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4056
4057 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4058 support. If we only have ISA 2.06 support, and the user did not specify
4059 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4060 but we don't enable the full vectorization support. */
4061 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4062 TARGET_ALLOW_MOVMISALIGN = 1;
4063
4064 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4065 {
4066 if (TARGET_ALLOW_MOVMISALIGN > 0
4067 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4068 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4069
4070 TARGET_ALLOW_MOVMISALIGN = 0;
4071 }
4072
4073 /* Determine when unaligned vector accesses are permitted, and when
4074 they are preferred over masked Altivec loads. Note that if
4075 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4076 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4077 not true. */
4078 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4079 {
4080 if (!TARGET_VSX)
4081 {
4082 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4083 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4084
4085 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4086 }
4087
4088 else if (!TARGET_ALLOW_MOVMISALIGN)
4089 {
4090 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4091 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4092 "-mallow-movmisalign");
4093
4094 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4095 }
4096 }
4097
4098 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4099 {
4100 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4101 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4102 else
4103 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4104 }
4105
4106 /* Use long double size to select the appropriate long double. We use
4107 TYPE_PRECISION to differentiate the 3 different long double types. We map
4108 128 into the precision used for TFmode. */
4109 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4110 ? 64
4111 : 128);
4112
4113 /* Set long double size before the IEEE 128-bit tests. */
4114 if (!OPTION_SET_P (rs6000_long_double_type_size))
4115 {
4116 if (main_target_opt != NULL
4117 && (main_target_opt->x_rs6000_long_double_type_size
4118 != default_long_double_size))
4119 error ("target attribute or pragma changes %<long double%> size");
4120 else
4121 rs6000_long_double_type_size = default_long_double_size;
4122 }
4123
4124 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4125 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4126 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4127 those systems will not pick up this default. Warn if the user changes the
4128 default unless -Wno-psabi. */
4129 if (!OPTION_SET_P (rs6000_ieeequad))
4130 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4131
4132 else if (TARGET_LONG_DOUBLE_128)
4133 {
4134 if (global_options.x_rs6000_ieeequad
4135 && (!TARGET_POPCNTD || !TARGET_VSX))
4136 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4137
4138 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4139 {
4140 /* Determine if the user can change the default long double type at
4141 compilation time. You need GLIBC 2.32 or newer to be able to
4142 change the long double type. Only issue one warning. */
4143 static bool warned_change_long_double;
4144
4145 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4146 {
4147 warned_change_long_double = true;
4148 if (TARGET_IEEEQUAD)
4149 warning (OPT_Wpsabi, "using IEEE extended precision "
4150 "%<long double%>");
4151 else
4152 warning (OPT_Wpsabi, "using IBM extended precision "
4153 "%<long double%>");
4154 }
4155 }
4156 }
4157
4158 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4159 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4160 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4161 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4162 the keyword as well as the type. */
4163 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4164
4165 /* IEEE 128-bit floating point requires VSX support. */
4166 if (TARGET_FLOAT128_KEYWORD)
4167 {
4168 if (!TARGET_VSX)
4169 {
4170 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4171 error ("%qs requires VSX support", "-mfloat128");
4172
4173 TARGET_FLOAT128_TYPE = 0;
4174 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4175 | OPTION_MASK_FLOAT128_HW);
4176 }
4177 else if (!TARGET_FLOAT128_TYPE)
4178 {
4179 TARGET_FLOAT128_TYPE = 1;
4180 warning (0, "The %<-mfloat128%> option may not be fully supported");
4181 }
4182 }
4183
4184 /* Enable the __float128 keyword under Linux by default. */
4185 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4186 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4187 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4188
4189 /* If we are supporting the float128 type and have full ISA 3.0 support,
4190 enable -mfloat128-hardware by default. However, don't enable the
4191 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4192 because sometimes the compiler wants to put things in an integer
4193 container, and if we don't have __int128 support, it is impossible. */
4194 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4195 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4196 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4197 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4198
4199 if (TARGET_FLOAT128_HW
4200 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4201 {
4202 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4203 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4204
4205 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4206 }
4207
4208 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4209 {
4210 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4211 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4212
4213 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4214 }
4215
4216 /* Enable -mprefixed by default on power10 systems. */
4217 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4218 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4219
4220 /* -mprefixed requires -mcpu=power10 (or later). */
4221 else if (TARGET_PREFIXED && !TARGET_POWER10)
4222 {
4223 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4224 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4225
4226 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4227 }
4228
4229 /* -mpcrel requires prefixed load/store addressing. */
4230 if (TARGET_PCREL && !TARGET_PREFIXED)
4231 {
4232 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4233 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4234
4235 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4236 }
4237
4238 /* Print the options after updating the defaults. */
4239 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4240 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4241
4242 /* E500mc does "better" if we inline more aggressively. Respect the
4243 user's opinion, though. */
4244 if (rs6000_block_move_inline_limit == 0
4245 && (rs6000_tune == PROCESSOR_PPCE500MC
4246 || rs6000_tune == PROCESSOR_PPCE500MC64
4247 || rs6000_tune == PROCESSOR_PPCE5500
4248 || rs6000_tune == PROCESSOR_PPCE6500))
4249 rs6000_block_move_inline_limit = 128;
4250
4251 /* store_one_arg depends on expand_block_move to handle at least the
4252 size of reg_parm_stack_space. */
4253 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4254 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4255
4256 if (global_init_p)
4257 {
4258 /* If the appropriate debug option is enabled, replace the target hooks
4259 with debug versions that call the real version and then print
4260 debugging information. */
4261 if (TARGET_DEBUG_COST)
4262 {
4263 targetm.rtx_costs = rs6000_debug_rtx_costs;
4264 targetm.address_cost = rs6000_debug_address_cost;
4265 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4266 }
4267
4268 if (TARGET_DEBUG_ADDR)
4269 {
4270 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4271 targetm.legitimize_address = rs6000_debug_legitimize_address;
4272 rs6000_secondary_reload_class_ptr
4273 = rs6000_debug_secondary_reload_class;
4274 targetm.secondary_memory_needed
4275 = rs6000_debug_secondary_memory_needed;
4276 targetm.can_change_mode_class
4277 = rs6000_debug_can_change_mode_class;
4278 rs6000_preferred_reload_class_ptr
4279 = rs6000_debug_preferred_reload_class;
4280 rs6000_mode_dependent_address_ptr
4281 = rs6000_debug_mode_dependent_address;
4282 }
4283
4284 if (rs6000_veclibabi_name)
4285 {
4286 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4287 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4288 else
4289 {
4290 error ("unknown vectorization library ABI type in "
4291 "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4292 ret = false;
4293 }
4294 }
4295 }
4296
4297 /* Enable Altivec ABI for AIX -maltivec. */
4298 if (TARGET_XCOFF
4299 && (TARGET_ALTIVEC || TARGET_VSX)
4300 && !OPTION_SET_P (rs6000_altivec_abi))
4301 {
4302 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4303 error ("target attribute or pragma changes AltiVec ABI");
4304 else
4305 rs6000_altivec_abi = 1;
4306 }
4307
4308 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4309 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4310 be explicitly overridden in either case. */
4311 if (TARGET_ELF)
4312 {
4313 if (!OPTION_SET_P (rs6000_altivec_abi)
4314 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4315 {
4316 if (main_target_opt != NULL &&
4317 !main_target_opt->x_rs6000_altivec_abi)
4318 error ("target attribute or pragma changes AltiVec ABI");
4319 else
4320 rs6000_altivec_abi = 1;
4321 }
4322 }
4323
4324 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4325 So far, the only darwin64 targets are also MACH-O. */
4326 if (TARGET_MACHO
4327 && DEFAULT_ABI == ABI_DARWIN
4328 && TARGET_64BIT)
4329 {
4330 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4331 error ("target attribute or pragma changes darwin64 ABI");
4332 else
4333 {
4334 rs6000_darwin64_abi = 1;
4335 /* Default to natural alignment, for better performance. */
4336 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4337 }
4338 }
4339
4340 /* Place FP constants in the constant pool instead of TOC
4341 if section anchors enabled. */
4342 if (flag_section_anchors
4343 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4344 TARGET_NO_FP_IN_TOC = 1;
4345
4346 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4347 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4348
4349 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4350 SUBTARGET_OVERRIDE_OPTIONS;
4351 #endif
4352 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4353 SUBSUBTARGET_OVERRIDE_OPTIONS;
4354 #endif
4355 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4356 SUB3TARGET_OVERRIDE_OPTIONS;
4357 #endif
4358
4359 /* If the ABI has support for PC-relative relocations, enable it by default.
4360 This test depends on the sub-target tests above setting the code model to
4361 medium for ELF v2 systems. */
4362 if (PCREL_SUPPORTED_BY_OS
4363 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4364 rs6000_isa_flags |= OPTION_MASK_PCREL;
4365
4366 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4367 after the subtarget override options are done. */
4368 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4369 {
4370 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4371 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4372
4373 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4374 }
4375
4376 /* Enable -mmma by default on power10 systems. */
4377 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4378 rs6000_isa_flags |= OPTION_MASK_MMA;
4379
4380 /* Turn off vector pair/mma options on non-power10 systems. */
4381 else if (!TARGET_POWER10 && TARGET_MMA)
4382 {
4383 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4384 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4385
4386 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4387 }
4388
4389 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4390 generating power10 instructions. */
4391 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
4392 {
4393 if (rs6000_tune == PROCESSOR_POWER10
4394 || rs6000_tune == PROCESSOR_POWER11)
4395 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4396 else
4397 rs6000_isa_flags &= ~OPTION_MASK_P10_FUSION;
4398 }
4399
4400 /* MMA requires SIMD support, as ISA 3.1 specifies, and our implementation
4401 (e.g. the "*movoo" pattern) uses vector pair accesses, which use VSX
4402 registers. So make MMA require VSX support here. */
4403 if (TARGET_MMA && !TARGET_VSX)
4404 {
4405 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4406 error ("%qs requires %qs", "-mmma", "-mvsx");
4407 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4408 }
4409
4410 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4411 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4412
4413 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4414 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4415
4416 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4417 && rs6000_tune != PROCESSOR_POWER5
4418 && rs6000_tune != PROCESSOR_POWER6
4419 && rs6000_tune != PROCESSOR_POWER7
4420 && rs6000_tune != PROCESSOR_POWER8
4421 && rs6000_tune != PROCESSOR_POWER9
4422 && rs6000_tune != PROCESSOR_POWER10
4423 && rs6000_tune != PROCESSOR_POWER11
4424 && rs6000_tune != PROCESSOR_PPCA2
4425 && rs6000_tune != PROCESSOR_CELL
4426 && rs6000_tune != PROCESSOR_PPC476);
4427 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4428 || rs6000_tune == PROCESSOR_POWER5
4429 || rs6000_tune == PROCESSOR_POWER7
4430 || rs6000_tune == PROCESSOR_POWER8);
4431 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4432 || rs6000_tune == PROCESSOR_POWER5
4433 || rs6000_tune == PROCESSOR_POWER6
4434 || rs6000_tune == PROCESSOR_POWER7
4435 || rs6000_tune == PROCESSOR_POWER8
4436 || rs6000_tune == PROCESSOR_POWER9
4437 || rs6000_tune == PROCESSOR_POWER10
4438 || rs6000_tune == PROCESSOR_POWER11
4439 || rs6000_tune == PROCESSOR_PPCE500MC
4440 || rs6000_tune == PROCESSOR_PPCE500MC64
4441 || rs6000_tune == PROCESSOR_PPCE5500
4442 || rs6000_tune == PROCESSOR_PPCE6500);
4443
4444 /* Allow debug switches to override the above settings. These are set to -1
4445 in rs6000.opt to indicate the user hasn't directly set the switch. */
4446 if (TARGET_ALWAYS_HINT >= 0)
4447 rs6000_always_hint = TARGET_ALWAYS_HINT;
4448
4449 if (TARGET_SCHED_GROUPS >= 0)
4450 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4451
4452 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4453 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4454
4455 rs6000_sched_restricted_insns_priority
4456 = (rs6000_sched_groups ? 1 : 0);
4457
4458 /* Handle -msched-costly-dep option. */
4459 rs6000_sched_costly_dep
4460 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4461
4462 if (rs6000_sched_costly_dep_str)
4463 {
4464 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4465 rs6000_sched_costly_dep = no_dep_costly;
4466 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4467 rs6000_sched_costly_dep = all_deps_costly;
4468 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4469 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4470 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4471 rs6000_sched_costly_dep = store_to_load_dep_costly;
4472 else
4473 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4474 atoi (rs6000_sched_costly_dep_str));
4475 }
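/* For example, -msched-costly-dep=true_store_to_load selects the enumerated
   policy by name, while a numeric argument such as -msched-costly-dep=3
   falls through to the atoi case and (per the GCC manual) treats any
   dependence whose latency is at least that number as costly.  */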
4476
4477 /* Handle -minsert-sched-nops option. */
4478 rs6000_sched_insert_nops
4479 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4480
4481 if (rs6000_sched_insert_nops_str)
4482 {
4483 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4484 rs6000_sched_insert_nops = sched_finish_none;
4485 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4486 rs6000_sched_insert_nops = sched_finish_pad_groups;
4487 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4488 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4489 else
4490 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4491 atoi (rs6000_sched_insert_nops_str));
4492 }
4493
4494 /* Handle the stack protector. */
4495 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4496 #ifdef TARGET_THREAD_SSP_OFFSET
4497 rs6000_stack_protector_guard = SSP_TLS;
4498 #else
4499 rs6000_stack_protector_guard = SSP_GLOBAL;
4500 #endif
4501
4502 #ifdef TARGET_THREAD_SSP_OFFSET
4503 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4504 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4505 #endif
4506
4507 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4508 {
4509 char *endp;
4510 const char *str = rs6000_stack_protector_guard_offset_str;
4511
4512 errno = 0;
4513 long offset = strtol (str, &endp, 0);
4514 if (!*str || *endp || errno)
4515 error ("%qs is not a valid number in %qs", str,
4516 "-mstack-protector-guard-offset=");
4517
4518 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4519 || (TARGET_64BIT && (offset & 3)))
4520 error ("%qs is not a valid offset in %qs", str,
4521 "-mstack-protector-guard-offset=");
4522
4523 rs6000_stack_protector_guard_offset = offset;
4524 }
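/* For example, -mstack-protector-guard-offset=0x7ffc is accepted in both
   modes, while 0x7ffe passes the range check but is rejected for -m64
   because the DS-form load used to read the guard requires the low two
   bits of the offset to be clear.  */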
4525
4526 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4527 {
4528 const char *str = rs6000_stack_protector_guard_reg_str;
4529 int reg = decode_reg_name (str);
4530
4531 if (!IN_RANGE (reg, 1, 31))
4532 error ("%qs is not a valid base register in %qs", str,
4533 "-mstack-protector-guard-reg=");
4534
4535 rs6000_stack_protector_guard_reg = reg;
4536 }
4537
4538 if (rs6000_stack_protector_guard == SSP_TLS
4539 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4540 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4541
4542 if (global_init_p)
4543 {
4544 #ifdef TARGET_REGNAMES
4545 /* If the user desires alternate register names, copy in the
4546 alternate names now. */
4547 if (TARGET_REGNAMES)
4548 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4549 #endif
4550
4551 /* Set aix_struct_return last, after the ABI is determined.
4552 If -maix-struct-return or -msvr4-struct-return was explicitly
4553 used, don't override with the ABI default. */
4554 if (!OPTION_SET_P (aix_struct_return))
4555 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4556
4557 #if 0
4558 /* IBM XL compiler defaults to unsigned bitfields. */
4559 if (TARGET_XL_COMPAT)
4560 flag_signed_bitfields = 0;
4561 #endif
4562
4563 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4564 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4565
4566 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4567
4568 /* We can only guarantee the availability of DI pseudo-ops when
4569 assembling for 64-bit targets. */
4570 if (!TARGET_64BIT)
4571 {
4572 targetm.asm_out.aligned_op.di = NULL;
4573 targetm.asm_out.unaligned_op.di = NULL;
4574 }
4575
4576
4577 /* Set branch target alignment, if not optimizing for size. */
4578 if (!optimize_size)
4579 {
4580 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4581 aligned 8-byte to avoid misprediction by the branch predictor. */
4582 if (rs6000_tune == PROCESSOR_TITAN
4583 || rs6000_tune == PROCESSOR_CELL)
4584 {
4585 if (flag_align_functions && !str_align_functions)
4586 str_align_functions = "8";
4587 if (flag_align_jumps && !str_align_jumps)
4588 str_align_jumps = "8";
4589 if (flag_align_loops && !str_align_loops)
4590 str_align_loops = "8";
4591 }
4592 if (rs6000_align_branch_targets)
4593 {
4594 if (flag_align_functions && !str_align_functions)
4595 str_align_functions = "16";
4596 if (flag_align_jumps && !str_align_jumps)
4597 str_align_jumps = "16";
4598 if (flag_align_loops && !str_align_loops)
4599 {
4600 can_override_loop_align = 1;
4601 str_align_loops = "16";
4602 }
4603 }
4604 }
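/* For example (illustrative), when tuning for power8 without -Os, the
   settings above behave as if -falign-functions=16 -falign-jumps=16
   -falign-loops=16 had been given, unless the user supplied explicit
   alignment values.  */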
4605
4606 /* Arrange to save and restore machine status around nested functions. */
4607 init_machine_status = rs6000_init_machine_status;
4608
4609 /* We should always be splitting complex arguments, but we can't break
4610 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4611 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4612 targetm.calls.split_complex_arg = NULL;
4613
4614 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4615 if (DEFAULT_ABI == ABI_AIX)
4616 targetm.calls.custom_function_descriptors = 0;
4617 }
4618
4619 /* Initialize rs6000_cost with the appropriate target costs. */
4620 if (optimize_size)
4621 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4622 else
4623 switch (rs6000_tune)
4624 {
4625 case PROCESSOR_RS64A:
4626 rs6000_cost = &rs64a_cost;
4627 break;
4628
4629 case PROCESSOR_MPCCORE:
4630 rs6000_cost = &mpccore_cost;
4631 break;
4632
4633 case PROCESSOR_PPC403:
4634 rs6000_cost = &ppc403_cost;
4635 break;
4636
4637 case PROCESSOR_PPC405:
4638 rs6000_cost = &ppc405_cost;
4639 break;
4640
4641 case PROCESSOR_PPC440:
4642 rs6000_cost = &ppc440_cost;
4643 break;
4644
4645 case PROCESSOR_PPC476:
4646 rs6000_cost = &ppc476_cost;
4647 break;
4648
4649 case PROCESSOR_PPC601:
4650 rs6000_cost = &ppc601_cost;
4651 break;
4652
4653 case PROCESSOR_PPC603:
4654 rs6000_cost = &ppc603_cost;
4655 break;
4656
4657 case PROCESSOR_PPC604:
4658 rs6000_cost = &ppc604_cost;
4659 break;
4660
4661 case PROCESSOR_PPC604e:
4662 rs6000_cost = &ppc604e_cost;
4663 break;
4664
4665 case PROCESSOR_PPC620:
4666 rs6000_cost = &ppc620_cost;
4667 break;
4668
4669 case PROCESSOR_PPC630:
4670 rs6000_cost = &ppc630_cost;
4671 break;
4672
4673 case PROCESSOR_CELL:
4674 rs6000_cost = &ppccell_cost;
4675 break;
4676
4677 case PROCESSOR_PPC750:
4678 case PROCESSOR_PPC7400:
4679 rs6000_cost = &ppc750_cost;
4680 break;
4681
4682 case PROCESSOR_PPC7450:
4683 rs6000_cost = &ppc7450_cost;
4684 break;
4685
4686 case PROCESSOR_PPC8540:
4687 case PROCESSOR_PPC8548:
4688 rs6000_cost = &ppc8540_cost;
4689 break;
4690
4691 case PROCESSOR_PPCE300C2:
4692 case PROCESSOR_PPCE300C3:
4693 rs6000_cost = &ppce300c2c3_cost;
4694 break;
4695
4696 case PROCESSOR_PPCE500MC:
4697 rs6000_cost = &ppce500mc_cost;
4698 break;
4699
4700 case PROCESSOR_PPCE500MC64:
4701 rs6000_cost = &ppce500mc64_cost;
4702 break;
4703
4704 case PROCESSOR_PPCE5500:
4705 rs6000_cost = &ppce5500_cost;
4706 break;
4707
4708 case PROCESSOR_PPCE6500:
4709 rs6000_cost = &ppce6500_cost;
4710 break;
4711
4712 case PROCESSOR_TITAN:
4713 rs6000_cost = &titan_cost;
4714 break;
4715
4716 case PROCESSOR_POWER4:
4717 case PROCESSOR_POWER5:
4718 rs6000_cost = &power4_cost;
4719 break;
4720
4721 case PROCESSOR_POWER6:
4722 rs6000_cost = &power6_cost;
4723 break;
4724
4725 case PROCESSOR_POWER7:
4726 rs6000_cost = &power7_cost;
4727 break;
4728
4729 case PROCESSOR_POWER8:
4730 rs6000_cost = &power8_cost;
4731 break;
4732
4733 case PROCESSOR_POWER9:
4734 rs6000_cost = &power9_cost;
4735 break;
4736
4737 case PROCESSOR_POWER10:
4738 case PROCESSOR_POWER11:
4739 rs6000_cost = &power10_cost;
4740 break;
4741
4742 case PROCESSOR_PPCA2:
4743 rs6000_cost = &ppca2_cost;
4744 break;
4745
4746 default:
4747 gcc_unreachable ();
4748 }
4749
4750 if (global_init_p)
4751 {
4752 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4753 param_simultaneous_prefetches,
4754 rs6000_cost->simultaneous_prefetches);
4755 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4756 param_l1_cache_size,
4757 rs6000_cost->l1_cache_size);
4758 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4759 param_l1_cache_line_size,
4760 rs6000_cost->cache_line_size);
4761 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4762 param_l2_cache_size,
4763 rs6000_cost->l2_cache_size);
4764
4765 /* Increase loop peeling limits based on performance analysis. */
4766 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4767 param_max_peeled_insns, 400);
4768 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4769 param_max_completely_peeled_insns, 400);
4770
4771 /* The lxvl/stxvl instructions don't perform well before Power10. */
4772 if (TARGET_POWER10)
4773 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4774 param_vect_partial_vector_usage, 1);
4775 else
4776 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4777 param_vect_partial_vector_usage, 0);
4778
4779 /* Use the 'model' -fsched-pressure algorithm by default. */
4780 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4781 param_sched_pressure_algorithm,
4782 SCHED_PRESSURE_MODEL);
4783
4784 /* If using typedef char *va_list, signal that
4785 __builtin_va_start (&ap, 0) can be optimized to
4786 ap = __builtin_next_arg (0). */
4787 if (DEFAULT_ABI != ABI_V4)
4788 targetm.expand_builtin_va_start = NULL;
4789 }
4790
4791 rs6000_override_options_after_change ();
4792
4793 /* If not explicitly specified via option, decide whether to generate indexed
4794 load/store instructions. A value of -1 indicates that the
4795 initial value of this variable has not been overwritten. During
4796 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4797 if (TARGET_AVOID_XFORM == -1)
4798 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4799 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4800 need indexed accesses and the type used is the scalar type of the element
4801 being loaded or stored. */
4802 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4803 && !TARGET_ALTIVEC);
4804
4805 /* Set the -mrecip options. */
4806 if (rs6000_recip_name)
4807 {
4808 char *p = ASTRDUP (rs6000_recip_name);
4809 char *q;
4810 unsigned int mask, i;
4811 bool invert;
4812
4813 while ((q = strtok (p, ",")) != NULL)
4814 {
4815 p = NULL;
4816 if (*q == '!')
4817 {
4818 invert = true;
4819 q++;
4820 }
4821 else
4822 invert = false;
4823
4824 if (!strcmp (q, "default"))
4825 mask = ((TARGET_RECIP_PRECISION)
4826 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4827 else
4828 {
4829 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4830 if (!strcmp (q, recip_options[i].string))
4831 {
4832 mask = recip_options[i].mask;
4833 break;
4834 }
4835
4836 if (i == ARRAY_SIZE (recip_options))
4837 {
4838 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4839 invert = false;
4840 mask = 0;
4841 ret = false;
4842 }
4843 }
4844
4845 if (invert)
4846 rs6000_recip_control &= ~mask;
4847 else
4848 rs6000_recip_control |= mask;
4849 }
4850 }
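/* For example (using option names from the GCC manual), -mrecip=all,!rsqrtd
   first ORs in the mask for "all" and then, via the '!'-inverted "rsqrtd"
   entry, clears the bits for the double-precision reciprocal square root
   estimate.  */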
4851
4852 /* We only support ROP protection on certain targets. */
4853 if (rs6000_rop_protect)
4854 {
4855 /* Disallow CPU targets we don't support. */
4856 if (!TARGET_POWER8)
4857 error ("%<-mrop-protect%> requires %<-mcpu=power8%> or later");
4858
4859 /* Disallow ABI targets we don't support. */
4860 if (DEFAULT_ABI != ABI_ELFv2)
4861 error ("%<-mrop-protect%> requires the ELFv2 ABI");
4862 }
4863
4864 /* Initialize all of the registers. */
4865 rs6000_init_hard_regno_mode_ok (global_init_p);
4866
4867 /* Save the initial options in case the user uses function-specific options. */
4868 if (global_init_p)
4869 target_option_default_node = target_option_current_node
4870 = build_target_option_node (&global_options, &global_options_set);
4871
4872 /* If not explicitly specified via option, decide whether to generate the
4873 extra blrs required to preserve the link stack on some cpus (e.g., 476). */
4874 if (TARGET_LINK_STACK == -1)
4875 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4876
4877 /* Deprecate use of -mno-speculate-indirect-jumps. */
4878 if (!rs6000_speculate_indirect_jumps)
4879 warning (0, "%qs is deprecated and not recommended in any circumstances",
4880 "-mno-speculate-indirect-jumps");
4881
4882 return ret;
4883 }
4884
4885 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4886 define the target cpu type. */
4887
4888 static void
4889 rs6000_option_override (void)
4890 {
4891 (void) rs6000_option_override_internal (true);
4892 }
4893
4894 \f
4895 /* Implement LOOP_ALIGN. */
4896 align_flags
4897 rs6000_loop_align (rtx label)
4898 {
4899 basic_block bb;
4900 int ninsns;
4901
4902 /* Don't override loop alignment if -falign-loops was specified. */
4903 if (!can_override_loop_align)
4904 return align_loops;
4905
4906 bb = BLOCK_FOR_INSN (label);
ninsns = num_loop_insns (bb->loop_father);
4908
/* Align small loops to 32 bytes to fit in an icache sector; otherwise
   return the default alignment.  */
4910 if (ninsns > 4 && ninsns <= 8
4911 && (rs6000_tune == PROCESSOR_POWER4
4912 || rs6000_tune == PROCESSOR_POWER5
4913 || rs6000_tune == PROCESSOR_POWER6
4914 || rs6000_tune == PROCESSOR_POWER7
4915 || rs6000_tune == PROCESSOR_POWER8))
4916 return align_flags (5);
4917 else
4918 return align_loops;
4919 }
4920
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */
4924
4925 static bool
4926 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4927 {
4928 if (is_packed)
4929 return false;
4930
4931 if (TARGET_32BIT)
4932 {
4933 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4934 return true;
4935
4936 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4937 return true;
4938
4939 return false;
4940 }
4941 else
4942 {
4943 if (TARGET_MACHO)
4944 return false;
4945
/* Assume that all other types are naturally aligned.  CHECKME!  */
4947 return true;
4948 }
4949 }
4950
4951 /* Return true if the vector misalignment factor is supported by the
4952 target. */
4953 static bool
4954 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4955 const_tree type,
4956 int misalignment,
4957 bool is_packed)
4958 {
4959 if (TARGET_VSX)
4960 {
4961 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4962 return true;
4963
/* Return false if the movmisalign pattern is not supported for this mode.  */
4965 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4966 return false;
4967
4968 if (misalignment == -1)
4969 {
/* The misalignment factor is unknown at compile time, but we know
   it's word aligned.  */
4972 if (rs6000_vector_alignment_reachable (type, is_packed))
4973 {
4974 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4975
4976 if (element_size == 64 || element_size == 32)
4977 return true;
4978 }
4979
4980 return false;
4981 }
4982
/* VSX supports word-aligned vectors.  */
4984 if (misalignment % 4 == 0)
4985 return true;
4986 }
4987 return false;
4988 }
4989
4990 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4991 static int
4992 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4993 tree vectype, int misalign)
4994 {
4995 unsigned elements;
4996 tree elem_type;
4997
4998 switch (type_of_cost)
4999 {
5000 case scalar_stmt:
5001 case scalar_store:
5002 case vector_stmt:
5003 case vector_store:
5004 case vec_to_scalar:
5005 case scalar_to_vec:
5006 case cond_branch_not_taken:
5007 return 1;
5008 case scalar_load:
5009 case vector_load:
5010 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5011 return 2;
5012
5013 case vec_perm:
/* Power7 has only one permute unit; make it a bit expensive.  */
5015 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5016 return 3;
5017 else
5018 return 1;
5019
5020 case vec_promote_demote:
/* Power7 has only one permute/pack unit; make it a bit expensive.  */
5022 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5023 return 4;
5024 else
5025 return 1;
5026
5027 case cond_branch_taken:
5028 return 3;
5029
5030 case unaligned_load:
5031 case vector_gather_load:
5032 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5033 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5034 return 2;
5035
5036 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5037 {
5038 elements = TYPE_VECTOR_SUBPARTS (vectype);
5039 /* See PR102767, consider V1TI to keep consistency. */
5040 if (elements == 2 || elements == 1)
5041 /* Double word aligned. */
5042 return 4;
5043
5044 if (elements == 4)
5045 {
5046 switch (misalign)
5047 {
5048 case 8:
5049 /* Double word aligned. */
5050 return 4;
5051
5052 case -1:
5053 /* Unknown misalignment. */
5054 case 4:
5055 case 12:
5056 /* Word aligned. */
5057 return 33;
5058
5059 default:
5060 gcc_unreachable ();
5061 }
5062 }
5063 }
5064
5065 if (TARGET_ALTIVEC)
5066 /* Misaligned loads are not supported. */
5067 gcc_unreachable ();
5068
5069 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5070 return 4;
5071
5072 case unaligned_store:
5073 case vector_scatter_store:
5074 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5075 return 1;
5076
5077 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5078 {
5079 elements = TYPE_VECTOR_SUBPARTS (vectype);
5080 /* See PR102767, consider V1TI to keep consistency. */
5081 if (elements == 2 || elements == 1)
5082 /* Double word aligned. */
5083 return 2;
5084
5085 if (elements == 4)
5086 {
5087 switch (misalign)
5088 {
5089 case 8:
5090 /* Double word aligned. */
5091 return 2;
5092
5093 case -1:
5094 /* Unknown misalignment. */
5095 case 4:
5096 case 12:
5097 /* Word aligned. */
5098 return 23;
5099
5100 default:
5101 gcc_unreachable ();
5102 }
5103 }
5104 }
5105
5106 if (TARGET_ALTIVEC)
5107 /* Misaligned stores are not supported. */
5108 gcc_unreachable ();
5109
5110 return 2;
5111
5112 case vec_construct:
5113 /* This is a rough approximation assuming non-constant elements
5114 constructed into a vector via element insertion. FIXME:
5115 vec_construct is not granular enough for uniformly good
5116 decisions. If the initialization is a splat, this is
5117 cheaper than we estimate. Improve this someday. */
5118 elem_type = TREE_TYPE (vectype);
5119 /* 32-bit vectors loaded into registers are stored as double
5120 precision, so we need 2 permutes, 2 converts, and 1 merge
5121 to construct a vector of short floats from them. */
5122 if (SCALAR_FLOAT_TYPE_P (elem_type)
5123 && TYPE_PRECISION (elem_type) == 32)
5124 return 5;
5125 /* On POWER9, integer vector types are built up in GPRs and then
5126 use a direct move (2 cycles). For POWER8 this is even worse,
5127 as we need two direct moves and a merge, and the direct moves
5128 are five cycles. */
5129 else if (INTEGRAL_TYPE_P (elem_type))
5130 {
5131 if (TARGET_P9_VECTOR)
5132 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5133 else
5134 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5135 }
5136 else
5137 /* V2DFmode doesn't need a direct move. */
5138 return 2;
5139
5140 default:
5141 gcc_unreachable ();
5142 }
5143 }
5144
5145 /* Implement targetm.vectorize.preferred_simd_mode. */
5146
5147 static machine_mode
5148 rs6000_preferred_simd_mode (scalar_mode mode)
5149 {
5150 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
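/* For example, SFmode maps to V4SFmode (16 / 4 = 4 lanes) and DFmode to
   V2DFmode, provided the corresponding vector mode is enabled.  */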
5151
5152 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5153 return vmode.require ();
5154
5155 return word_mode;
5156 }
5157
5158 class rs6000_cost_data : public vector_costs
5159 {
5160 public:
5161 using vector_costs::vector_costs;
5162
5163 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5164 stmt_vec_info stmt_info, slp_tree, tree vectype,
5165 int misalign,
5166 vect_cost_model_location where) override;
5167 void finish_cost (const vector_costs *) override;
5168
5169 protected:
5170 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5171 vect_cost_model_location, unsigned int);
5172 void density_test (loop_vec_info);
5173 void adjust_vect_cost_per_loop (loop_vec_info);
5174 unsigned int determine_suggested_unroll_factor (loop_vec_info);
5175
5176 /* Total number of vectorized stmts (loop only). */
5177 unsigned m_nstmts = 0;
5178 /* Total number of loads (loop only). */
5179 unsigned m_nloads = 0;
5180 /* Total number of stores (loop only). */
5181 unsigned m_nstores = 0;
5182 /* Reduction factor for suggesting unroll factor (loop only). */
5183 unsigned m_reduc_factor = 0;
5184 /* Possible extra penalized cost on vector construction (loop only). */
5185 unsigned m_extra_ctor_cost = 0;
5186 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5187 instruction is needed by the vectorization. */
5188 bool m_vect_nonmem = false;
/* Whether this loop gets vectorized with an emulated gather load.  */
5190 bool m_gather_load = false;
5191 };
5192
5193 /* Test for likely overcommitment of vector hardware resources. If a
5194 loop iteration is relatively large, and too large a percentage of
5195 instructions in the loop are vectorized, the cost model may not
5196 adequately reflect delays from unavailable vector resources.
5197 Penalize the loop body cost for this case. */
5198
5199 void
5200 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5201 {
/* This density test only cares about the cost of the vector version of the
   loop, so immediately return if we are passed costing for the scalar
   version (namely, computing the single scalar iteration cost).  */
5205 if (m_costing_for_scalar)
5206 return;
5207
5208 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5209 basic_block *bbs = get_loop_body (loop);
5210 int nbbs = loop->num_nodes;
5211 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5212
5213 for (int i = 0; i < nbbs; i++)
5214 {
5215 basic_block bb = bbs[i];
5216 gimple_stmt_iterator gsi;
5217
5218 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5219 {
5220 gimple *stmt = gsi_stmt (gsi);
5221 if (is_gimple_debug (stmt))
5222 continue;
5223
5224 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5225
5226 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5227 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5228 not_vec_cost++;
5229 }
5230 }
5231
5232 free (bbs);
5233 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
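/* For example, a vector body cost of 90 with 10 statements that will
   remain scalar gives a density of (90 * 100) / (90 + 10) = 90%.  */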
5234
5235 if (density_pct > rs6000_density_pct_threshold
5236 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5237 {
5238 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5239 if (dump_enabled_p ())
5240 dump_printf_loc (MSG_NOTE, vect_location,
5241 "density %d%%, cost %d exceeds threshold, penalizing "
5242 "loop body cost by %u%%\n", density_pct,
5243 vec_cost + not_vec_cost, rs6000_density_penalty);
5244 }
5245
5246 /* Check whether we need to penalize the body cost to account
5247 for excess strided or elementwise loads. */
5248 if (m_extra_ctor_cost > 0)
5249 {
5250 gcc_assert (m_nloads <= m_nstmts);
5251 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5252
/* Many scalar loads that are strided or elementwise loads into a vector
   are likely to be bound by latency and load-unit resources if both
   conditions below hold:
   1. there are many loads, so it is easy to end up with a long wait
      for the load units;
   2. loads make up a large proportion of all vectorized statements, so
      it is hard to schedule other statements to spread among the loads.
   One typical case is the innermost loop of the hotspot of SPEC2017
   503.bwaves_r without loop interchange.  */
5263 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5264 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5265 {
5266 m_costs[vect_body] += m_extra_ctor_cost;
5267 if (dump_enabled_p ())
5268 dump_printf_loc (MSG_NOTE, vect_location,
5269 "Found %u loads and "
5270 "load pct. %u%% exceed "
5271 "the threshold, "
5272 "penalizing loop body "
5273 "cost by extra cost %u "
5274 "for ctor.\n",
5275 m_nloads, load_pct,
5276 m_extra_ctor_cost);
5277 }
5278 }
5279 }
5280
5281 /* Implement targetm.vectorize.create_costs. */
5282
5283 static vector_costs *
5284 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5285 {
5286 return new rs6000_cost_data (vinfo, costing_for_scalar);
5287 }
5288
/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
   For some statements, we would like to further fine-tune the cost on top of
   the rs6000_builtin_vectorization_cost handling, which doesn't have any
   information on statement operation codes etc.  One typical case here is
   COND_EXPR; it costs the same as a simple FXU instruction when evaluated
   for scalar cost, but it should be priced higher whether it is transformed
   into compare + branch or compare + isel instructions.  */
5296
5297 static unsigned
5298 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5299 struct _stmt_vec_info *stmt_info)
5300 {
5301 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5302 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5303 {
5304 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
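/* For example, a statement like "x = a < b ? c : d" (a COND_EXPR on the
   RHS) gets an extra cost of 2 on top of the generic scalar_stmt cost,
   since it is transformed into a compare plus isel or a compare plus
   branch.  */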
5305 if (subcode == COND_EXPR)
5306 return 2;
5307 }
5308
5309 return 0;
5310 }
5311
5312 /* Helper function for add_stmt_cost. Check each statement cost
5313 entry, gather information and update the target_cost fields
5314 accordingly. */
5315 void
5316 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5317 stmt_vec_info stmt_info,
5318 vect_cost_model_location where,
5319 unsigned int orig_count)
5320 {
5322 /* Check whether we're doing something other than just a copy loop.
5323 Not all such loops may be profitably vectorized; see
5324 rs6000_finish_cost. */
5325 if (kind == vec_to_scalar
5326 || kind == vec_perm
5327 || kind == vec_promote_demote
5328 || kind == vec_construct
5329 || kind == scalar_to_vec
5330 || (where == vect_body && kind == vector_stmt))
5331 m_vect_nonmem = true;
5332
5333 /* Gather some information when we are costing the vectorized instruction
5334 for the statements located in a loop body. */
5335 if (!m_costing_for_scalar
5336 && is_a<loop_vec_info> (m_vinfo)
5337 && where == vect_body)
5338 {
5339 m_nstmts += orig_count;
5340
5341 if (kind == scalar_load
5342 || kind == vector_load
5343 || kind == unaligned_load
5344 || kind == vector_gather_load)
5345 {
5346 m_nloads += orig_count;
5347 if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5348 m_gather_load = true;
5349 }
5350 else if (kind == scalar_store
5351 || kind == vector_store
5352 || kind == unaligned_store
5353 || kind == vector_scatter_store)
5354 m_nstores += orig_count;
5355 else if ((kind == scalar_stmt
5356 || kind == vector_stmt
5357 || kind == vec_to_scalar)
5358 && stmt_info
5359 && vect_is_reduction (stmt_info))
5360 {
/* The loop body contains normal int or fp operations and the epilogue
   contains a vector reduction.  For simplicity, we assume an int
   operation takes one cycle and an fp operation takes one more.  */
5364 tree lhs = gimple_get_lhs (stmt_info->stmt);
5365 bool is_float = FLOAT_TYPE_P (TREE_TYPE (lhs));
5366 unsigned int basic_cost = is_float ? 2 : 1;
5367 m_reduc_factor = MAX (basic_cost * orig_count, m_reduc_factor);
5368 }
5369
5370 /* Power processors do not currently have instructions for strided
5371 and elementwise loads, and instead we must generate multiple
5372 scalar loads. This leads to undercounting of the cost. We
5373 account for this by scaling the construction cost by the number
5374 of elements involved, and saving this as extra cost that we may
5375 or may not need to apply. When finalizing the cost of the loop,
5376 the extra penalty is applied when the load density heuristics
5377 are satisfied. */
5378 if (kind == vec_construct && stmt_info
5379 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5380 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5381 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5382 {
5383 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5384 unsigned int nunits = vect_nunits_for_cost (vectype);
/* As PR103702 shows, it's possible that the vectorizer wants to do
   costing for only one unit here; there is no need to penalize it,
   so simply return early.  */
5388 if (nunits == 1)
5389 return;
/* The i386 port adopts nunits * stmt_cost as the penalized cost for
   this kind of case.  We used to follow that, but found it could
   result in an unreliable body cost, especially for V16QI/V8HI
   modes.  To do better, we choose this heuristic instead: for each
   scalar load, use 2 as the penalized cost for the case with 2
   nunits and 1 for the other cases.  It has little supporting
   theory and is mainly concluded from broad performance evaluations
   on Power8, Power9 and Power10.  One possibly related point is
   that constructing a vector from more units uses more insns, which
   have more chances to be scheduled well (even run in parallel when
   enough units are available), so it seems reasonable not to
   penalize those cases as much.  */
5403 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5404 unsigned int extra_cost = nunits * adjusted_cost;
5405 m_extra_ctor_cost += extra_cost;
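/* For example, a V2DI construction (nunits == 2) records 2 * 2 = 4
   extra units, while a V16QI construction records 16 * 1 = 16.  */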
5406 }
5407 }
5408 }
5409
5410 unsigned
5411 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5412 stmt_vec_info stmt_info, slp_tree,
5413 tree vectype, int misalign,
5414 vect_cost_model_location where)
5415 {
5416 unsigned retval = 0;
5417
5418 if (flag_vect_cost_model)
5419 {
5420 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5421 misalign);
5422 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5423 /* Statements in an inner loop relative to the loop being
5424 vectorized are weighted more heavily. The value here is
5425 arbitrary and could potentially be improved with analysis. */
5426 unsigned int orig_count = count;
5427 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5428 m_costs[where] += retval;
5429
5430 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5431 }
5432
5433 return retval;
5434 }
5435
/* For some target-specific vectorization costs which can't be handled per
   stmt, we check the requisite conditions and adjust the vectorization cost
   accordingly if they are satisfied.  One typical example is to model the
   shift cost for vectors with length by counting the number of required
   lengths under the condition LOOP_VINFO_FULLY_WITH_LENGTH_P.  */
5441
5442 void
5443 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5444 {
5445 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5446 {
5447 rgroup_controls *rgc;
5448 unsigned int num_vectors_m1;
5449 unsigned int shift_cnt = 0;
5450 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5451 if (rgc->type)
5452 /* Each length needs one shift to fill into bits 0-7. */
5453 shift_cnt += num_vectors_m1 + 1;
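/* For example, two rgroups needing 2 and 3 vectors respectively cost
   2 + 3 = 5 scalar shifts here.  */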
5454
5455 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5456 NULL_TREE, 0, vect_body);
5457 }
5458 }
5459
/* Determine the suggested unroll factor by considering the factors below:

   - the unroll option/pragma, which can disable unrolling for this loop;
   - a simple hardware resource model for non-memory vector insns;
   - aggressive heuristics when the iteration count is unknown:
     - a reduction case, to break cross-iteration dependencies;
     - an emulated gather load;
   - the estimated iteration count when the iteration count is unknown.  */
5469
5470
5471 unsigned int
5472 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
5473 {
5474 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5475
5476 /* Don't unroll if it's specified explicitly not to be unrolled. */
5477 if (loop->unroll == 1
5478 || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
5479 || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
5480 return 1;
5481
5482 unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
/* Don't unroll if there are no vector instructions other than for
   memory access.  */
5484 if (nstmts_nonldst == 0)
5485 return 1;
5486
5487 /* Consider breaking cross iteration dependency for reduction. */
5488 unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
5489
/* Use a simple hardware resource model based on how many non-load/store
   vector instructions can be issued per cycle.  */
5492 unsigned int issue_width = rs6000_vect_unroll_issue;
5493 unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
5494 uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
/* Make sure it is a power of 2.  */
5496 uf = 1 << ceil_log2 (uf);
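/* For example, with an issue width of 4, a reduction factor of 2 and
   3 non-load/store vector statements, CEIL (2 * 4, 3) gives 3, which
   rounds up to the power-of-two unroll factor 4 (assuming
   rs6000_vect_unroll_limit permits it).  */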
5497
/* If the iteration count is known, the costing is exact enough;
   don't worry that it could be worse.  */
5500 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5501 return uf;
5502
/* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
   loop if either condition below is satisfied:
   - the reduction factor exceeds the threshold;
   - an emulated gather load is adopted.  */
5507 if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold
5508 || m_gather_load)
5509 return uf;
5510
5511 /* Check if we can conclude it's good to unroll from the estimated
5512 iteration count. */
5513 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
5514 unsigned int vf = vect_vf_for_cost (loop_vinfo);
5515 unsigned int unrolled_vf = vf * uf;
5516 if (est_niter == -1 || est_niter < unrolled_vf)
/* When the estimated iteration count of this loop is unknown or smaller
   than the unrolled VF, it's possible that we can vectorize this loop
   with the original VF but would fail to vectorize it with the unrolled
   VF if the actual iteration count lies in between.  */
5521 return 1;
5522 else
5523 {
5524 unsigned int epil_niter_unr = est_niter % unrolled_vf;
5525 unsigned int epil_niter = est_niter % vf;
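/* For example, with est_niter = 100, vf = 4 and uf = 2: unrolled_vf
   is 8, epil_niter_unr is 4 and epil_niter is 0.  With partial
   vectors, 4 <= vf permits unrolling; without them, 4 > 0 does not.  */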
/* Even if we have partial vector support, it can still be inefficient
   to calculate the lengths when the iteration count is unknown, so
   only expect unrolling to be good when the epilogue iteration count
   is not bigger than VF (only one length calculation).  */
5530 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5531 && epil_niter_unr <= vf)
5532 return uf;
/* Without partial vector support, conservatively unroll when the
   epilogue iteration count is no more than the original one
   (the epilogue execution time wouldn't be longer than before).  */
5536 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5537 && epil_niter_unr <= epil_niter)
5538 return uf;
5539 }
5540
5541 return 1;
5542 }
5543
5544 void
5545 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5546 {
5547 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5548 {
5549 adjust_vect_cost_per_loop (loop_vinfo);
5550 density_test (loop_vinfo);
5551
5552 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5553 that require versioning for any reason. The vectorization is at
5554 best a wash inside the loop, and the versioning checks make
5555 profitability highly unlikely and potentially quite harmful. */
5556 if (!m_vect_nonmem
5557 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5558 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5559 m_costs[vect_body] += 10000;
5560
5561 m_suggested_unroll_factor
5562 = determine_suggested_unroll_factor (loop_vinfo);
5563 }
5564
5565 vector_costs::finish_cost (scalar_costs);
5566 }
5567
5568 /* Implement targetm.loop_unroll_adjust. */
5569
5570 static unsigned
5571 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5572 {
5573 if (unroll_only_small_loops)
5574 {
5575 /* TODO: These are hardcoded values right now. We probably should use
5576 a PARAM here. */
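/* For example, with nunroll == 8, a 5-insn loop is unrolled at most
   4 times, a 9-insn loop at most twice, and an 11-insn loop not at
   all.  */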
5577 if (loop->ninsns <= 6)
5578 return MIN (4, nunroll);
5579 if (loop->ninsns <= 10)
5580 return MIN (2, nunroll);
5581
5582 return 0;
5583 }
5584
5585 return nunroll;
5586 }
5587
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE_OUT, or
   NULL_TREE if it is not available.
5591
5592 Implement targetm.vectorize.builtin_vectorized_function. */
5593
5594 static tree
5595 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5596 tree type_in)
5597 {
5598 machine_mode in_mode, out_mode;
5599 int in_n, out_n;
5600
5601 if (TARGET_DEBUG_BUILTIN)
5602 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5603 combined_fn_name (combined_fn (fn)),
5604 GET_MODE_NAME (TYPE_MODE (type_out)),
5605 GET_MODE_NAME (TYPE_MODE (type_in)));
5606
5607 /* TODO: Should this be gcc_assert? */
5608 if (TREE_CODE (type_out) != VECTOR_TYPE
5609 || TREE_CODE (type_in) != VECTOR_TYPE)
5610 return NULL_TREE;
5611
5612 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5613 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5614 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5615 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5616
5617 switch (fn)
5618 {
5619 CASE_CFN_COPYSIGN:
5620 if (VECTOR_UNIT_VSX_P (V2DFmode)
5621 && out_mode == DFmode && out_n == 2
5622 && in_mode == DFmode && in_n == 2)
5623 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5624 if (VECTOR_UNIT_VSX_P (V4SFmode)
5625 && out_mode == SFmode && out_n == 4
5626 && in_mode == SFmode && in_n == 4)
5627 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5628 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5629 && out_mode == SFmode && out_n == 4
5630 && in_mode == SFmode && in_n == 4)
5631 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5632 break;
5633 CASE_CFN_CEIL:
5634 if (VECTOR_UNIT_VSX_P (V2DFmode)
5635 && out_mode == DFmode && out_n == 2
5636 && in_mode == DFmode && in_n == 2)
5637 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5638 if (VECTOR_UNIT_VSX_P (V4SFmode)
5639 && out_mode == SFmode && out_n == 4
5640 && in_mode == SFmode && in_n == 4)
5641 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5642 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5643 && out_mode == SFmode && out_n == 4
5644 && in_mode == SFmode && in_n == 4)
5645 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5646 break;
5647 CASE_CFN_FLOOR:
5648 if (VECTOR_UNIT_VSX_P (V2DFmode)
5649 && out_mode == DFmode && out_n == 2
5650 && in_mode == DFmode && in_n == 2)
5651 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5652 if (VECTOR_UNIT_VSX_P (V4SFmode)
5653 && out_mode == SFmode && out_n == 4
5654 && in_mode == SFmode && in_n == 4)
5655 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5656 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5657 && out_mode == SFmode && out_n == 4
5658 && in_mode == SFmode && in_n == 4)
5659 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5660 break;
5661 CASE_CFN_FMA:
5662 if (VECTOR_UNIT_VSX_P (V2DFmode)
5663 && out_mode == DFmode && out_n == 2
5664 && in_mode == DFmode && in_n == 2)
5665 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5666 if (VECTOR_UNIT_VSX_P (V4SFmode)
5667 && out_mode == SFmode && out_n == 4
5668 && in_mode == SFmode && in_n == 4)
5669 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5670 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5671 && out_mode == SFmode && out_n == 4
5672 && in_mode == SFmode && in_n == 4)
5673 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5674 break;
5675 CASE_CFN_TRUNC:
5676 if (VECTOR_UNIT_VSX_P (V2DFmode)
5677 && out_mode == DFmode && out_n == 2
5678 && in_mode == DFmode && in_n == 2)
5679 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5680 if (VECTOR_UNIT_VSX_P (V4SFmode)
5681 && out_mode == SFmode && out_n == 4
5682 && in_mode == SFmode && in_n == 4)
5683 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5684 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5685 && out_mode == SFmode && out_n == 4
5686 && in_mode == SFmode && in_n == 4)
5687 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5688 break;
5689 CASE_CFN_NEARBYINT:
5690 if (VECTOR_UNIT_VSX_P (V2DFmode)
5691 && flag_unsafe_math_optimizations
5692 && out_mode == DFmode && out_n == 2
5693 && in_mode == DFmode && in_n == 2)
5694 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5695 if (VECTOR_UNIT_VSX_P (V4SFmode)
5696 && flag_unsafe_math_optimizations
5697 && out_mode == SFmode && out_n == 4
5698 && in_mode == SFmode && in_n == 4)
5699 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5700 break;
5701 CASE_CFN_RINT:
5702 if (VECTOR_UNIT_VSX_P (V2DFmode)
5703 && !flag_trapping_math
5704 && out_mode == DFmode && out_n == 2
5705 && in_mode == DFmode && in_n == 2)
5706 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5707 if (VECTOR_UNIT_VSX_P (V4SFmode)
5708 && !flag_trapping_math
5709 && out_mode == SFmode && out_n == 4
5710 && in_mode == SFmode && in_n == 4)
5711 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5712 break;
5713 default:
5714 break;
5715 }
5716
5717 /* Generate calls to libmass if appropriate. */
5718 if (rs6000_veclib_handler)
5719 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5720
5721 return NULL_TREE;
5722 }
5723
5724 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5725 library with vectorized intrinsics. */
5726
5727 static tree
5728 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5729 tree type_in)
5730 {
5731 char name[32];
5732 const char *suffix = NULL;
5733 tree fntype, new_fndecl, bdecl = NULL_TREE;
5734 int n_args = 1;
5735 const char *bname;
5736 machine_mode el_mode, in_mode;
5737 int n, in_n;
5738
5739 /* Libmass is suitable for unsafe math only as it does not correctly support
5740 parts of IEEE with the required precision such as denormals. Only support
5741 it if we have VSX to use the simd d2 or f4 functions.
5742 XXX: Add variable length support. */
5743 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5744 return NULL_TREE;
5745
5746 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5747 n = TYPE_VECTOR_SUBPARTS (type_out);
5748 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5749 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5750 if (el_mode != in_mode
5751 || n != in_n)
5752 return NULL_TREE;
5753
5754 switch (fn)
5755 {
5756 CASE_CFN_ATAN2:
5757 CASE_CFN_HYPOT:
5758 CASE_CFN_POW:
5759 n_args = 2;
5760 gcc_fallthrough ();
5761
5762 CASE_CFN_ACOS:
5763 CASE_CFN_ACOSH:
5764 CASE_CFN_ASIN:
5765 CASE_CFN_ASINH:
5766 CASE_CFN_ATAN:
5767 CASE_CFN_ATANH:
5768 CASE_CFN_CBRT:
5769 CASE_CFN_COS:
5770 CASE_CFN_COSH:
5771 CASE_CFN_ERF:
5772 CASE_CFN_ERFC:
5773 CASE_CFN_EXP2:
5774 CASE_CFN_EXP:
5775 CASE_CFN_EXPM1:
5776 CASE_CFN_LGAMMA:
5777 CASE_CFN_LOG10:
5778 CASE_CFN_LOG1P:
5779 CASE_CFN_LOG2:
5780 CASE_CFN_LOG:
5781 CASE_CFN_SIN:
5782 CASE_CFN_SINH:
5783 CASE_CFN_SQRT:
5784 CASE_CFN_TAN:
5785 CASE_CFN_TANH:
5786 if (el_mode == DFmode && n == 2)
5787 {
5788 bdecl = mathfn_built_in (double_type_node, fn);
5789 suffix = "d2"; /* pow -> powd2 */
5790 }
5791 else if (el_mode == SFmode && n == 4)
5792 {
5793 bdecl = mathfn_built_in (float_type_node, fn);
5794 suffix = "4"; /* powf -> powf4 */
5795 }
5796 else
5797 return NULL_TREE;
5798 if (!bdecl)
5799 return NULL_TREE;
5800 break;
5801
5802 default:
5803 return NULL_TREE;
5804 }
5805
5806 gcc_assert (suffix != NULL);
5807 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5808 if (!bname)
5809 return NULL_TREE;
5810
5811 strcpy (name, bname + strlen ("__builtin_"));
5812 strcat (name, suffix);
5813
5814 if (n_args == 1)
5815 fntype = build_function_type_list (type_out, type_in, NULL);
5816 else if (n_args == 2)
5817 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5818 else
5819 gcc_unreachable ();
5820
5821 /* Build a function declaration for the vectorized function. */
5822 new_fndecl = build_decl (BUILTINS_LOCATION,
5823 FUNCTION_DECL, get_identifier (name), fntype);
5824 TREE_PUBLIC (new_fndecl) = 1;
5825 DECL_EXTERNAL (new_fndecl) = 1;
5826 DECL_IS_NOVOPS (new_fndecl) = 1;
5827 TREE_READONLY (new_fndecl) = 1;
5828
5829 return new_fndecl;
5830 }
5831
5832 \f
5833 /* Default CPU string for rs6000*_file_start functions. */
5834 static const char *rs6000_default_cpu;
5835
5836 #ifdef USING_ELFOS_H
5837 const char *rs6000_machine;
5838
5839 const char *
5840 rs6000_machine_from_flags (void)
5841 {
5842 /* e300 and e500 */
5843 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5844 return "e300";
5845 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5846 return "e500";
5847 if (rs6000_cpu == PROCESSOR_PPCE500MC)
5848 return "e500mc";
5849 if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5850 return "e500mc64";
5851 if (rs6000_cpu == PROCESSOR_PPCE5500)
5852 return "e5500";
5853 if (rs6000_cpu == PROCESSOR_PPCE6500)
5854 return "e6500";
5855
5856 /* 400 series */
5857 if (rs6000_cpu == PROCESSOR_PPC403)
5858 return "\"403\"";
5859 if (rs6000_cpu == PROCESSOR_PPC405)
5860 return "\"405\"";
5861 if (rs6000_cpu == PROCESSOR_PPC440)
5862 return "\"440\"";
5863 if (rs6000_cpu == PROCESSOR_PPC476)
5864 return "\"476\"";
5865
5866 /* A2 */
5867 if (rs6000_cpu == PROCESSOR_PPCA2)
5868 return "a2";
5869
5870 /* Cell BE */
5871 if (rs6000_cpu == PROCESSOR_CELL)
5872 return "cell";
5873
5874 /* Titan */
5875 if (rs6000_cpu == PROCESSOR_TITAN)
5876 return "titan";
5877
5878 /* 500 series and 800 series */
5879 if (rs6000_cpu == PROCESSOR_MPCCORE)
5880 return "\"821\"";
5881
5882 #if 0
5883 /* This (and ppc64 below) are disabled here (for now at least) because
5884 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5885 are #define'd as some of these. Untangling that is a job for later. */
5886
5887 /* 600 series and 700 series, "classic" */
5888 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5889 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5890 || rs6000_cpu == PROCESSOR_PPC750)
5891 return "ppc";
5892 #endif
5893
5894 /* Classic with AltiVec, "G4" */
5895 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5896 return "\"7450\"";
5897
5898 #if 0
5899 /* The older 64-bit CPUs */
5900 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5901 || rs6000_cpu == PROCESSOR_RS64A)
5902 return "ppc64";
5903 #endif
5904
5905 HOST_WIDE_INT flags = rs6000_isa_flags;
5906
5907 /* Disable the flags that should never influence the .machine selection. */
5908 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL
5909 | OPTION_MASK_ALTIVEC);
5910
5911 if ((flags & (POWER11_MASKS_SERVER & ~ISA_3_1_MASKS_SERVER)) != 0)
5912 return "power11";
5913 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5914 return "power10";
5915 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5916 return "power9";
5917 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5918 return "power8";
5919 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5920 return "power7";
5921 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5922 return "power6";
5923 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5924 return "power5";
5925 if ((flags & ISA_2_1_MASKS) != 0)
5926 return "power4";
5927 if ((flags & OPTION_MASK_POWERPC64) != 0)
5928 return "ppc64";
5929 return "ppc";
5930 }
5931
5932 void
5933 emit_asm_machine (void)
5934 {
5935 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5936 if (TARGET_ALTIVEC)
5937 fprintf (asm_out_file, "\t.machine altivec\n");
5938 }
5939 #endif
5940
5941 /* Do anything needed at the start of the asm file. */
5942
5943 static void
5944 rs6000_file_start (void)
5945 {
5946 char buffer[80];
5947 const char *start = buffer;
5948 FILE *file = asm_out_file;
5949
5950 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5951
5952 default_file_start ();
5953
5954 if (flag_verbose_asm)
5955 {
5956 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5957
5958 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5959 {
5960 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5961 start = "";
5962 }
5963
5964 if (OPTION_SET_P (rs6000_cpu_index))
5965 {
5966 fprintf (file, "%s -mcpu=%s", start,
5967 processor_target_table[rs6000_cpu_index].name);
5968 start = "";
5969 }
5970
5971 if (OPTION_SET_P (rs6000_tune_index))
5972 {
5973 fprintf (file, "%s -mtune=%s", start,
5974 processor_target_table[rs6000_tune_index].name);
5975 start = "";
5976 }
5977
5978 if (PPC405_ERRATUM77)
5979 {
5980 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5981 start = "";
5982 }
5983
5984 #ifdef USING_ELFOS_H
5985 switch (rs6000_sdata)
5986 {
5987 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5988 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5989 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5990 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5991 }
5992
5993 if (rs6000_sdata && g_switch_value)
5994 {
5995 fprintf (file, "%s -G %d", start,
5996 g_switch_value);
5997 start = "";
5998 }
5999 #endif
6000
6001 if (*start == '\0')
6002 putc ('\n', file);
6003 }
6004
6005 #ifdef USING_ELFOS_H
6006 rs6000_machine = rs6000_machine_from_flags ();
6007 emit_asm_machine ();
6008 #endif
6009
6010 if (DEFAULT_ABI == ABI_ELFv2)
6011 fprintf (file, "\t.abiversion 2\n");
6012 }
6013
6014 \f
6015 /* Return nonzero if this function is known to have a null epilogue. */
6016
6017 int
6018 direct_return (void)
6019 {
6020 if (reload_completed)
6021 {
6022 rs6000_stack_t *info = rs6000_stack_info ();
6023
6024 if (info->first_gp_reg_save == 32
6025 && info->first_fp_reg_save == 64
6026 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6027 && ! info->lr_save_p
6028 && ! info->cr_save_p
6029 && info->vrsave_size == 0
6030 && ! info->push_p)
6031 return 1;
6032 }
6033
6034 return 0;
6035 }
6036
6037 /* Helper for num_insns_constant. Calculate number of instructions to
6038 load VALUE to a single gpr using combinations of addi, addis, ori,
6039 oris, sldi and rldimi instructions. */
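/* For example, 0x7fff is loadable with a single li, 0x12340000 with a
   single lis, and a full 64-bit constant such as 0x1234567812345678
   typically needs a five-instruction lis/ori/sldi/oris/ori sequence,
   which rs6000_emit_set_long_const counts precisely.  */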
6040
6041 static int
6042 num_insns_constant_gpr (HOST_WIDE_INT value)
6043 {
/* Signed 16-bit constant loadable with addi.  */
6045 if (SIGNED_INTEGER_16BIT_P (value))
6046 return 1;
6047
/* Constant loadable with addis.  */
6049 else if ((value & 0xffff) == 0
6050 && (value >> 31 == -1 || value >> 31 == 0))
6051 return 1;
6052
/* PADDI can support up to 34-bit signed integers.  */
6054 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
6055 return 1;
6056
6057 else if (TARGET_POWERPC64)
6058 {
6059 int num_insns = 0;
6060 rs6000_emit_set_long_const (nullptr, value, &num_insns);
6061 return num_insns;
6062 }
6063
6064 else
6065 return 2;
6066 }
6067
6068 /* Helper for num_insns_constant. Allow constants formed by the
6069 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6070 and handle modes that require multiple gprs. */
6071
6072 static int
6073 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
6074 {
6075 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6076 int total = 0;
6077 while (nregs-- > 0)
6078 {
6079 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6080 int insns = num_insns_constant_gpr (low);
6081 if (insns > 2
6082 /* We won't get more than 2 from num_insns_constant_gpr
6083 except when TARGET_POWERPC64 and mode is DImode or
6084 wider, so the register mode must be DImode. */
6085 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6086 insns = 2;
6087 total += insns;
6088 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6089 it all at once would be UB. */
6090 value >>= (BITS_PER_WORD - 1);
6091 value >>= 1;
6092 }
6093 return total;
6094 }
6095
/* Return the number of instructions it takes to form a constant in as
   many gprs as are needed for MODE.  */
6098
6099 int
6100 num_insns_constant (rtx op, machine_mode mode)
6101 {
6102 HOST_WIDE_INT val;
6103
6104 switch (GET_CODE (op))
6105 {
6106 case CONST_INT:
6107 val = INTVAL (op);
6108 break;
6109
6110 case CONST_WIDE_INT:
6111 {
6112 int insns = 0;
6113 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6114 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6115 DImode);
6116 return insns;
6117 }
6118
6119 case CONST_DOUBLE:
6120 {
6121 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6122
6123 if (mode == SFmode || mode == SDmode)
6124 {
6125 long l;
6126
6127 if (mode == SDmode)
6128 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6129 else
6130 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6131 /* See the first define_split in rs6000.md handling a
6132 const_double_operand. */
6133 val = l;
6134 mode = SImode;
6135 }
6136 else if (mode == DFmode || mode == DDmode)
6137 {
6138 long l[2];
6139
6140 if (mode == DDmode)
6141 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6142 else
6143 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6144
6145 /* See the second (32-bit) and third (64-bit) define_split
6146 in rs6000.md handling a const_double_operand. */
6147 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6148 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6149 mode = DImode;
6150 }
6151 else if (mode == TFmode || mode == TDmode
6152 || mode == KFmode || mode == IFmode)
6153 {
6154 long l[4];
6155 int insns;
6156
6157 if (mode == TDmode)
6158 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6159 else
6160 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6161
6162 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6163 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6164 insns = num_insns_constant_multi (val, DImode);
6165 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6166 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6167 insns += num_insns_constant_multi (val, DImode);
6168 return insns;
6169 }
6170 else
6171 gcc_unreachable ();
6172 }
6173 break;
6174
6175 default:
6176 gcc_unreachable ();
6177 }
6178
6179 return num_insns_constant_multi (val, mode);
6180 }
6181
6182 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6183 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6184 corresponding element of the vector, but for V4SFmode, the
6185 corresponding "float" is interpreted as an SImode integer. */
6186
6187 HOST_WIDE_INT
6188 const_vector_elt_as_int (rtx op, unsigned int elt)
6189 {
6190 rtx tmp;
6191
6192 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6193 gcc_assert (GET_MODE (op) != V2DImode
6194 && GET_MODE (op) != V2DFmode);
6195
6196 tmp = CONST_VECTOR_ELT (op, elt);
6197 if (GET_MODE (op) == V4SFmode)
6198 tmp = gen_lowpart (SImode, tmp);
6199 return INTVAL (tmp);
6200 }
6201
/* Return true if OP can be synthesized with a particular vspltisb, vspltish
   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
   all items are set to the same value and contain COPIES replicas of the
   vsplt's operand; if STEP > 1, one in every STEP elements is set to the
   vsplt's operand and the others are set to the value of the operand's
   msb.  */
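/* For example, the V8HImode constant { 0x0303, ..., 0x0303 } matches
   with STEP 1 and COPIES 2: each halfword holds two copies of the byte
   3, so "vspltisb %0,3" can generate it.  */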
6208
6209 static bool
6210 vspltis_constant (rtx op, unsigned step, unsigned copies)
6211 {
6212 machine_mode mode = GET_MODE (op);
6213 machine_mode inner = GET_MODE_INNER (mode);
6214
6215 unsigned i;
6216 unsigned nunits;
6217 unsigned bitsize;
6218 unsigned mask;
6219
6220 HOST_WIDE_INT val;
6221 HOST_WIDE_INT splat_val;
6222 HOST_WIDE_INT msb_val;
6223
6224 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6225 return false;
6226
6227 nunits = GET_MODE_NUNITS (mode);
6228 bitsize = GET_MODE_BITSIZE (inner);
6229 mask = GET_MODE_MASK (inner);
6230
6231 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6232 splat_val = val;
6233 msb_val = val >= 0 ? 0 : -1;
6234
6235 if (val == 0 && step > 1)
6236 {
/* Special case for loading the most significant bit with step > 1.
   In that case, match 0 in all elements except every step-1'th one,
   which must match EASY_VECTOR_MSB.  */
6240 for (i = 1; i < nunits; ++i)
6241 {
6242 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6243 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6244 if ((i & (step - 1)) == step - 1)
6245 {
6246 if (!EASY_VECTOR_MSB (elt_val, inner))
6247 break;
6248 }
6249 else if (elt_val)
6250 break;
6251 }
6252 if (i == nunits)
6253 return true;
6254 }
6255
/* Construct the value to be splatted, if possible.  If not, return
   false.  */
6257 for (i = 2; i <= copies; i *= 2)
6258 {
6259 HOST_WIDE_INT small_val;
6260 bitsize /= 2;
6261 small_val = splat_val >> bitsize;
6262 mask >>= bitsize;
6263 if (splat_val != ((HOST_WIDE_INT)
6264 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6265 | (small_val & mask)))
6266 return false;
6267 splat_val = small_val;
6268 inner = smallest_int_mode_for_size (bitsize);
6269 }
6270
6271 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6272 if (EASY_VECTOR_15 (splat_val))
6273 ;
6274
/* Also check if we can splat, and then add the result to itself.  Do so if
   the value is positive, or if the splat instruction is using OP's mode;
   for splat_val < 0, the splat and the add should use the same mode.  */
6278 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6279 && (splat_val >= 0 || (step == 1 && copies == 1)))
6280 ;
6281
/* Also check if we are loading up the most significant bit, which can be
   done by loading up -1 and shifting the value left by -1.  Only do this
   for step 1 here; for larger steps it is done earlier.  */
6285 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6286 ;
6287
6288 else
6289 return false;
6290
6291 /* Check if VAL is present in every STEP-th element, and the
6292 other elements are filled with its most significant bit. */
6293 for (i = 1; i < nunits; ++i)
6294 {
6295 HOST_WIDE_INT desired_val;
6296 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6297 if ((i & (step - 1)) == 0)
6298 desired_val = val;
6299 else
6300 desired_val = msb_val;
6301
6302 if (desired_val != const_vector_elt_as_int (op, elt))
6303 return false;
6304 }
6305
6306 return true;
6307 }
6308
/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6310 instruction, filling in the bottom elements with 0 or -1.
6311
Return 0 if the constant cannot be generated with VSLDOI.  Return positive
for the number of zero bytes to shift in, or negative for the number of
0xff bytes to shift in.
6315
6316 OP is a CONST_VECTOR. */
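/* For example, on a big-endian target the V4SImode constant
   { 5, 0, 0, 0 } returns 12: splat 5 with vspltisw, then shift twelve
   zero bytes in with vsldoi.  */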
6317
6318 int
6319 vspltis_shifted (rtx op)
6320 {
6321 machine_mode mode = GET_MODE (op);
6322 machine_mode inner = GET_MODE_INNER (mode);
6323
6324 unsigned i, j;
6325 unsigned nunits;
6326 unsigned mask;
6327
6328 HOST_WIDE_INT val;
6329
6330 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6331 return false;
6332
/* We need to create pseudo registers to do the shift, so don't recognize
   shifted vector constants after reload.  Don't match them even before RA
   after split1 is done, because there won't be a further splitting pass
   before RA to do the splitting.  */
6337 if (!can_create_pseudo_p ()
6338 || (cfun->curr_properties & PROP_rtl_split_insns))
6339 return false;
6340
6341 nunits = GET_MODE_NUNITS (mode);
6342 mask = GET_MODE_MASK (inner);
6343
6344 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6345
6346 /* Check if the value can really be the operand of a vspltis[bhw]. */
6347 if (EASY_VECTOR_15 (val))
6348 ;
6349
6350 /* Also check if we are loading up the most significant bit which can be done
6351 by loading up -1 and shifting the value left by -1. */
6352 else if (EASY_VECTOR_MSB (val, inner))
6353 ;
6354
6355 else
6356 return 0;
6357
/* Check if VAL is present in every element until we find elements
   that are 0 or all 1 bits.  */
6360 for (i = 1; i < nunits; ++i)
6361 {
6362 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6363 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6364
6365 /* If the value isn't the splat value, check for the remaining elements
6366 being 0/-1. */
6367 if (val != elt_val)
6368 {
6369 if (elt_val == 0)
6370 {
6371 for (j = i+1; j < nunits; ++j)
6372 {
6373 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6374 if (const_vector_elt_as_int (op, elt2) != 0)
6375 return 0;
6376 }
6377
6378 return (nunits - i) * GET_MODE_SIZE (inner);
6379 }
6380
6381 else if ((elt_val & mask) == mask)
6382 {
6383 for (j = i+1; j < nunits; ++j)
6384 {
6385 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6386 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6387 return 0;
6388 }
6389
6390 return -((nunits - i) * GET_MODE_SIZE (inner));
6391 }
6392
6393 else
6394 return 0;
6395 }
6396 }
6397
6398 /* If all elements are equal, we don't need to do VSLDOI. */
6399 return 0;
6400 }
6401
6402
6403 /* Return non-zero (element mode byte size) if OP is of the given MODE
6404 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6405
6406 int
6407 easy_altivec_constant (rtx op, machine_mode mode)
6408 {
6409 unsigned step, copies;
6410
6411 if (mode == VOIDmode)
6412 mode = GET_MODE (op);
6413 else if (mode != GET_MODE (op))
6414 return 0;
6415
/* V2DI/V2DF were added with VSX.  Only allow 0 and all 1's as easy
   constants.  */
6418 if (mode == V2DFmode)
6419 return zero_constant (op, mode) ? 8 : 0;
6420
6421 else if (mode == V2DImode)
6422 {
6423 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6424 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6425 return 0;
6426
6427 if (zero_constant (op, mode))
6428 return 8;
6429
6430 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6431 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6432 return 8;
6433
6434 return 0;
6435 }
6436
6437 /* V1TImode is a special container for TImode. Ignore for now. */
6438 else if (mode == V1TImode)
6439 return 0;
6440
6441 /* Start with a vspltisw. */
6442 step = GET_MODE_NUNITS (mode) / 4;
6443 copies = 1;
6444
6445 if (vspltis_constant (op, step, copies))
6446 return 4;
6447
6448 /* Then try with a vspltish. */
6449 if (step == 1)
6450 copies <<= 1;
6451 else
6452 step >>= 1;
6453
6454 if (vspltis_constant (op, step, copies))
6455 return 2;
6456
6457 /* And finally a vspltisb. */
6458 if (step == 1)
6459 copies <<= 1;
6460 else
6461 step >>= 1;
6462
6463 if (vspltis_constant (op, step, copies))
6464 return 1;
6465
6466 if (vspltis_shifted (op) != 0)
6467 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6468
6469 return 0;
6470 }
6471
6472 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6473 result is OP. Abort if it is not possible. */
6474
6475 rtx
6476 gen_easy_altivec_constant (rtx op)
6477 {
6478 machine_mode mode = GET_MODE (op);
6479 int nunits = GET_MODE_NUNITS (mode);
6480 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6481 unsigned step = nunits / 4;
6482 unsigned copies = 1;
6483
6484 /* Start with a vspltisw. */
6485 if (vspltis_constant (op, step, copies))
6486 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6487
6488 /* Then try with a vspltish. */
6489 if (step == 1)
6490 copies <<= 1;
6491 else
6492 step >>= 1;
6493
6494 if (vspltis_constant (op, step, copies))
6495 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6496
6497 /* And finally a vspltisb. */
6498 if (step == 1)
6499 copies <<= 1;
6500 else
6501 step >>= 1;
6502
6503 if (vspltis_constant (op, step, copies))
6504 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6505
6506 gcc_unreachable ();
6507 }
6508
6509 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6510 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6511
Return the number of instructions needed (1 or 2) via the address pointed
to by NUM_INSNS_PTR.
6514
6515 Return the constant that is being split via CONSTANT_PTR. */
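/* For example, a V16QImode vector of 5s needs just one xxspltib, and a
   V4SImode vector of 100s needs xxspltib plus a vextsb2w sign extension
   (two instructions, assuming no single prefixed-constant instruction
   applies); a V4SImode vector of 5s is rejected here because a single
   vspltisw is preferable.  */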
6516
6517 bool
6518 xxspltib_constant_p (rtx op,
6519 machine_mode mode,
6520 int *num_insns_ptr,
6521 int *constant_ptr)
6522 {
6523 size_t nunits = GET_MODE_NUNITS (mode);
6524 size_t i;
6525 HOST_WIDE_INT value;
6526 rtx element;
6527
/* Set the returned values to out-of-bounds values.  */
6529 *num_insns_ptr = -1;
6530 *constant_ptr = 256;
6531
6532 if (!TARGET_P9_VECTOR)
6533 return false;
6534
6535 if (mode == VOIDmode)
6536 mode = GET_MODE (op);
6537
6538 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6539 return false;
6540
6541 /* Handle (vec_duplicate <constant>). */
6542 if (GET_CODE (op) == VEC_DUPLICATE)
6543 {
6544 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6545 && mode != V2DImode)
6546 return false;
6547
6548 element = XEXP (op, 0);
6549 if (!CONST_INT_P (element))
6550 return false;
6551
6552 value = INTVAL (element);
6553 if (!IN_RANGE (value, -128, 127))
6554 return false;
6555 }
6556
6557 /* Handle (const_vector [...]). */
6558 else if (GET_CODE (op) == CONST_VECTOR)
6559 {
6560 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6561 && mode != V2DImode)
6562 return false;
6563
6564 element = CONST_VECTOR_ELT (op, 0);
6565 if (!CONST_INT_P (element))
6566 return false;
6567
6568 value = INTVAL (element);
6569 if (!IN_RANGE (value, -128, 127))
6570 return false;
6571
6572 for (i = 1; i < nunits; i++)
6573 {
6574 element = CONST_VECTOR_ELT (op, i);
6575 if (!CONST_INT_P (element))
6576 return false;
6577
6578 if (value != INTVAL (element))
6579 return false;
6580 }
6581 }
6582
6583 /* Handle integer constants being loaded into the upper part of the VSX
6584 register as a scalar. If the value isn't 0/-1, only allow it if the mode
can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6586 else if (CONST_INT_P (op))
6587 {
6588 if (!SCALAR_INT_MODE_P (mode))
6589 return false;
6590
6591 value = INTVAL (op);
6592 if (!IN_RANGE (value, -128, 127))
6593 return false;
6594
6595 if (!IN_RANGE (value, -1, 0))
6596 {
6597 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6598 return false;
6599
6600 if (EASY_VECTOR_15 (value))
6601 return false;
6602 }
6603 }
6604
6605 else
6606 return false;
6607
6608 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6609 sign extend. Special case 0/-1 to allow getting any VSX register instead
6610 of an Altivec register. */
6611 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6612 && EASY_VECTOR_15 (value))
6613 return false;
6614
6615 /* Return # of instructions and the constant byte for XXSPLTIB. */
6616 if (mode == V16QImode)
6617 *num_insns_ptr = 1;
6618
6619 else if (IN_RANGE (value, -1, 0))
6620 *num_insns_ptr = 1;
6621
6622 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6623 single XXSPLTIW or XXSPLTIDP instruction. */
6624 else if (vsx_prefixed_constant (op, mode))
6625 return false;
6626
/* Return XXSPLTIB followed by a sign extend operation to convert the
   constant to V8HImode or V4SImode.  */
6629 else
6630 *num_insns_ptr = 2;
6631
6632 *constant_ptr = (int) value;
6633 return true;
6634 }
6635
6636 /* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
6637 instructions vupkhsw and vspltisw.
6638
6639 Return the constant that is being split via CONSTANT_PTR. */
6640
6641 bool
6642 vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
6643 {
6644 HOST_WIDE_INT value;
6645 rtx elt;
6646
6647 if (!TARGET_P8_VECTOR)
6648 return false;
6649
6650 if (mode != V2DImode)
6651 return false;
6652
6653 if (!const_vec_duplicate_p (op, &elt))
6654 return false;
6655
6656 value = INTVAL (elt);
6657 if (value == 0 || value == 1
6658 || !EASY_VECTOR_15 (value))
6659 return false;
6660
6661 if (constant_ptr)
6662 *constant_ptr = (int) value;
6663 return true;
6664 }
6665
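/* Output the assembler template for moving a vector constant (operands[1])
   into vector register operands[0], or "#" if the move must be split.  */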
6666 const char *
6667 output_vec_const_move (rtx *operands)
6668 {
6669 int shift;
6670 machine_mode mode;
6671 rtx dest, vec;
6672
6673 dest = operands[0];
6674 vec = operands[1];
6675 mode = GET_MODE (dest);
6676
6677 if (TARGET_VSX)
6678 {
6679 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6680 int xxspltib_value = 256;
6681 int num_insns = -1;
6682
6683 if (zero_constant (vec, mode))
6684 {
6685 if (TARGET_P9_VECTOR)
6686 return "xxspltib %x0,0";
6687
6688 else if (dest_vmx_p)
6689 return "vspltisw %0,0";
6690
6691 else
6692 return "xxlxor %x0,%x0,%x0";
6693 }
6694
6695 if (all_ones_constant (vec, mode))
6696 {
6697 if (TARGET_P9_VECTOR)
6698 return "xxspltib %x0,255";
6699
6700 else if (dest_vmx_p)
6701 return "vspltisw %0,-1";
6702
6703 else if (TARGET_P8_VECTOR)
6704 return "xxlorc %x0,%x0,%x0";
6705
6706 else
6707 gcc_unreachable ();
6708 }
6709
6710 vec_const_128bit_type vsx_const;
6711 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6712 {
6713 unsigned imm = constant_generates_lxvkq (&vsx_const);
6714 if (imm)
6715 {
6716 operands[2] = GEN_INT (imm);
6717 return "lxvkq %x0,%2";
6718 }
6719
6720 imm = constant_generates_xxspltiw (&vsx_const);
6721 if (imm)
6722 {
6723 operands[2] = GEN_INT (imm);
6724 return "xxspltiw %x0,%2";
6725 }
6726
6727 imm = constant_generates_xxspltidp (&vsx_const);
6728 if (imm)
6729 {
6730 operands[2] = GEN_INT (imm);
6731 return "xxspltidp %x0,%2";
6732 }
6733 }
6734
6735 if (TARGET_P9_VECTOR
6736 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6737 {
6738 if (num_insns == 1)
6739 {
6740 operands[2] = GEN_INT (xxspltib_value & 0xff);
6741 return "xxspltib %x0,%2";
6742 }
6743
6744 return "#";
6745 }
6746 }
6747
6748 if (TARGET_ALTIVEC)
6749 {
6750 rtx splat_vec;
6751
6752 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6753 if (zero_constant (vec, mode))
6754 return "vspltisw %0,0";
6755
6756 if (all_ones_constant (vec, mode))
6757 return "vspltisw %0,-1";
6758
6759 /* Do we need to construct a value using VSLDOI? */
6760 shift = vspltis_shifted (vec);
6761 if (shift != 0)
6762 return "#";
6763
6764 splat_vec = gen_easy_altivec_constant (vec);
6765 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6766 operands[1] = XEXP (splat_vec, 0);
6767 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6768 return "#";
6769
6770 switch (GET_MODE (splat_vec))
6771 {
6772 case E_V4SImode:
6773 return "vspltisw %0,%1";
6774
6775 case E_V8HImode:
6776 return "vspltish %0,%1";
6777
6778 case E_V16QImode:
6779 return "vspltisb %0,%1";
6780
6781 default:
6782 gcc_unreachable ();
6783 }
6784 }
6785
6786 gcc_unreachable ();
6787 }
6788
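/* Sample output strings chosen by output_vec_const_move above for the
   two easiest constants:

     all zeros: "xxspltib %x0,0" on ISA 3.0, "vspltisw %0,0" for an
                Altivec destination, else "xxlxor %x0,%x0,%x0"
     all ones:  "xxspltib %x0,255" on ISA 3.0, "vspltisw %0,-1" for an
                Altivec destination, else "xxlorc %x0,%x0,%x0" on ISA 2.07

   Returning "#" tells the output machinery that the constant still needs
   to be split into multiple instructions later.  */
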
6789 /* Initialize vector TARGET to VALS. */
6790
6791 void
6792 rs6000_expand_vector_init (rtx target, rtx vals)
6793 {
6794 machine_mode mode = GET_MODE (target);
6795 machine_mode inner_mode = GET_MODE_INNER (mode);
6796 unsigned int n_elts = GET_MODE_NUNITS (mode);
6797 int n_var = 0, one_var = -1;
6798 bool all_same = true, all_const_zero = true;
6799 rtx x, mem;
6800 unsigned int i;
6801
6802 for (i = 0; i < n_elts; ++i)
6803 {
6804 x = XVECEXP (vals, 0, i);
6805 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6806 ++n_var, one_var = i;
6807 else if (x != CONST0_RTX (inner_mode))
6808 all_const_zero = false;
6809
6810 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6811 all_same = false;
6812 }
6813
6814 if (n_var == 0)
6815 {
6816 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6817 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6818 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6819 {
6820 /* Zero register. */
6821 emit_move_insn (target, CONST0_RTX (mode));
6822 return;
6823 }
6824 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6825 {
6826 /* Splat immediate. */
6827 emit_insn (gen_rtx_SET (target, const_vec));
6828 return;
6829 }
6830 else
6831 {
6832 /* Load from constant pool. */
6833 emit_move_insn (target, const_vec);
6834 return;
6835 }
6836 }
6837
6838 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6839 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6840 {
6841 rtx op[2];
6842 size_t i;
6843 size_t num_elements = all_same ? 1 : 2;
6844 for (i = 0; i < num_elements; i++)
6845 {
6846 op[i] = XVECEXP (vals, 0, i);
6847 /* Just in case there is a SUBREG with a smaller mode, do a
6848 conversion. */
6849 if (GET_MODE (op[i]) != inner_mode)
6850 {
6851 rtx tmp = gen_reg_rtx (inner_mode);
6852 convert_move (tmp, op[i], 0);
6853 op[i] = tmp;
6854 }
6855 /* Allow load with splat double word. */
6856 else if (MEM_P (op[i]))
6857 {
6858 if (!all_same)
6859 op[i] = force_reg (inner_mode, op[i]);
6860 }
6861 else if (!REG_P (op[i]))
6862 op[i] = force_reg (inner_mode, op[i]);
6863 }
6864
6865 if (all_same)
6866 {
6867 if (mode == V2DFmode)
6868 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6869 else
6870 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6871 }
6872 else
6873 {
6874 if (mode == V2DFmode)
6875 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6876 else
6877 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6878 }
6879 return;
6880 }
6881
6882 /* Special case initializing vector int if we are on 64-bit systems with
6883 direct move or we have the ISA 3.0 instructions. */
6884 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6885 && TARGET_DIRECT_MOVE_64BIT)
6886 {
6887 if (all_same)
6888 {
6889 rtx element0 = XVECEXP (vals, 0, 0);
6890 if (MEM_P (element0))
6891 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6892 else
6893 element0 = force_reg (SImode, element0);
6894
6895 if (TARGET_P9_VECTOR)
6896 emit_insn (gen_vsx_splat_v4si (target, element0));
6897 else
6898 {
6899 rtx tmp = gen_reg_rtx (DImode);
6900 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6901 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6902 }
6903 return;
6904 }
6905 else
6906 {
6907 rtx elements[4];
6908 size_t i;
6909
6910 for (i = 0; i < 4; i++)
6911 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6912
6913 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6914 elements[2], elements[3]));
6915 return;
6916 }
6917 }
6918
6919 /* With single precision floating point on VSX, note that internally single
6920 precision is actually represented as a double. Either make 2 V2DF
6921 vectors and convert these vectors to single precision, or do one
6922 conversion and splat the result to the other elements. */
6923 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6924 {
6925 if (all_same)
6926 {
6927 rtx element0 = XVECEXP (vals, 0, 0);
6928
6929 if (TARGET_P9_VECTOR)
6930 {
6931 if (MEM_P (element0))
6932 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6933
6934 emit_insn (gen_vsx_splat_v4sf (target, element0));
6935 }
6936
6937 else
6938 {
6939 rtx freg = gen_reg_rtx (V4SFmode);
6940 rtx sreg = force_reg (SFmode, element0);
6941 rtx cvt = (TARGET_XSCVDPSPN
6942 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6943 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6944
6945 emit_insn (cvt);
6946 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6947 const0_rtx));
6948 }
6949 }
6950 else
6951 {
6952 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6953 {
6954 rtx tmp_sf[4];
6955 rtx tmp_si[4];
6956 rtx tmp_di[4];
6957 rtx mrg_di[4];
6958 for (i = 0; i < 4; i++)
6959 {
6960 tmp_si[i] = gen_reg_rtx (SImode);
6961 tmp_di[i] = gen_reg_rtx (DImode);
6962 mrg_di[i] = gen_reg_rtx (DImode);
6963 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6964 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6965 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6966 }
6967
6968 if (!BYTES_BIG_ENDIAN)
6969 {
6970 std::swap (tmp_di[0], tmp_di[1]);
6971 std::swap (tmp_di[2], tmp_di[3]);
6972 }
6973
6974 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6975 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6976 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6977 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6978
6979 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6980 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6981 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6982 }
6983 else
6984 {
6985 rtx dbl_even = gen_reg_rtx (V2DFmode);
6986 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6987 rtx flt_even = gen_reg_rtx (V4SFmode);
6988 rtx flt_odd = gen_reg_rtx (V4SFmode);
6989 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6990 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6991 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6992 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6993
6994 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6995 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6996 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6997 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6998 rs6000_expand_extract_even (target, flt_even, flt_odd);
6999 }
7000 }
7001 return;
7002 }
7003
7004 /* Special case initializing vector short/char that are splats if we are on
7005 64-bit systems with direct move. */
7006 if (all_same && TARGET_DIRECT_MOVE_64BIT
7007 && (mode == V16QImode || mode == V8HImode))
7008 {
7009 rtx op0 = XVECEXP (vals, 0, 0);
7010 rtx di_tmp = gen_reg_rtx (DImode);
7011
7012 if (!REG_P (op0))
7013 op0 = force_reg (GET_MODE_INNER (mode), op0);
7014
7015 if (mode == V16QImode)
7016 {
7017 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7018 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7019 return;
7020 }
7021
7022 if (mode == V8HImode)
7023 {
7024 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7025 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7026 return;
7027 }
7028 }
7029
7030 /* Store value to stack temp. Load vector element. Splat. However, splat
7031 of 64-bit items is not supported on Altivec. */
7032 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7033 {
7034 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7035 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7036 XVECEXP (vals, 0, 0));
7037 x = gen_rtx_UNSPEC (VOIDmode,
7038 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7039 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7040 gen_rtvec (2,
7041 gen_rtx_SET (target, mem),
7042 x)));
7043 x = gen_rtx_VEC_SELECT (inner_mode, target,
7044 gen_rtx_PARALLEL (VOIDmode,
7045 gen_rtvec (1, const0_rtx)));
7046 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7047 return;
7048 }
7049
7050 /* One field is non-constant. Load constant then overwrite
7051 varying field. */
7052 if (n_var == 1)
7053 {
7054 rtx copy = copy_rtx (vals);
7055
7056 /* Load constant part of vector, substitute neighboring value for
7057 varying element. */
7058 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7059 rs6000_expand_vector_init (target, copy);
7060
7061 /* Insert variable. */
7062 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
7063 GEN_INT (one_var));
7064 return;
7065 }
7066
7067 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
7068 {
7069 rtx op[16];
7070 /* Force the values into word_mode registers. */
7071 for (i = 0; i < n_elts; i++)
7072 {
7073 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
7074 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
7075 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
7076 }
7077
7078 /* Take unsigned char on a 64-bit big-endian system as an example for the
7079 construction below; the input values are: A, B, C, D, ..., O, P. */
7080
7081 if (TARGET_DIRECT_MOVE_128)
7082 {
7083 /* Move to VSX register with vec_concat, each has 2 values.
7084 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7085 vr1[1] = { xxxxxxxC, xxxxxxxD };
7086 ...
7087 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7088 rtx vr1[8];
7089 for (i = 0; i < n_elts / 2; i++)
7090 {
7091 vr1[i] = gen_reg_rtx (V2DImode);
7092 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
7093 op[i * 2 + 1]));
7094 }
7095
7096 /* Pack vectors with 2 values into vectors with 4 values.
7097 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7098 vr2[1] = { xxxExxxF, xxxGxxxH };
7099 vr2[2] = { xxxIxxxJ, xxxKxxxL };
7100 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7101 rtx vr2[4];
7102 for (i = 0; i < n_elts / 4; i++)
7103 {
7104 vr2[i] = gen_reg_rtx (V4SImode);
7105 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7106 vr1[i * 2 + 1]));
7107 }
7108
7109 /* Pack vectors with 4 values into vectors with 8 values.
7110 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7111 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7112 rtx vr3[2];
7113 for (i = 0; i < n_elts / 8; i++)
7114 {
7115 vr3[i] = gen_reg_rtx (V8HImode);
7116 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7117 vr2[i * 2 + 1]));
7118 }
7119
7120 /* If the mode is V8HImode, we are done; assign it to the target and return. */
7121 if (mode == V8HImode)
7122 {
7123 emit_insn (gen_rtx_SET (target, vr3[0]));
7124 return;
7125 }
7126
7127 /* Pack vectors with 8 values into 16 values. */
7128 rtx res = gen_reg_rtx (V16QImode);
7129 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7130 emit_insn (gen_rtx_SET (target, res));
7131 }
7132 else
7133 {
7134 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7135 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7136 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7137 rtx perm_idx;
7138
7139 /* Set up some common gen routines and values. */
7140 if (BYTES_BIG_ENDIAN)
7141 {
7142 if (mode == V16QImode)
7143 {
7144 merge_v16qi = gen_altivec_vmrghb;
7145 merge_v8hi = gen_altivec_vmrglh;
7146 }
7147 else
7148 merge_v8hi = gen_altivec_vmrghh;
7149
7150 merge_v4si = gen_altivec_vmrglw;
7151 perm_idx = GEN_INT (3);
7152 }
7153 else
7154 {
7155 if (mode == V16QImode)
7156 {
7157 merge_v16qi = gen_altivec_vmrglb;
7158 merge_v8hi = gen_altivec_vmrghh;
7159 }
7160 else
7161 merge_v8hi = gen_altivec_vmrglh;
7162
7163 merge_v4si = gen_altivec_vmrghw;
7164 perm_idx = GEN_INT (0);
7165 }
7166
7167 /* Move to VSX register with direct move.
7168 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7169 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7170 ...
7171 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7172 rtx vr_qi[16];
7173 for (i = 0; i < n_elts; i++)
7174 {
7175 vr_qi[i] = gen_reg_rtx (V16QImode);
7176 if (TARGET_POWERPC64)
7177 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7178 else
7179 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7180 }
7181
7182 /* Merge/move to vector short.
7183 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7184 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7185 ...
7186 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7187 rtx vr_hi[8];
7188 for (i = 0; i < 8; i++)
7189 {
7190 rtx tmp = vr_qi[i];
7191 if (mode == V16QImode)
7192 {
7193 tmp = gen_reg_rtx (V16QImode);
7194 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7195 }
7196 vr_hi[i] = gen_reg_rtx (V8HImode);
7197 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7198 }
7199
7200 /* Merge vector short to vector int.
7201 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7202 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7203 ...
7204 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7205 rtx vr_si[4];
7206 for (i = 0; i < 4; i++)
7207 {
7208 rtx tmp = gen_reg_rtx (V8HImode);
7209 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7210 vr_si[i] = gen_reg_rtx (V4SImode);
7211 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7212 }
7213
7214 /* Merge vector int to vector long.
7215 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7216 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7217 rtx vr_di[2];
7218 for (i = 0; i < 2; i++)
7219 {
7220 rtx tmp = gen_reg_rtx (V4SImode);
7221 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7222 vr_di[i] = gen_reg_rtx (V2DImode);
7223 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7224 }
7225
7226 rtx res = gen_reg_rtx (V2DImode);
7227 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7228 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7229 }
7230
7231 return;
7232 }
7233
7234 /* Construct the vector in memory one field at a time
7235 and load the whole vector. */
7236 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7237 for (i = 0; i < n_elts; i++)
7238 emit_move_insn (adjust_address_nv (mem, inner_mode,
7239 i * GET_MODE_SIZE (inner_mode)),
7240 XVECEXP (vals, 0, i));
7241 emit_move_insn (target, mem);
7242 }
7243
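/* Typical source-level initializers that reach rs6000_expand_vector_init
   above, written with the GCC vector extensions (variable names are
   hypothetical):

     vector double vd = { x, y };       // V2DFmode concat/xxpermdi path
     vector double vs = { x, x };       // V2DFmode all_same splat path
     vector int    vi = { a, b, c, d }; // V4SImode path on 64-bit
                                        // targets with direct move
     vector int    vz = { 0, 0, 0, 0 }; // constant: a single zeroing insn

   Initializers not matched by any special case fall through to the final
   loop, which stores each field to a stack temporary and loads the whole
   vector.  */
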
7244 /* Insert VAL into element IDX of TARGET; VAL's size equals the vector element
7245 size, and IDX is a variable counted in vector elements. For P9 and above. */
7246
7247 static void
7248 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7249 {
7250 machine_mode mode = GET_MODE (target);
7251
7252 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7253
7254 machine_mode inner_mode = GET_MODE (val);
7255
7256 int width = GET_MODE_SIZE (inner_mode);
7257
7258 gcc_assert (width >= 1 && width <= 8);
7259
7260 int shift = exact_log2 (width);
7261
7262 machine_mode idx_mode = GET_MODE (idx);
7263
7264 machine_mode shift_mode;
7265 /* Gen function pointers for shifting left and generation of permutation
7266 control vectors. */
7267 rtx (*gen_ashl) (rtx, rtx, rtx);
7268 rtx (*gen_pcvr1) (rtx, rtx);
7269 rtx (*gen_pcvr2) (rtx, rtx);
7270
7271 if (TARGET_POWERPC64)
7272 {
7273 shift_mode = DImode;
7274 gen_ashl = gen_ashldi3;
7275 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
7276 : gen_altivec_lvsr_reg_di;
7277 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
7278 : gen_altivec_lvsl_reg_di;
7279 }
7280 else
7281 {
7282 shift_mode = SImode;
7283 gen_ashl = gen_ashlsi3;
7284 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
7285 : gen_altivec_lvsr_reg_si;
7286 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
7287 : gen_altivec_lvsl_reg_si;
7288 }
7289 /* Generate the IDX for permute shift, width is the vector element size.
7290 idx = idx * width. */
7291 rtx tmp = gen_reg_rtx (shift_mode);
7292 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7293
7294 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7295
7296 /* Generate one permutation control vector used for rotating the element
7297 at the to-insert position to element zero in the target vector. lvsl is
7298 used for big endianness while lvsr is used for little endianness:
7299 lvs[lr] v1,0,idx. */
7300 rtx pcvr1 = gen_reg_rtx (V16QImode);
7301 emit_insn (gen_pcvr1 (pcvr1, tmp));
7302
7303 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7304 rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7305 pcvr1);
7306 emit_insn (perm1);
7307
7308 /* Insert val into element 0 of target vector. */
7309 rs6000_expand_vector_set (target, val, const0_rtx);
7310
7311 /* Rotate back with a reversed permutation control vector generated from:
7312 lvs[rl] v2,0,idx. */
7313 rtx pcvr2 = gen_reg_rtx (V16QImode);
7314 emit_insn (gen_pcvr2 (pcvr2, tmp));
7315
7316 rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7317 pcvr2);
7318 emit_insn (perm2);
7319 }
7320
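/* A sketch of the code generated by the routine above for a
   variable-index insert on a 64-bit big-endian P9 system (register
   numbers are illustrative):

     sldi   r10,r_idx,2       ; byte offset, here for 4-byte elements
     lvsl   v13,0,r10         ; permute control: rotate left by offset
     vperm  v2,v2,v2,v13      ; rotate element IDX down to element 0
     ...                      ; insert VAL into element 0 via
                              ; rs6000_expand_vector_set
     lvsr   v13,0,r10         ; inverse permute control
     vperm  v2,v2,v2,v13      ; rotate the vector back into place.  */
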
7321 /* Insert VAL into element IDX of TARGET; VAL's size equals the vector element
7322 size, and IDX is a variable counted in vector elements. For P7 and P8. */
7323
7324 static void
7325 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7326 {
7327 machine_mode mode = GET_MODE (target);
7328
7329 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7330
7331 machine_mode inner_mode = GET_MODE (val);
7332 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7333
7334 int width = GET_MODE_SIZE (inner_mode);
7335 gcc_assert (width >= 1 && width <= 4);
7336
7337 int shift = exact_log2 (width);
7338
7339 machine_mode idx_mode = GET_MODE (idx);
7340
7341 machine_mode shift_mode;
7342 rtx (*gen_ashl)(rtx, rtx, rtx);
7343 rtx (*gen_add)(rtx, rtx, rtx);
7344 rtx (*gen_sub)(rtx, rtx, rtx);
7345 rtx (*gen_lvsl)(rtx, rtx);
7346
7347 if (TARGET_POWERPC64)
7348 {
7349 shift_mode = DImode;
7350 gen_ashl = gen_ashldi3;
7351 gen_add = gen_adddi3;
7352 gen_sub = gen_subdi3;
7353 gen_lvsl = gen_altivec_lvsl_reg_di;
7354 }
7355 else
7356 {
7357 shift_mode = SImode;
7358 gen_ashl = gen_ashlsi3;
7359 gen_add = gen_addsi3;
7360 gen_sub = gen_subsi3;
7361 gen_lvsl = gen_altivec_lvsl_reg_si;
7362 }
7363
7364 /* idx = idx * width. */
7365 rtx tmp = gen_reg_rtx (shift_mode);
7366 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7367
7368 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7369
7370 /* For LE: idx = idx + 8; for BE: idx = 24 - width - idx. */
7371 if (!BYTES_BIG_ENDIAN)
7372 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7373 else
7374 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7375
7376 /* lxv vs33, mask.
7377 DImode: 0xffffffffffffffff0000000000000000
7378 SImode: 0x00000000ffffffff0000000000000000
7379 HImode: 0x000000000000ffff0000000000000000
7380 QImode: 0x00000000000000ff0000000000000000. */
7381 rtx mask = gen_reg_rtx (V16QImode);
7382 rtx mask_v2di = gen_reg_rtx (V2DImode);
7383 rtvec v = rtvec_alloc (2);
7384 if (!BYTES_BIG_ENDIAN)
7385 {
7386 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7387 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7388 }
7389 else
7390 {
7391 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7392 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7393 }
7394 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7395 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7396 emit_insn (gen_rtx_SET (mask, sub_mask));
7397
7398 /* mtvsrd[wz] f0,tmp_val. */
7399 rtx tmp_val = gen_reg_rtx (SImode);
7400 if (inner_mode == E_SFmode)
7401 if (TARGET_DIRECT_MOVE_64BIT)
7402 emit_insn (gen_movsi_from_sf (tmp_val, val));
7403 else
7404 {
7405 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7406 emit_insn (gen_movsf_hardfloat (stack, val));
7407 rtx stack2 = copy_rtx (stack);
7408 PUT_MODE (stack2, SImode);
7409 emit_move_insn (tmp_val, stack2);
7410 }
7411 else
7412 tmp_val = force_reg (SImode, val);
7413
7414 rtx val_v16qi = gen_reg_rtx (V16QImode);
7415 rtx val_v2di = gen_reg_rtx (V2DImode);
7416 rtvec vec_val = rtvec_alloc (2);
7417 if (!BYTES_BIG_ENDIAN)
7418 {
7419 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7420 RTVEC_ELT (vec_val, 1) = tmp_val;
7421 }
7422 else
7423 {
7424 RTVEC_ELT (vec_val, 0) = tmp_val;
7425 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7426 }
7427 emit_insn (
7428 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7429 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7430 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7431
7432 /* lvsl 13,0,idx. */
7433 rtx pcv = gen_reg_rtx (V16QImode);
7434 emit_insn (gen_lvsl (pcv, tmp));
7435
7436 /* vperm 1,1,1,13. */
7437 /* vperm 0,0,0,13. */
7438 rtx val_perm = gen_reg_rtx (V16QImode);
7439 rtx mask_perm = gen_reg_rtx (V16QImode);
7440 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7441 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7442
7443 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7444
7445 /* xxsel 34,34,32,33. */
7446 emit_insn (
7447 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7448 }
7449
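/* The routine above implements the insert as a masked select.  Roughly,
   following the register numbers used in the comments in the code:

     lvsl   v13,0,r_idx            ; permute control from the scaled index
     vperm  v0,v0,v0,v13           ; rotate VAL into the target position
     vperm  v1,v1,v1,v13           ; rotate the element mask the same way
     xxsel  vs34,vs34,vs32,vs33    ; merge VAL into TARGET under the mask

   This keeps the sequence within P7/P8 instruction-set capabilities.  */
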
7450 /* Set field ELT_RTX of TARGET to VAL. */
7451
7452 void
7453 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7454 {
7455 machine_mode mode = GET_MODE (target);
7456 machine_mode inner_mode = GET_MODE_INNER (mode);
7457 rtx reg = gen_reg_rtx (mode);
7458 rtx mask, mem, x;
7459 int width = GET_MODE_SIZE (inner_mode);
7460 int i;
7461
7462 val = force_reg (GET_MODE (val), val);
7463
7464 if (VECTOR_MEM_VSX_P (mode))
7465 {
7466 if (!CONST_INT_P (elt_rtx))
7467 {
7468 /* For V2DI/V2DF, we can leverage the P9 version to generate xxpermdi
7469 when elt_rtx is variable. */
7470 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7471 {
7472 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7473 return;
7474 }
7475 else if (TARGET_VSX)
7476 {
7477 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7478 return;
7479 }
7480 else
7481 gcc_assert (CONST_INT_P (elt_rtx));
7482 }
7483
7484 rtx insn = NULL_RTX;
7485
7486 if (mode == V2DFmode)
7487 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7488
7489 else if (mode == V2DImode)
7490 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7491
7492 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7493 {
7494 if (mode == V4SImode)
7495 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7496 else if (mode == V8HImode)
7497 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7498 else if (mode == V16QImode)
7499 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7500 else if (mode == V4SFmode)
7501 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7502 }
7503
7504 if (insn)
7505 {
7506 emit_insn (insn);
7507 return;
7508 }
7509 }
7510
7511 /* Simplify setting single element vectors like V1TImode. */
7512 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7513 && INTVAL (elt_rtx) == 0)
7514 {
7515 emit_move_insn (target, gen_lowpart (mode, val));
7516 return;
7517 }
7518
7519 /* Load single variable value. */
7520 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7521 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7522 x = gen_rtx_UNSPEC (VOIDmode,
7523 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7524 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7525 gen_rtvec (2,
7526 gen_rtx_SET (reg, mem),
7527 x)));
7528
7529 /* Linear sequence. */
7530 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7531 for (i = 0; i < 16; ++i)
7532 XVECEXP (mask, 0, i) = GEN_INT (i);
7533
7534 /* Set permute mask to insert element into target. */
7535 for (i = 0; i < width; ++i)
7536 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7537 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7538
7539 if (BYTES_BIG_ENDIAN)
7540 x = gen_rtx_UNSPEC (mode,
7541 gen_rtvec (3, target, reg,
7542 force_reg (V16QImode, x)),
7543 UNSPEC_VPERM);
7544 else
7545 {
7546 if (TARGET_P9_VECTOR)
7547 x = gen_rtx_UNSPEC (mode,
7548 gen_rtvec (3, reg, target,
7549 force_reg (V16QImode, x)),
7550 UNSPEC_VPERMR);
7551 else
7552 {
7553 /* Invert selector. We prefer to generate VNAND on P8 so
7554 that future fusion opportunities can kick in, but must
7555 generate VNOR elsewhere. */
7556 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7557 rtx iorx = (TARGET_P8_VECTOR
7558 ? gen_rtx_IOR (V16QImode, notx, notx)
7559 : gen_rtx_AND (V16QImode, notx, notx));
7560 rtx tmp = gen_reg_rtx (V16QImode);
7561 emit_insn (gen_rtx_SET (tmp, iorx));
7562
7563 /* Permute with operands reversed and adjusted selector. */
7564 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7565 UNSPEC_VPERM);
7566 }
7567 }
7568
7569 emit_insn (gen_rtx_SET (target, x));
7570 }
7571
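/* A worked example of the permute mask built in the fallback path above:
   inserting into element 1 of a V4SImode vector (width 4) produces the
   V16QImode selector

     { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15 }

   so bytes 4..7 of the result come from the new value (selector entries
   0x10 and up pick bytes of the second vperm operand) and every other
   byte is kept from TARGET.  The little-endian cases adjust this
   selector as seen in the code above.  */
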
7572 /* Extract field ELT from VEC into TARGET. */
7573
7574 void
7575 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7576 {
7577 machine_mode mode = GET_MODE (vec);
7578 machine_mode inner_mode = GET_MODE_INNER (mode);
7579 rtx mem;
7580
7581 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7582 {
7583 switch (mode)
7584 {
7585 default:
7586 break;
7587 case E_V1TImode:
7588 emit_move_insn (target, gen_lowpart (TImode, vec));
7589 break;
7590 case E_V2DFmode:
7591 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7592 return;
7593 case E_V2DImode:
7594 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7595 return;
7596 case E_V4SFmode:
7597 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7598 return;
7599 case E_V16QImode:
7600 if (TARGET_DIRECT_MOVE_64BIT)
7601 {
7602 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7603 return;
7604 }
7605 else
7606 break;
7607 case E_V8HImode:
7608 if (TARGET_DIRECT_MOVE_64BIT)
7609 {
7610 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7611 return;
7612 }
7613 else
7614 break;
7615 case E_V4SImode:
7616 if (TARGET_DIRECT_MOVE_64BIT)
7617 {
7618 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7619 return;
7620 }
7621 break;
7622 }
7623 }
7624 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7625 && TARGET_DIRECT_MOVE_64BIT)
7626 {
7627 if (GET_MODE (elt) != DImode)
7628 {
7629 rtx tmp = gen_reg_rtx (DImode);
7630 convert_move (tmp, elt, 0);
7631 elt = tmp;
7632 }
7633 else if (!REG_P (elt))
7634 elt = force_reg (DImode, elt);
7635
7636 switch (mode)
7637 {
7638 case E_V1TImode:
7639 emit_move_insn (target, gen_lowpart (TImode, vec));
7640 return;
7641
7642 case E_V2DFmode:
7643 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7644 return;
7645
7646 case E_V2DImode:
7647 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7648 return;
7649
7650 case E_V4SFmode:
7651 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7652 return;
7653
7654 case E_V4SImode:
7655 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7656 return;
7657
7658 case E_V8HImode:
7659 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7660 return;
7661
7662 case E_V16QImode:
7663 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7664 return;
7665
7666 default:
7667 gcc_unreachable ();
7668 }
7669 }
7670
7671 /* Allocate mode-sized buffer. */
7672 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7673
7674 emit_move_insn (mem, vec);
7675 if (CONST_INT_P (elt))
7676 {
7677 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7678
7679 /* Add offset to field within buffer matching vector element. */
7680 mem = adjust_address_nv (mem, inner_mode,
7681 modulo_elt * GET_MODE_SIZE (inner_mode));
7682 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7683 }
7684 else
7685 {
7686 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7687 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7688
7689 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7690 if (ele_size > 1)
7691 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7692 rtx new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7693 new_addr = change_address (mem, inner_mode, new_addr);
7694 emit_move_insn (target, new_addr);
7695 }
7696 }
7697
7698 /* Return the offset within a memory object (MEM) of a vector type to a given
7699 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7700 the element is constant, we return a constant integer.
7701
7702 Otherwise, we use a base register temporary to calculate the offset after
7703 masking it to fit within the bounds of the vector and scaling it. The
7704 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7705 built-in function. */
7706
7707 static rtx
7708 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7709 {
7710 if (CONST_INT_P (element))
7711 return GEN_INT (INTVAL (element) * scalar_size);
7712
7713 /* All insns should use the 'Q' constraint (address is a single register) if
7714 the element number is not a constant. */
7715 gcc_assert (satisfies_constraint_Q (mem));
7716
7717 /* Mask the element to make sure the element number is between 0 and the
7718 maximum number of elements - 1 so that we don't generate an address
7719 outside the vector. */
7720 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7721 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7722 emit_insn (gen_rtx_SET (base_tmp, and_op));
7723
7724 /* Shift the element to get the byte offset from the element number. */
7725 int shift = exact_log2 (scalar_size);
7726 gcc_assert (shift >= 0);
7727
7728 if (shift > 0)
7729 {
7730 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7731 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7732 }
7733
7734 return base_tmp;
7735 }
7736
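/* Worked examples for get_vector_offset above with a V4SImode vector
   (four 4-byte elements); register names are illustrative:

     element = (const_int 3) -> returns (const_int 12)
     element = (reg rN)      -> emits
                                  base_tmp = rN & 3        ; clamp to 0..3
                                  base_tmp = base_tmp << 2 ; scale by size
                                and returns BASE_TMP.  */
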
7737 /* Helper function to update a PC-relative address when we are adjusting a
7738 memory address (ADDR) of a vector to point to a scalar field within it at
7739 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7740 use the base register temporary (BASE_TMP) to form the address. */
7741
7742 static rtx
7743 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7744 {
7745 rtx new_addr = NULL;
7746
7747 gcc_assert (CONST_INT_P (element_offset));
7748
7749 if (GET_CODE (addr) == CONST)
7750 addr = XEXP (addr, 0);
7751
7752 if (GET_CODE (addr) == PLUS)
7753 {
7754 rtx op0 = XEXP (addr, 0);
7755 rtx op1 = XEXP (addr, 1);
7756
7757 if (CONST_INT_P (op1))
7758 {
7759 HOST_WIDE_INT offset
7760 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7761
7762 if (offset == 0)
7763 new_addr = op0;
7764
7765 else
7766 {
7767 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7768 new_addr = gen_rtx_CONST (Pmode, plus);
7769 }
7770 }
7771
7772 else
7773 {
7774 emit_move_insn (base_tmp, addr);
7775 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7776 }
7777 }
7778
7779 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7780 {
7781 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7782 new_addr = gen_rtx_CONST (Pmode, plus);
7783 }
7784
7785 else
7786 gcc_unreachable ();
7787
7788 return new_addr;
7789 }
7790
7791 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7792 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7793 temporary (BASE_TMP) to fixup the address. Return the new memory address
7794 that is valid for reads or writes to a given register (SCALAR_REG).
7795
7796 This function is expected to be called after reload is completed when we are
7797 splitting insns. The temporary BASE_TMP might be set multiple times with
7798 this code. */
7799
7800 rtx
7801 rs6000_adjust_vec_address (rtx scalar_reg,
7802 rtx mem,
7803 rtx element,
7804 rtx base_tmp,
7805 machine_mode scalar_mode)
7806 {
7807 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7808 rtx addr = XEXP (mem, 0);
7809 rtx new_addr;
7810
7811 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7812 gcc_assert (!reg_mentioned_p (base_tmp, element));
7813
7814 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7815 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7816
7817 /* Calculate what we need to add to the address to get the element
7818 address. */
7819 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7820
7821 /* Create the new address pointing to the element within the vector. If we
7822 are adding 0, we don't have to change the address. */
7823 if (element_offset == const0_rtx)
7824 new_addr = addr;
7825
7826 /* A simple indirect address can be converted into a reg + offset
7827 address. */
7828 else if (REG_P (addr) || SUBREG_P (addr))
7829 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7830
7831 /* For references to local static variables, fold a constant offset into the
7832 address. */
7833 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7834 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7835
7836 /* Optimize D-FORM addresses with constant offset with a constant element, to
7837 include the element offset in the address directly. */
7838 else if (GET_CODE (addr) == PLUS)
7839 {
7840 rtx op0 = XEXP (addr, 0);
7841 rtx op1 = XEXP (addr, 1);
7842
7843 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7844 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7845 {
7846 /* op0 should never be r0, because r0+offset is not valid. But it
7847 doesn't hurt to make sure it is not r0. */
7848 gcc_assert (reg_or_subregno (op0) != 0);
7849
7850 /* D-FORM address with constant element number. */
7851 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7852 rtx offset_rtx = GEN_INT (offset);
7853 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7854 }
7855 else
7856 {
7857 /* If we don't have a D-FORM address with a constant element number,
7858 add the two elements in the current address. Then add the offset.
7859
7860 Previously, we tried to add the offset to OP1 and change the
7861 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7862 complicated because we had to verify that op1 was not GPR0 and we
7863 had a constant element offset (due to the way ADDI is defined).
7864 By doing the add of OP0 and OP1 first, and then adding in the
7865 offset, it has the benefit that if D-FORM instructions are
7866 allowed, the offset is part of the memory access to the vector
7867 element. */
7868 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7869 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7870 }
7871 }
7872
7873 else
7874 {
7875 emit_move_insn (base_tmp, addr);
7876 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7877 }
7878
7879 /* If the address isn't valid, move the address into the temporary base
7880 register. Some reasons it could not be valid include:
7881
7882 The address offset overflowed the 16 or 34 bit offset size;
7883 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7884 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7885 Only X_FORM loads can be done, and the address is D_FORM. */
7886
7887 enum insn_form iform
7888 = address_to_insn_form (new_addr, scalar_mode,
7889 reg_to_non_prefixed (scalar_reg, scalar_mode));
7890
7891 if (iform == INSN_FORM_BAD)
7892 {
7893 emit_move_insn (base_tmp, new_addr);
7894 new_addr = base_tmp;
7895 }
7896
7897 return change_address (mem, scalar_mode, new_addr);
7898 }
7899
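/* For example (a sketch): extracting constant element 2 of a V4SImode
   vector held at (mem (plus (reg r9) (const_int 16))) folds the element
   offset into the displacement, giving (mem:SI (plus (reg r9)
   (const_int 24))).  If the combined address is not a valid insn form
   for the scalar access, it is moved into BASE_TMP and a single-register
   address is used instead.  */
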
7900 /* Split a variable vec_extract operation into the component instructions. */
7901
7902 void
7903 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7904 rtx tmp_altivec)
7905 {
7906 machine_mode mode = GET_MODE (src);
7907 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7908 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7909 int byte_shift = exact_log2 (scalar_size);
7910
7911 gcc_assert (byte_shift >= 0);
7912
7913 /* If we are given a memory address, optimize to load just the element. We
7914 don't have to adjust the vector element number on little endian
7915 systems. */
7916 if (MEM_P (src))
7917 {
7918 emit_move_insn (dest,
7919 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7920 scalar_mode));
7921 return;
7922 }
7923
7924 else if (REG_P (src) || SUBREG_P (src))
7925 {
7926 int num_elements = GET_MODE_NUNITS (mode);
7927 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7928 int bit_shift = 7 - exact_log2 (num_elements);
7929 rtx element2;
7930 unsigned int dest_regno = reg_or_subregno (dest);
7931 unsigned int src_regno = reg_or_subregno (src);
7932 unsigned int element_regno = reg_or_subregno (element);
7933
7934 gcc_assert (REG_P (tmp_gpr));
7935
7936 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7937 a general purpose register. */
7938 if (TARGET_P9_VECTOR
7939 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7940 && INT_REGNO_P (dest_regno)
7941 && ALTIVEC_REGNO_P (src_regno)
7942 && INT_REGNO_P (element_regno))
7943 {
7944 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7945 rtx element_si = gen_rtx_REG (SImode, element_regno);
7946
7947 if (mode == V16QImode)
7948 emit_insn (BYTES_BIG_ENDIAN
7949 ? gen_vextublx (dest_si, element_si, src)
7950 : gen_vextubrx (dest_si, element_si, src));
7951
7952 else if (mode == V8HImode)
7953 {
7954 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7955 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7956 emit_insn (BYTES_BIG_ENDIAN
7957 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7958 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7959 }
7960
7961
7962 else
7963 {
7964 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7965 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7966 emit_insn (BYTES_BIG_ENDIAN
7967 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7968 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7969 }
7970
7971 return;
7972 }
7973
7974
7975 gcc_assert (REG_P (tmp_altivec));
7976
7977 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7978 an XOR, otherwise we need to subtract. The shift amount is chosen so VSLO
7979 will shift the element into the upper position (adding 3 to convert a
7980 byte shift into a bit shift). */
7981 if (scalar_size == 8)
7982 {
7983 if (!BYTES_BIG_ENDIAN)
7984 {
7985 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7986 element2 = tmp_gpr;
7987 }
7988 else
7989 element2 = element;
7990
7991 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7992 bit. */
7993 emit_insn (gen_rtx_SET (tmp_gpr,
7994 gen_rtx_AND (DImode,
7995 gen_rtx_ASHIFT (DImode,
7996 element2,
7997 GEN_INT (6)),
7998 GEN_INT (64))));
7999 }
8000 else
8001 {
8002 if (!BYTES_BIG_ENDIAN)
8003 {
8004 rtx num_ele_m1 = GEN_INT (num_elements - 1);
8005
8006 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8007 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8008 element2 = tmp_gpr;
8009 }
8010 else
8011 element2 = element;
8012
8013 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8014 }
8015
8016 /* Get the value into the lower byte of the Altivec register where VSLO
8017 expects it. */
8018 if (TARGET_P9_VECTOR)
8019 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8020 else if (can_create_pseudo_p ())
8021 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8022 else
8023 {
8024 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8025 emit_move_insn (tmp_di, tmp_gpr);
8026 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8027 }
8028
8029 /* Do the VSLO to get the value into the final location. */
8030 switch (mode)
8031 {
8032 case E_V2DFmode:
8033 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8034 return;
8035
8036 case E_V2DImode:
8037 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8038 return;
8039
8040 case E_V4SFmode:
8041 {
8042 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8043 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8044 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8045 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8046 tmp_altivec));
8047
8048 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8049 return;
8050 }
8051
8052 case E_V4SImode:
8053 case E_V8HImode:
8054 case E_V16QImode:
8055 {
8056 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8057 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8058 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8059 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8060 tmp_altivec));
8061 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8062 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
8063 GEN_INT (64 - bits_in_element)));
8064 return;
8065 }
8066
8067 default:
8068 gcc_unreachable ();
8069 }
8070
8071 return;
8072 }
8073 else
8074 gcc_unreachable ();
8075 }
8076
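/* A sketch of the register path above for a variable V4SImode extract on
   a little-endian P9 system (register numbers are illustrative):

     ; tmp_gpr = (3 - (element & 3)) << 5, via the and/sub/shift above
     mtvsrdd vs32,r10,r10   ; get the bit shift into a vector register
     vslo    v1,v2,v0       ; shift the element into the upper doubleword
     mfvsrd  r3,vs33        ; move that doubleword into the GPR result
     srdi    r3,r3,32       ; right-justify the 32-bit element.  */
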
8077 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8078 selects whether the alignment is ABI-mandated, optional, or
8079 both ABI-mandated and optional alignment. */
8080
8081 unsigned int
8082 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8083 {
8084 if (how != align_opt)
8085 {
8086 if (VECTOR_TYPE_P (type) && align < 128)
8087 align = 128;
8088 }
8089
8090 if (how != align_abi)
8091 {
8092 if (TREE_CODE (type) == ARRAY_TYPE
8093 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8094 {
8095 if (align < BITS_PER_WORD)
8096 align = BITS_PER_WORD;
8097 }
8098 }
8099
8100 return align;
8101 }
8102
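/* Examples of the adjustments made by rs6000_data_alignment above:

     vector int v;    // raised to 128 bits (ABI-mandated)
     char buf[100];   // raised to BITS_PER_WORD (optional alignment,
                      // skipped when only ABI alignment is requested)
     double d;        // returned unchanged.  */
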
8103 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8104 instructions simply ignore the low bits; VSX memory instructions
8105 are aligned to 4 or 8 bytes. */
8106
8107 static bool
8108 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8109 {
8110 return (STRICT_ALIGNMENT
8111 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8112 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8113 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
8114 && (int) align < VECTOR_ALIGN (mode)))));
8115 }
8116
8117 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8118
8119 unsigned int
8120 rs6000_special_adjust_field_align (tree type, unsigned int computed)
8121 {
8122 if (computed <= 32 || TYPE_PACKED (type))
8123 return computed;
8124
8125 /* Strip initial arrays. */
8126 while (TREE_CODE (type) == ARRAY_TYPE)
8127 type = TREE_TYPE (type);
8128
8129 /* If RECORD or UNION, recursively find the first field. */
8130 while (AGGREGATE_TYPE_P (type))
8131 {
8132 tree field = TYPE_FIELDS (type);
8133
8134 /* Skip all non-field decls. */
8135 while (field != NULL
8136 && (TREE_CODE (field) != FIELD_DECL
8137 || DECL_FIELD_ABI_IGNORED (field)))
8138 field = DECL_CHAIN (field);
8139
8140 if (! field)
8141 break;
8142
8143 /* A packed field does not contribute any extra alignment. */
8144 if (DECL_PACKED (field))
8145 return computed;
8146
8147 type = TREE_TYPE (field);
8148
8149 /* Strip arrays. */
8150 while (TREE_CODE (type) == ARRAY_TYPE)
8151 type = TREE_TYPE (type);
8152 }
8153
8154 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8155 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8156 computed = MIN (computed, 32);
8157
8158 return computed;
8159 }
8160
8161 /* AIX increases natural record alignment to doubleword if the innermost first
8162 field is an FP double while the FP fields remain word aligned.
8163 Only called if TYPE initially is a RECORD or UNION. */
8164
8165 unsigned int
8166 rs6000_special_round_type_align (tree type, unsigned int computed,
8167 unsigned int specified)
8168 {
8169 unsigned int align = MAX (computed, specified);
8170
8171 if (TYPE_PACKED (type) || align >= 64)
8172 return align;
8173
8174 /* If RECORD or UNION, recursively find the first field. */
8175 do
8176 {
8177 tree field = TYPE_FIELDS (type);
8178
8179 /* Skip all non-field decls. */
8180 while (field != NULL
8181 && (TREE_CODE (field) != FIELD_DECL
8182 || DECL_FIELD_ABI_IGNORED (field)))
8183 field = DECL_CHAIN (field);
8184
8185 if (! field)
8186 break;
8187
8188 /* A packed field does not contribute any extra alignment. */
8189 if (DECL_PACKED (field))
8190 return align;
8191
8192 type = TREE_TYPE (field);
8193
8194 /* Strip arrays. */
8195 while (TREE_CODE (type) == ARRAY_TYPE)
8196 type = TREE_TYPE (type);
8197 } while (AGGREGATE_TYPE_P (type));
8198
8199 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8200 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8201 align = MAX (align, 64);
8202
8203 return align;
8204 }
8205
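/* Examples under the AIX rule implemented above (a sketch):

     struct a { double d; int i; };   // alignment raised to 64 bits
     struct b { int i; double d; };   // unchanged: the first field is
                                      // not an FP double
     struct c { double d; } __attribute__ ((packed));  // unchanged.  */
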
8206 /* Darwin increases record alignment to the natural alignment of
8207 the first field. */
8208
8209 unsigned int
8210 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8211 unsigned int specified)
8212 {
8213 unsigned int align = MAX (computed, specified);
8214
8215 if (TYPE_PACKED (type))
8216 return align;
8217
8218 /* Find the first field, looking down into aggregates. */
8219 do {
8220 tree field = TYPE_FIELDS (type);
8221 /* Skip all non-field decls. */
8222 while (field != NULL
8223 && (TREE_CODE (field) != FIELD_DECL
8224 || DECL_FIELD_ABI_IGNORED (field)))
8225 field = DECL_CHAIN (field);
8226 if (! field)
8227 break;
8228 /* A packed field does not contribute any extra alignment. */
8229 if (DECL_PACKED (field))
8230 return align;
8231 type = TREE_TYPE (field);
8232 while (TREE_CODE (type) == ARRAY_TYPE)
8233 type = TREE_TYPE (type);
8234 } while (AGGREGATE_TYPE_P (type));
8235
8236 if (type != error_mark_node && ! AGGREGATE_TYPE_P (type)
8237 && ! TYPE_PACKED (type) && maximum_field_alignment == 0)
8238 align = MAX (align, TYPE_ALIGN (type));
8239
8240 return align;
8241 }
8242
8243 /* Return 1 for an operand in small memory on V.4/eabi. */
8244
8245 int
8246 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8247 machine_mode mode ATTRIBUTE_UNUSED)
8248 {
8249 #if TARGET_ELF
8250 rtx sym_ref;
8251
8252 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8253 return 0;
8254
8255 if (DEFAULT_ABI != ABI_V4)
8256 return 0;
8257
8258 if (SYMBOL_REF_P (op))
8259 sym_ref = op;
8260
8261 else if (GET_CODE (op) != CONST
8262 || GET_CODE (XEXP (op, 0)) != PLUS
8263 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8264 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8265 return 0;
8266
8267 else
8268 {
8269 rtx sum = XEXP (op, 0);
8270 HOST_WIDE_INT summand;
8271
8272 /* We have to be careful here, because it is the referenced address
8273 that must be within 32k of _SDA_BASE_, not just the symbol. */
8274 summand = INTVAL (XEXP (sum, 1));
8275 if (summand < 0 || summand > g_switch_value)
8276 return 0;
8277
8278 sym_ref = XEXP (sum, 0);
8279 }
8280
8281 return SYMBOL_REF_SMALL_P (sym_ref);
8282 #else
8283 return 0;
8284 #endif
8285 }
8286
8287 /* Return true if either operand is a general purpose register. */
8288
8289 bool
8290 gpr_or_gpr_p (rtx op0, rtx op1)
8291 {
8292 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8293 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8294 }
8295
8296 /* Return true if this is a move direct operation between GPR registers and
8297 floating point/VSX registers. */
8298
8299 bool
8300 direct_move_p (rtx op0, rtx op1)
8301 {
8302 if (!REG_P (op0) || !REG_P (op1))
8303 return false;
8304
8305 if (!TARGET_DIRECT_MOVE)
8306 return false;
8307
8308 int regno0 = REGNO (op0);
8309 int regno1 = REGNO (op1);
8310 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8311 return false;
8312
8313 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8314 return true;
8315
8316 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8317 return true;
8318
8319 return false;
8320 }
8321
8322 /* Return true if ADDR is an acceptable address for a quad memory
8323 operation of mode MODE (either LQ/STQ for general purpose registers, or
8324 LXV/STXV for vector registers under ISA 3.0). STRICT requests strict
8325 checking of the base register, as is required when addresses are
8326 validated during and after reload. */
8327
8328 bool
8329 quad_address_p (rtx addr, machine_mode mode, bool strict)
8330 {
8331 rtx op0, op1;
8332
8333 if (GET_MODE_SIZE (mode) < 16)
8334 return false;
8335
8336 if (legitimate_indirect_address_p (addr, strict))
8337 return true;
8338
8339 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8340 return false;
8341
8342 /* Is this a valid prefixed address? If the bottom four bits of the offset
8343 are non-zero, we could use a prefixed instruction (which does not have the
8344 DQ-form constraint that the traditional instruction had) instead of
8345 forcing the unaligned offset to a GPR. */
8346 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8347 return true;
8348
8349 if (GET_CODE (addr) != PLUS)
8350 return false;
8351
8352 op0 = XEXP (addr, 0);
8353 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8354 return false;
8355
8356 op1 = XEXP (addr, 1);
8357 if (!CONST_INT_P (op1))
8358 return false;
8359
8360 return quad_address_offset_p (INTVAL (op1));
8361 }
8362
8363 /* Return true if this is a load or store quad operation. This function does
8364 not handle the atomic quad memory instructions. */
8365
8366 bool
8367 quad_load_store_p (rtx op0, rtx op1)
8368 {
8369 bool ret;
8370
8371 if (!TARGET_QUAD_MEMORY)
8372 ret = false;
8373
8374 else if (REG_P (op0) && MEM_P (op1))
8375 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8376 && quad_memory_operand (op1, GET_MODE (op1))
8377 && !reg_overlap_mentioned_p (op0, op1));
8378
8379 else if (MEM_P (op0) && REG_P (op1))
8380 ret = (quad_memory_operand (op0, GET_MODE (op0))
8381 && quad_int_reg_operand (op1, GET_MODE (op1)));
8382
8383 else
8384 ret = false;
8385
8386 if (TARGET_DEBUG_ADDR)
8387 {
8388 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8389 ret ? "true" : "false");
8390 debug_rtx (gen_rtx_SET (op0, op1));
8391 }
8392
8393 return ret;
8394 }
8395
8396 /* Given an address, return a constant offset term if one exists. */
8397
8398 static rtx
8399 address_offset (rtx op)
8400 {
8401 if (GET_CODE (op) == PRE_INC
8402 || GET_CODE (op) == PRE_DEC)
8403 op = XEXP (op, 0);
8404 else if (GET_CODE (op) == PRE_MODIFY
8405 || GET_CODE (op) == LO_SUM)
8406 op = XEXP (op, 1);
8407
8408 if (GET_CODE (op) == CONST)
8409 op = XEXP (op, 0);
8410
8411 if (GET_CODE (op) == PLUS)
8412 op = XEXP (op, 1);
8413
8414 if (CONST_INT_P (op))
8415 return op;
8416
8417 return NULL_RTX;
8418 }
8419
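/* Worked examples for address_offset above:

     (plus (reg r3) (const_int 8))                    -> (const_int 8)
     (lo_sum (reg r3) (symbol_ref "x"))               -> NULL_RTX
     (lo_sum (reg r3) (const (plus (symbol_ref "x")
                                   (const_int 12))))  -> (const_int 12)
     (reg r3)                                         -> NULL_RTX.  */
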
8420 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8421 the mode. If we can't find (or don't know) the alignment of the symbol
8422 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8423 should be pessimistic]. Offsets are validated in the same way as for
8424 reg + offset. */
8425 static bool
8426 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8427 {
8428 /* We should not get here with this. */
8429 gcc_checking_assert (! mode_supports_dq_form (mode));
8430
8431 if (GET_CODE (x) == CONST)
8432 x = XEXP (x, 0);
8433
8434 /* If we are building PIC code, then any symbol must be wrapped in an
8435 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8436 bool machopic_offs_p = false;
8437 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8438 {
8439 x = XVECEXP (x, 0, 0);
8440 machopic_offs_p = true;
8441 }
8442
8443 rtx sym = NULL_RTX;
8444 unsigned HOST_WIDE_INT offset = 0;
8445
8446 if (GET_CODE (x) == PLUS)
8447 {
8448 sym = XEXP (x, 0);
8449 if (! SYMBOL_REF_P (sym))
8450 return false;
8451 if (!CONST_INT_P (XEXP (x, 1)))
8452 return false;
8453 offset = INTVAL (XEXP (x, 1));
8454 }
8455 else if (SYMBOL_REF_P (x))
8456 sym = x;
8457 else if (CONST_INT_P (x))
8458 offset = INTVAL (x);
8459 else if (GET_CODE (x) == LABEL_REF)
8460 offset = 0; // We assume code labels are Pmode aligned
8461 else
8462 return false; // not sure what we have here.
8463
8464 /* If we don't know the alignment of the thing to which the symbol refers,
8465 we assume optimistically it is "enough".
8466 ??? maybe we should be pessimistic instead. */
8467 unsigned align = 0;
8468
8469 if (sym)
8470 {
8471 tree decl = SYMBOL_REF_DECL (sym);
8472 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8473 if (TARGET_MACHO && flag_pic && !machopic_offs_p)
8474 return false;
8475 #if TARGET_MACHO
8476 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8477 /* The decl in an indirection symbol is the original one, which might
8478 be less aligned than the indirection. Our indirections are always
8479 pointer-aligned. */
8480 ;
8481 else
8482 #endif
8483 if (decl && DECL_ALIGN (decl))
8484 align = DECL_ALIGN_UNIT (decl);
8485 }
8486
8487 unsigned int extra = 0;
8488 switch (mode)
8489 {
8490 case E_DFmode:
8491 case E_DDmode:
8492 case E_DImode:
8493 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8494 addressing. */
8495 if (VECTOR_MEM_VSX_P (mode))
8496 return false;
8497
8498 if (!TARGET_POWERPC64)
8499 extra = 4;
8500 else if ((offset & 3) || (align & 3))
8501 return false;
8502 break;
8503
8504 case E_TFmode:
8505 case E_IFmode:
8506 case E_KFmode:
8507 case E_TDmode:
8508 case E_TImode:
8509 case E_PTImode:
8510 extra = 8;
8511 if (!TARGET_POWERPC64)
8512 extra = 12;
8513 else if ((offset & 3) || (align & 3))
8514 return false;
8515 break;
8516
8517 default:
8518 break;
8519 }
8520
8521 /* We only care if the access(es) would cause a change to the high part. */
8522 offset = sext_hwi (offset, 16);
8523 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8524 }
8525
8526 /* Return true if the MEM operand is a memory operand suitable for use
8527 with a (full width, possibly multiple) gpr load/store. On
8528 powerpc64 this means the offset must be divisible by 4.
8529 Implements 'Y' constraint.
8530
8531 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8532 a constraint function we know the operand has satisfied a suitable
8533 memory predicate.
8534
8535 Offsetting a lo_sum should not be allowed, except where we know by
8536 alignment that a 32k boundary is not crossed. Note that by
8537 "offsetting" here we mean a further offset to access parts of the
8538 MEM. It's fine to have a lo_sum where the inner address is offset
8539 from a sym, since the same sym+offset will appear in the high part
8540 of the address calculation. */
8541
8542 bool
8543 mem_operand_gpr (rtx op, machine_mode mode)
8544 {
8545 unsigned HOST_WIDE_INT offset;
8546 int extra;
8547 rtx addr = XEXP (op, 0);
8548
8549 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8550 if (TARGET_UPDATE
8551 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8552 && mode_supports_pre_incdec_p (mode)
8553 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8554 return true;
8555
8556 /* Allow prefixed instructions if supported. If the bottom two bits of the
8557 offset are non-zero, we could use a prefixed instruction (which does not
8558 have the DS-form constraint that the traditional instruction had) instead
8559 of forcing the unaligned offset to a GPR. */
8560 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8561 return true;
8562
8563 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8564 really OK. Doing this early avoids teaching all the other machinery
8565 about them. */
8566 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8567 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8568
8569 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8570 if (!rs6000_offsettable_memref_p (op, mode, false))
8571 return false;
8572
8573 op = address_offset (addr);
8574 if (op == NULL_RTX)
8575 return true;
8576
8577 offset = INTVAL (op);
8578 if (TARGET_POWERPC64 && (offset & 3) != 0)
8579 return false;
8580
8581 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8582 if (extra < 0)
8583 extra = 0;
8584
8585 if (GET_CODE (addr) == LO_SUM)
8586 /* For lo_sum addresses, we must allow any offset except one that
8587 causes a wrap, so test only the low 16 bits. */
8588 offset = sext_hwi (offset, 16);
8589
8590 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8591 }
8592
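/* Examples for mem_operand_gpr above on powerpc64 (a sketch):

     (mem:DI (plus (reg r9) (const_int 32)))  -> true: valid DS-form "ld"
     (mem:DI (plus (reg r9) (const_int 34)))  -> false without prefixed
                 insns, since the offset is not a multiple of 4; with
                 prefixed addressing (e.g. -mcpu=power10) "pld" allows it
     (mem:TI (plus (reg r9) (const_int 32760))) -> false: the second
                 doubleword would need offset 32768, overflowing the
                 16-bit displacement.  */
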
8593 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8594 enforce an offset divisible by 4 even for 32-bit. */
8595
8596 bool
8597 mem_operand_ds_form (rtx op, machine_mode mode)
8598 {
8599 unsigned HOST_WIDE_INT offset;
8600 int extra;
8601 rtx addr = XEXP (op, 0);
8602
8603 /* Allow prefixed instructions if supported. If the bottom two bits of the
8604 offset are non-zero, we could use a prefixed instruction (which does not
8605 have the DS-form constraint that the traditional instruction had) instead
8606 of forcing the unaligned offset to a GPR. */
8607 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8608 return true;
8609
8610 if (!offsettable_address_p (false, mode, addr))
8611 return false;
8612
8613 op = address_offset (addr);
8614 if (op == NULL_RTX)
8615 return true;
8616
8617 offset = INTVAL (op);
8618 if ((offset & 3) != 0)
8619 return false;
8620
8621 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8622 if (extra < 0)
8623 extra = 0;
8624
8625 if (GET_CODE (addr) == LO_SUM)
8626 /* For lo_sum addresses, we must allow any offset except one that
8627 causes a wrap, so test only the low 16 bits. */
8628 offset = sext_hwi (offset, 16);
8629
8630 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8631 }
8632 \f
8633 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8634
8635 static bool
8636 reg_offset_addressing_ok_p (machine_mode mode)
8637 {
8638 switch (mode)
8639 {
8640 case E_V16QImode:
8641 case E_V8HImode:
8642 case E_V4SFmode:
8643 case E_V4SImode:
8644 case E_V2DFmode:
8645 case E_V2DImode:
8646 case E_V1TImode:
8647 case E_TImode:
8648 case E_TFmode:
8649 case E_KFmode:
8650 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8651 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8652 a vector mode, if we want to use the VSX registers to move it around,
8653 we need to restrict ourselves to reg+reg addressing. Similarly for
8654 IEEE 128-bit floating point that is passed in a single vector
8655 register. */
8656 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8657 return mode_supports_dq_form (mode);
8658 break;
8659
8660 /* The vector pair/quad types support offset addressing if the
8661 underlying vectors support offset addressing. */
8662 case E_OOmode:
8663 case E_XOmode:
8664 return TARGET_MMA;
8665
8666 case E_SDmode:
8667 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8668 addressing for the LFIWZX and STFIWX instructions. */
8669 if (TARGET_NO_SDMODE_STACK)
8670 return false;
8671 break;
8672
8673 default:
8674 break;
8675 }
8676
8677 return true;
8678 }
8679
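/* For instance (an illustrative sketch): a V4SImode access without ISA 3.0
   d-form support must use reg+reg addressing (X-form "lvx"/"lxvw4x"), so
   this function returns false for it; with ISA 3.0 the DQ-form "lxv" and
   "stxv" instructions allow reg+offset, where the 16-bit displacement must
   be a multiple of 16.  */
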
8680 static bool
8681 virtual_stack_registers_memory_p (rtx op)
8682 {
8683 int regnum;
8684
8685 if (REG_P (op))
8686 regnum = REGNO (op);
8687
8688 else if (GET_CODE (op) == PLUS
8689 && REG_P (XEXP (op, 0))
8690 && CONST_INT_P (XEXP (op, 1)))
8691 regnum = REGNO (XEXP (op, 0));
8692
8693 else
8694 return false;
8695
8696 return (regnum >= FIRST_VIRTUAL_REGISTER
8697 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8698 }
8699
8700 /* Return true if a MODE sized memory access to OP plus OFFSET
8701 is known to not straddle a 32k boundary. This function is used
8702 to determine whether -mcmodel=medium code can use TOC pointer
8703 relative addressing for OP. This means the alignment of the TOC
8704 pointer must also be taken into account, and unfortunately that is
8705 only 8 bytes. */
8706
8707 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8708 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8709 #endif
8710
8711 static bool
8712 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8713 machine_mode mode)
8714 {
8715 tree decl;
8716 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8717
8718 if (!SYMBOL_REF_P (op))
8719 return false;
8720
8721 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8722 SYMBOL_REF. */
8723 if (mode_supports_dq_form (mode))
8724 return false;
8725
8726 dsize = GET_MODE_SIZE (mode);
8727 decl = SYMBOL_REF_DECL (op);
8728 if (!decl)
8729 {
8730 if (dsize == 0)
8731 return false;
8732
8733 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8734 replacing memory addresses with an anchor plus offset. We
8735 could find the decl by rummaging around in the block->objects
8736 VEC for the given offset but that seems like too much work. */
8737 dalign = BITS_PER_UNIT;
8738 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8739 && SYMBOL_REF_ANCHOR_P (op)
8740 && SYMBOL_REF_BLOCK (op) != NULL)
8741 {
8742 struct object_block *block = SYMBOL_REF_BLOCK (op);
8743
8744 dalign = block->alignment;
8745 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8746 }
8747 else if (CONSTANT_POOL_ADDRESS_P (op))
8748 {
8749 /* It would be nice to have get_pool_align()... */
8750 machine_mode cmode = get_pool_mode (op);
8751
8752 dalign = GET_MODE_ALIGNMENT (cmode);
8753 }
8754 }
8755 else if (DECL_P (decl))
8756 {
8757 dalign = DECL_ALIGN (decl);
8758
8759 if (dsize == 0)
8760 {
8761 /* Allow BLKmode when the entire object is known to not
8762 cross a 32k boundary. */
8763 if (!DECL_SIZE_UNIT (decl))
8764 return false;
8765
8766 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8767 return false;
8768
8769 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8770 if (dsize > 32768)
8771 return false;
8772
8773 dalign /= BITS_PER_UNIT;
8774 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8775 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8776 return dalign >= dsize;
8777 }
8778 }
8779 else
8780 gcc_unreachable ();
8781
8782 /* Find how many bits of the alignment we know for this access. */
8783 dalign /= BITS_PER_UNIT;
8784 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8785 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8786 mask = dalign - 1;
8787 lsb = offset & -offset;
8788 mask &= lsb - 1;
8789 dalign = mask + 1;
8790
8791 return dalign >= dsize;
8792 }
8793
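/* A worked example of the mask arithmetic above (illustrative): consider a
   DImode access (dsize == 8) to a decl aligned to 16 bytes.  dalign is
   first capped at POWERPC64_TOC_POINTER_ALIGNMENT == 8.  With offset == 4,
   lsb == 4 and mask == (8 - 1) & (4 - 1) == 3, so the known alignment is
   4 < 8 and we return false.  With offset == 8, lsb == 8 and
   mask == 7 & 7 == 7, so the known alignment is 8 >= 8 and we return
   true.  */
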
8794 static bool
8795 constant_pool_expr_p (rtx op)
8796 {
8797 rtx base, offset;
8798
8799 split_const (op, &base, &offset);
8800 return (SYMBOL_REF_P (base)
8801 && CONSTANT_POOL_ADDRESS_P (base)
8802 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8803 }
8804
8805 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8806 use that as the register to put the HIGH value into if register allocation
8807 is already done. */
8808
8809 rtx
8810 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8811 {
8812 rtx tocrel, tocreg, hi;
8813
8814 gcc_assert (TARGET_TOC);
8815
8816 if (TARGET_DEBUG_ADDR)
8817 {
8818 if (SYMBOL_REF_P (symbol))
8819 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8820 XSTR (symbol, 0));
8821 else
8822 {
8823 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8824 GET_RTX_NAME (GET_CODE (symbol)));
8825 debug_rtx (symbol);
8826 }
8827 }
8828
8829 if (!can_create_pseudo_p ())
8830 df_set_regs_ever_live (TOC_REGISTER, true);
8831
8832 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8833 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8834 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8835 return tocrel;
8836
8837 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8838 if (largetoc_reg != NULL)
8839 {
8840 emit_move_insn (largetoc_reg, hi);
8841 hi = largetoc_reg;
8842 }
8843 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8844 }
8845
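/* A sketch of the generated forms (illustrative): with -mcmodel=small the
   result is simply

     (unspec:DI [(symbol_ref "sym") (reg:DI 2)] UNSPEC_TOCREL)

   which is printed as a single TOC-relative access "sym@toc(2)", while for
   the medium/large code models the HIGH/LO_SUM pair corresponds to the
   usual two-instruction sequence

     addis 9,2,sym@toc@ha
     addi 3,9,sym@toc@l     (or a D/DS-form memory access using @toc@l).  */
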
8846 /* These are only used to pass through from print_operand/print_operand_address
8847 to rs6000_output_addr_const_extra over the intervening function
8848 output_addr_const which is not target code. */
8849 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8850
8851 /* Return true if OP is a toc pointer relative address (the output
8852 of create_TOC_reference). If STRICT, do not match non-split
8853 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8854 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8855 TOCREL_OFFSET_RET respectively. */
8856
8857 bool
8858 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8859 const_rtx *tocrel_offset_ret)
8860 {
8861 if (!TARGET_TOC)
8862 return false;
8863
8864 if (TARGET_CMODEL != CMODEL_SMALL)
8865 {
8866 /* When strict, ensure we have everything tidy. */
8867 if (strict
8868 && !(GET_CODE (op) == LO_SUM
8869 && REG_P (XEXP (op, 0))
8870 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8871 return false;
8872
8873 /* When not strict, allow non-split TOC addresses and also allow
8874 (lo_sum (high ..)) TOC addresses created during reload. */
8875 if (GET_CODE (op) == LO_SUM)
8876 op = XEXP (op, 1);
8877 }
8878
8879 const_rtx tocrel_base = op;
8880 const_rtx tocrel_offset = const0_rtx;
8881
8882 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8883 {
8884 tocrel_base = XEXP (op, 0);
8885 tocrel_offset = XEXP (op, 1);
8886 }
8887
8888 if (tocrel_base_ret)
8889 *tocrel_base_ret = tocrel_base;
8890 if (tocrel_offset_ret)
8891 *tocrel_offset_ret = tocrel_offset;
8892
8893 return (GET_CODE (tocrel_base) == UNSPEC
8894 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8895 && REG_P (XVECEXP (tocrel_base, 0, 1))
8896 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8897 }
8898
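/* For example (illustrative), with -mcmodel=medium a fully split address
   accepted under STRICT looks like

     (lo_sum:DI (reg:DI 9)
                (plus:DI (unspec:DI [(symbol_ref "sym") (reg:DI 2)]
                                    UNSPEC_TOCREL)
                         (const_int 8)))

   where (reg:DI 9) holds the HIGH part and the const_int is the further
   offset returned in *TOCREL_OFFSET_RET.  */
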
8899 /* Return true if X is a constant pool address, and also for cmodel=medium
8900 if X is a toc-relative address known to be offsettable within MODE. */
8901
8902 bool
8903 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8904 bool strict)
8905 {
8906 const_rtx tocrel_base, tocrel_offset;
8907 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8908 && (TARGET_CMODEL != CMODEL_MEDIUM
8909 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8910 || mode == QImode
8911 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8912 INTVAL (tocrel_offset), mode)));
8913 }
8914
8915 static bool
8916 legitimate_small_data_p (machine_mode mode, rtx x)
8917 {
8918 return (DEFAULT_ABI == ABI_V4
8919 && !flag_pic && !TARGET_TOC
8920 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8921 && small_data_operand (x, mode));
8922 }
8923
8924 bool
8925 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8926 bool strict, bool worst_case)
8927 {
8928 unsigned HOST_WIDE_INT offset;
8929 unsigned int extra;
8930
8931 if (GET_CODE (x) != PLUS)
8932 return false;
8933 if (!REG_P (XEXP (x, 0)))
8934 return false;
8935 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8936 return false;
8937 if (mode_supports_dq_form (mode))
8938 return quad_address_p (x, mode, strict);
8939 if (!reg_offset_addressing_ok_p (mode))
8940 return virtual_stack_registers_memory_p (x);
8941 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8942 return true;
8943 if (!CONST_INT_P (XEXP (x, 1)))
8944 return false;
8945
8946 offset = INTVAL (XEXP (x, 1));
8947 extra = 0;
8948 switch (mode)
8949 {
8950 case E_DFmode:
8951 case E_DDmode:
8952 case E_DImode:
8953 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8954 addressing. */
8955 if (VECTOR_MEM_VSX_P (mode))
8956 return false;
8957
8958 if (!worst_case)
8959 break;
8960 if (!TARGET_POWERPC64)
8961 extra = 4;
8962 else if (offset & 3)
8963 return false;
8964 break;
8965
8966 case E_TFmode:
8967 case E_IFmode:
8968 case E_KFmode:
8969 case E_TDmode:
8970 case E_TImode:
8971 case E_PTImode:
8972 extra = 8;
8973 if (!worst_case)
8974 break;
8975 if (!TARGET_POWERPC64)
8976 extra = 12;
8977 else if (offset & 3)
8978 return false;
8979 break;
8980
8981 default:
8982 break;
8983 }
8984
8985 if (TARGET_PREFIXED)
8986 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8987 else
8988 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8989 }
8990
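/* A small worked example (illustrative): for TImode with WORST_CASE on
   64-bit, extra == 8, so absent prefixed addressing an offset of 32760
   fails SIGNED_16BIT_OFFSET_EXTRA_P (the second doubleword at
   32760 + 8 == 32768 is not reachable with a signed 16-bit displacement),
   while 32752 passes; and any offset not divisible by 4 is rejected
   outright.  */
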
8991 bool
8992 legitimate_indexed_address_p (rtx x, int strict)
8993 {
8994 rtx op0, op1;
8995
8996 if (GET_CODE (x) != PLUS)
8997 return false;
8998
8999 op0 = XEXP (x, 0);
9000 op1 = XEXP (x, 1);
9001
9002 return (REG_P (op0) && REG_P (op1)
9003 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9004 && INT_REG_OK_FOR_INDEX_P (op1, strict))
9005 || (INT_REG_OK_FOR_BASE_P (op1, strict)
9006 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9007 }
9008
9009 bool
9010 avoiding_indexed_address_p (machine_mode mode)
9011 {
9012 unsigned int msize = GET_MODE_SIZE (mode);
9013
9014 /* Avoid indexed addressing for modes that have non-indexed load/store
9015 instruction forms. On power10, vector pairs have an indexed
9016 form, but vector quads don't. */
9017 if (msize > 16)
9018 return msize != 32;
9019
9020 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9021 }
9022
9023 bool
9024 legitimate_indirect_address_p (rtx x, int strict)
9025 {
9026 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
9027 }
9028
9029 bool
9030 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9031 {
9032 if (!TARGET_MACHO || !flag_pic
9033 || mode != SImode || !MEM_P (x))
9034 return false;
9035 x = XEXP (x, 0);
9036
9037 if (GET_CODE (x) != LO_SUM)
9038 return false;
9039 if (!REG_P (XEXP (x, 0)))
9040 return false;
9041 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9042 return false;
9043 x = XEXP (x, 1);
9044
9045 return CONSTANT_P (x);
9046 }
9047
9048 static bool
9049 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9050 {
9051 if (GET_CODE (x) != LO_SUM)
9052 return false;
9053 if (!REG_P (XEXP (x, 0)))
9054 return false;
9055 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9056 return false;
9057 /* Quad word addresses are restricted, and we can't use LO_SUM. */
9058 if (mode_supports_dq_form (mode))
9059 return false;
9060 x = XEXP (x, 1);
9061
9062 if (TARGET_ELF)
9063 {
9064 bool large_toc_ok;
9065
9066 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9067 return false;
9068 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9069 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9070 recognizes some LO_SUM addresses as valid although this
9071 function says the opposite. In most cases, LRA can generate
9072 correct code for address reloads through its transformations;
9073 it is only some LO_SUM cases that it cannot manage. So we need
9074 to add code here saying that such addresses are still valid. */
9075 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9076 && small_toc_ref (x, VOIDmode));
9077 if (TARGET_TOC && ! large_toc_ok)
9078 return false;
9079 if (GET_MODE_NUNITS (mode) != 1)
9080 return false;
9081 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9082 && !(/* ??? Assume floating point reg based on mode? */
9083 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9084 return false;
9085
9086 return CONSTANT_P (x) || large_toc_ok;
9087 }
9088 else if (TARGET_MACHO)
9089 {
9090 if (GET_MODE_NUNITS (mode) != 1)
9091 return false;
9092 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9093 && !(/* see above */
9094 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9095 return false;
9096 #if TARGET_MACHO
9097 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
9098 return CONSTANT_P (x);
9099 #endif
9100 /* Mach-O PIC code from here. */
9101 if (GET_CODE (x) == CONST)
9102 x = XEXP (x, 0);
9103
9104 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9105 if (SYMBOL_REF_P (x))
9106 return false;
9107
9108 /* So this is OK if the wrapped object is const. */
9109 if (GET_CODE (x) == UNSPEC
9110 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
9111 return CONSTANT_P (XVECEXP (x, 0, 0));
9112 return CONSTANT_P (x);
9113 }
9114 return false;
9115 }
9116
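/* A typical accepted form (illustrative): for 32-bit ELF without PIC,

     (lo_sum:SI (reg:SI 9) (symbol_ref:SI "x"))

   pairs with a preceding (high:SI (symbol_ref:SI "x")), i.e. the
   "lis 9,x@ha" / "lwz 3,x@l(9)" sequence.  The CONSTANT_P test accepts
   the symbol, while the mode checks reject multi-register values whose
   second word would need a further offset from the lo_sum.  */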
9117
9118 /* Try machine-dependent ways of modifying an illegitimate address
9119 to be legitimate. If we find one, return the new, valid address.
9120 This is used from only one place: `memory_address' in explow.cc.
9121
9122 OLDX is the address as it was before break_out_memory_refs was
9123 called. In some cases it is useful to look at this to decide what
9124 needs to be done.
9125
9126 It is always safe for this function to do nothing. It exists to
9127 recognize opportunities to optimize the output.
9128
9129 On RS/6000, first check for the sum of a register with a constant
9130 integer that is out of range. If so, generate code to add the
9131 constant with the low-order 16 bits masked to the register and force
9132 this result into another register (this can be done with `cau').
9133 Then generate an address of REG+(CONST&0xffff), allowing for the
9134 possibility of bit 16 being a one.
9135
9136 Then check for the sum of a register and something not constant, try to
9137 load the other things into a register and return the sum. */
9138
9139 static rtx
9140 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9141 machine_mode mode)
9142 {
9143 unsigned int extra;
9144
9145 if (!reg_offset_addressing_ok_p (mode)
9146 || mode_supports_dq_form (mode))
9147 {
9148 if (virtual_stack_registers_memory_p (x))
9149 return x;
9150
9151 /* In theory we should not be seeing addresses of the form reg+0,
9152 but just in case it is generated, optimize it away. */
9153 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9154 return force_reg (Pmode, XEXP (x, 0));
9155
9156 /* For TImode with load/store quad, restrict addresses to just a single
9157 pointer, so it works with both GPRs and VSX registers. */
9158 /* Make sure both operands are registers. */
9159 else if (GET_CODE (x) == PLUS
9160 && (mode != TImode || !TARGET_VSX))
9161 return gen_rtx_PLUS (Pmode,
9162 force_reg (Pmode, XEXP (x, 0)),
9163 force_reg (Pmode, XEXP (x, 1)));
9164 else
9165 return force_reg (Pmode, x);
9166 }
9167 if (SYMBOL_REF_P (x) && !TARGET_MACHO)
9168 {
9169 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9170 if (model != 0)
9171 return rs6000_legitimize_tls_address (x, model);
9172 }
9173
9174 extra = 0;
9175 switch (mode)
9176 {
9177 case E_TFmode:
9178 case E_TDmode:
9179 case E_TImode:
9180 case E_PTImode:
9181 case E_IFmode:
9182 case E_KFmode:
9183 /* As in legitimate_offset_address_p we do not assume
9184 worst-case. The mode here is just a hint as to the registers
9185 used. A TImode is usually in gprs, but may actually be in
9186 fprs. Leave worst-case scenario for reload to handle via
9187 insn constraints. PTImode is only GPRs. */
9188 extra = 8;
9189 break;
9190 default:
9191 break;
9192 }
9193
9194 if (GET_CODE (x) == PLUS
9195 && REG_P (XEXP (x, 0))
9196 && CONST_INT_P (XEXP (x, 1))
9197 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9198 >= 0x10000 - extra))
9199 {
9200 HOST_WIDE_INT high_int, low_int;
9201 rtx sum;
9202 low_int = sext_hwi (INTVAL (XEXP (x, 1)), 16);
9203 if (low_int >= 0x8000 - extra)
9204 low_int = 0;
9205 high_int = INTVAL (XEXP (x, 1)) - low_int;
9206 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9207 gen_int_mode (high_int, Pmode)), 0);
9208 return plus_constant (Pmode, sum, low_int);
9209 }
9210 else if (GET_CODE (x) == PLUS
9211 && REG_P (XEXP (x, 0))
9212 && !CONST_INT_P (XEXP (x, 1))
9213 && GET_MODE_NUNITS (mode) == 1
9214 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9215 || (/* ??? Assume floating point reg based on mode? */
9216 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9217 && !avoiding_indexed_address_p (mode))
9218 {
9219 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9220 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9221 }
9222 else if ((TARGET_ELF
9223 #if TARGET_MACHO
9224 || !MACHO_DYNAMIC_NO_PIC_P
9225 #endif
9226 )
9227 && TARGET_32BIT
9228 && TARGET_NO_TOC_OR_PCREL
9229 && !flag_pic
9230 && !CONST_INT_P (x)
9231 && !CONST_WIDE_INT_P (x)
9232 && !CONST_DOUBLE_P (x)
9233 && CONSTANT_P (x)
9234 && GET_MODE_NUNITS (mode) == 1
9235 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9236 || (/* ??? Assume floating point reg based on mode? */
9237 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9238 {
9239 rtx reg = gen_reg_rtx (Pmode);
9240 if (TARGET_ELF)
9241 emit_insn (gen_elf_high (reg, x));
9242 else
9243 emit_insn (gen_macho_high (Pmode, reg, x));
9244 return gen_rtx_LO_SUM (Pmode, reg, x);
9245 }
9246 else if (TARGET_TOC
9247 && SYMBOL_REF_P (x)
9248 && constant_pool_expr_p (x)
9249 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9250 return create_TOC_reference (x, NULL_RTX);
9251 else
9252 return x;
9253 }
9254
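/* A worked example of the high/low split above (illustrative): for
   x == (plus (reg 3) (const_int 36872)), the offset does not fit a signed
   16-bit displacement, so low_int == sext_hwi (36872, 16) == -28664 and
   high_int == 65536.  We emit "addis 9,3,1" for the high part and return
   (plus (reg 9) (const_int -28664)).  */
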
9255 /* Debug version of rs6000_legitimize_address. */
9256 static rtx
9257 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9258 {
9259 rtx ret;
9260 rtx_insn *insns;
9261
9262 start_sequence ();
9263 ret = rs6000_legitimize_address (x, oldx, mode);
9264 insns = get_insns ();
9265 end_sequence ();
9266
9267 if (ret != x)
9268 {
9269 fprintf (stderr,
9270 "\nrs6000_legitimize_address: mode %s, old code %s, "
9271 "new code %s, modified\n",
9272 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9273 GET_RTX_NAME (GET_CODE (ret)));
9274
9275 fprintf (stderr, "Original address:\n");
9276 debug_rtx (x);
9277
9278 fprintf (stderr, "oldx:\n");
9279 debug_rtx (oldx);
9280
9281 fprintf (stderr, "New address:\n");
9282 debug_rtx (ret);
9283
9284 if (insns)
9285 {
9286 fprintf (stderr, "Insns added:\n");
9287 debug_rtx_list (insns, 20);
9288 }
9289 }
9290 else
9291 {
9292 fprintf (stderr,
9293 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9294 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9295
9296 debug_rtx (x);
9297 }
9298
9299 if (insns)
9300 emit_insn (insns);
9301
9302 return ret;
9303 }
9304
9305 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9306 We need to emit DTP-relative relocations. */
9307
9308 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9309 static void
9310 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9311 {
9312 switch (size)
9313 {
9314 case 4:
9315 fputs ("\t.long\t", file);
9316 break;
9317 case 8:
9318 fputs (DOUBLE_INT_ASM_OP, file);
9319 break;
9320 default:
9321 gcc_unreachable ();
9322 }
9323 output_addr_const (file, x);
9324 if (TARGET_ELF)
9325 fputs ("@dtprel+0x8000", file);
9326 }
9327
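/* For example (illustrative), on a 64-bit ELF target where
   DOUBLE_INT_ASM_OP is "\t.quad\t", a SIZE of 8 produces something like

     .quad x@dtprel+0x8000

   the 0x8000 bias matching the DTP offset convention of the psABI.  */
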
9328 /* Return true if X is a symbol that refers to real (rather than emulated)
9329 TLS. */
9330
9331 static bool
9332 rs6000_real_tls_symbol_ref_p (rtx x)
9333 {
9334 return (SYMBOL_REF_P (x)
9335 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9336 }
9337
9338 /* In the name of slightly smaller debug output, and to cater to
9339 general assembler lossage, recognize various UNSPEC sequences
9340 and turn them back into a direct symbol reference. */
9341
9342 static rtx
9343 rs6000_delegitimize_address (rtx orig_x)
9344 {
9345 rtx x, y, offset;
9346
9347 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9348 encodes loading up the high part of the address of a TOC reference along
9349 with a load of a GPR using the same base register used for the load. We
9350 return the original SYMBOL_REF.
9351
9352 (set (reg:INT1 <reg>
9353 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9354
9355 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9356 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9357 We return the original SYMBOL_REF.
9358
9359 (parallel [(set (reg:DI <base-reg>)
9360 (unspec:DI [(symbol_ref <symbol>)
9361 (const_int <marker>)]
9362 UNSPEC_PCREL_OPT_LD_ADDR))
9363 (set (reg:DI <load-reg>)
9364 (unspec:DI [(const_int 0)]
9365 UNSPEC_PCREL_OPT_LD_DATA))])
9366
9367 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9368 GPR being loaded is the same as the GPR used to hold the external address.
9369
9370 (set (reg:DI <base-reg>)
9371 (unspec:DI [(symbol_ref <symbol>)
9372 (const_int <marker>)]
9373 UNSPEC_PCREL_OPT_LD_SAME_REG))
9374
9375 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9376 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9377 We return the original SYMBOL_REF.
9378
9379 (parallel [(set (reg:DI <base-reg>)
9380 (unspec:DI [(symbol_ref <symbol>)
9381 (const_int <marker>)]
9382 UNSPEC_PCREL_OPT_ST_ADDR))
9383 (use (reg <store-reg>))]) */
9384
9385 if (GET_CODE (orig_x) == UNSPEC)
9386 switch (XINT (orig_x, 1))
9387 {
9388 case UNSPEC_FUSION_GPR:
9389 case UNSPEC_PCREL_OPT_LD_ADDR:
9390 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9391 case UNSPEC_PCREL_OPT_ST_ADDR:
9392 orig_x = XVECEXP (orig_x, 0, 0);
9393 break;
9394
9395 default:
9396 break;
9397 }
9398
9399 orig_x = delegitimize_mem_from_attrs (orig_x);
9400
9401 x = orig_x;
9402 if (MEM_P (x))
9403 x = XEXP (x, 0);
9404
9405 y = x;
9406 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9407 y = XEXP (y, 1);
9408
9409 offset = NULL_RTX;
9410 if (GET_CODE (y) == PLUS
9411 && GET_MODE (y) == Pmode
9412 && CONST_INT_P (XEXP (y, 1)))
9413 {
9414 offset = XEXP (y, 1);
9415 y = XEXP (y, 0);
9416 }
9417
9418 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9419 {
9420 y = XVECEXP (y, 0, 0);
9421
9422 #ifdef HAVE_AS_TLS
9423 /* Do not associate thread-local symbols with the original
9424 constant pool symbol. */
9425 if (TARGET_XCOFF
9426 && SYMBOL_REF_P (y)
9427 && CONSTANT_POOL_ADDRESS_P (y)
9428 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9429 return orig_x;
9430 #endif
9431
9432 if (offset != NULL_RTX)
9433 y = gen_rtx_PLUS (Pmode, y, offset);
9434 if (!MEM_P (orig_x))
9435 return y;
9436 else
9437 return replace_equiv_address_nv (orig_x, y);
9438 }
9439
9440 if (TARGET_MACHO
9441 && GET_CODE (orig_x) == LO_SUM
9442 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9443 {
9444 y = XEXP (XEXP (orig_x, 1), 0);
9445 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9446 return XVECEXP (y, 0, 0);
9447 }
9448
9449 return orig_x;
9450 }
9451
9452 /* Return true if X shouldn't be emitted into the debug info.
9453 The linker doesn't like .toc section references from
9454 .debug_* sections, so reject .toc section symbols. */
9455
9456 static bool
9457 rs6000_const_not_ok_for_debug_p (rtx x)
9458 {
9459 if (GET_CODE (x) == UNSPEC)
9460 return true;
9461 if (SYMBOL_REF_P (x)
9462 && CONSTANT_POOL_ADDRESS_P (x))
9463 {
9464 rtx c = get_pool_constant (x);
9465 machine_mode cmode = get_pool_mode (x);
9466 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9467 return true;
9468 }
9469
9470 return false;
9471 }
9472
9473 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9474
9475 static bool
9476 rs6000_legitimate_combined_insn (rtx_insn *insn)
9477 {
9478 int icode = INSN_CODE (insn);
9479
9480 /* Reject creating doloop insns. Combine should not be allowed
9481 to create these for a number of reasons:
9482 1) In a nested loop, if combine creates one of these in an
9483 outer loop and the register allocator happens to allocate ctr
9484 to the outer loop insn, then the inner loop can't use ctr.
9485 Inner loops ought to be more highly optimized.
9486 2) Combine often wants to create one of these from what was
9487 originally a three insn sequence, first combining the three
9488 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9489 allocated ctr, the splitter takes use back to the three insn
9490 sequence. It's better to stop combine at the two insn
9491 sequence.
9492 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9493 insns, the register allocator sometimes uses floating point
9494 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9495 jump insn and output reloads are not implemented for jumps,
9496 the ctrsi/ctrdi splitters need to handle all possible cases.
9497 That's a pain, and it gets to be seriously difficult when a
9498 splitter that runs after reload needs memory to transfer from
9499 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9500 for the difficult case. It's better to not create problems
9501 in the first place. */
9502 if (icode != CODE_FOR_nothing
9503 && (icode == CODE_FOR_bdz_si
9504 || icode == CODE_FOR_bdz_di
9505 || icode == CODE_FOR_bdnz_si
9506 || icode == CODE_FOR_bdnz_di
9507 || icode == CODE_FOR_bdztf_si
9508 || icode == CODE_FOR_bdztf_di
9509 || icode == CODE_FOR_bdnztf_si
9510 || icode == CODE_FOR_bdnztf_di))
9511 return false;
9512
9513 return true;
9514 }
9515
9516 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9517
9518 static GTY(()) rtx rs6000_tls_symbol;
9519 static rtx
9520 rs6000_tls_get_addr (void)
9521 {
9522 if (!rs6000_tls_symbol)
9523 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9524
9525 return rs6000_tls_symbol;
9526 }
9527
9528 /* Construct the SYMBOL_REF for TLS GOT references. */
9529
9530 static GTY(()) rtx rs6000_got_symbol;
9531 rtx
9532 rs6000_got_sym (void)
9533 {
9534 if (!rs6000_got_symbol)
9535 {
9536 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9537 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9538 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9539 }
9540
9541 return rs6000_got_symbol;
9542 }
9543
9544 /* AIX Thread-Local Address support. */
9545
9546 static rtx
9547 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9548 {
9549 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9550 const char *name;
9551 char *tlsname;
9552
9553 /* Place addr into TOC constant pool. */
9554 sym = force_const_mem (GET_MODE (addr), addr);
9555
9556 /* Output the TOC entry and create the MEM referencing the value. */
9557 if (constant_pool_expr_p (XEXP (sym, 0))
9558 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9559 {
9560 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9561 mem = gen_const_mem (Pmode, tocref);
9562 set_mem_alias_set (mem, get_TOC_alias_set ());
9563 }
9564 else
9565 return sym;
9566
9567 /* Use global-dynamic for local-dynamic. */
9568 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9569 || model == TLS_MODEL_LOCAL_DYNAMIC)
9570 {
9571 /* Create new TOC reference for @m symbol. */
9572 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9573 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9574 strcpy (tlsname, "*LCM");
9575 strcat (tlsname, name + 3);
9576 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9577 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9578 tocref = create_TOC_reference (modaddr, NULL_RTX);
9579 rtx modmem = gen_const_mem (Pmode, tocref);
9580 set_mem_alias_set (modmem, get_TOC_alias_set ());
9581
9582 rtx modreg = gen_reg_rtx (Pmode);
9583 emit_insn (gen_rtx_SET (modreg, modmem));
9584
9585 tmpreg = gen_reg_rtx (Pmode);
9586 emit_insn (gen_rtx_SET (tmpreg, mem));
9587
9588 dest = gen_reg_rtx (Pmode);
9589 if (TARGET_32BIT)
9590 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9591 else
9592 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9593 return dest;
9594 }
9595 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9596 else if (TARGET_32BIT)
9597 {
9598 tlsreg = gen_reg_rtx (SImode);
9599 emit_insn (gen_tls_get_tpointer (tlsreg));
9600 }
9601 else
9602 {
9603 tlsreg = gen_rtx_REG (DImode, 13);
9604 xcoff_tls_exec_model_detected = true;
9605 }
9606
9607 /* Load the TOC value into temporary register. */
9608 tmpreg = gen_reg_rtx (Pmode);
9609 emit_insn (gen_rtx_SET (tmpreg, mem));
9610 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9611 gen_rtx_MINUS (Pmode, addr, tlsreg));
9612
9613 /* Add TOC symbol value to TLS pointer. */
9614 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9615
9616 return dest;
9617 }
9618
9619 /* Passes the tls arg value for the global-dynamic and local-dynamic
9620 emit_library_call_value calls in rs6000_legitimize_tls_address through
9621 to rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9622 marker relocs put on __tls_get_addr calls. */
9623 static rtx global_tlsarg;
9624
9625 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9626 this (thread-local) address. */
9627
9628 static rtx
9629 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9630 {
9631 rtx dest, insn;
9632
9633 if (TARGET_XCOFF)
9634 return rs6000_legitimize_tls_address_aix (addr, model);
9635
9636 dest = gen_reg_rtx (Pmode);
9637 if (model == TLS_MODEL_LOCAL_EXEC
9638 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9639 {
9640 rtx tlsreg;
9641
9642 if (TARGET_64BIT)
9643 {
9644 tlsreg = gen_rtx_REG (Pmode, 13);
9645 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9646 }
9647 else
9648 {
9649 tlsreg = gen_rtx_REG (Pmode, 2);
9650 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9651 }
9652 emit_insn (insn);
9653 }
9654 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9655 {
9656 rtx tlsreg, tmp;
9657
9658 tmp = gen_reg_rtx (Pmode);
9659 if (TARGET_64BIT)
9660 {
9661 tlsreg = gen_rtx_REG (Pmode, 13);
9662 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9663 }
9664 else
9665 {
9666 tlsreg = gen_rtx_REG (Pmode, 2);
9667 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9668 }
9669 emit_insn (insn);
9670 if (TARGET_64BIT)
9671 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9672 else
9673 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9674 emit_insn (insn);
9675 }
9676 else
9677 {
9678 rtx got, tga, tmp1, tmp2;
9679
9680 /* We currently use relocations like @got@tlsgd for tls, which
9681 means the linker will handle allocation of tls entries, placing
9682 them in the .got section. So use a pointer to the .got section,
9683 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9684 or to secondary GOT sections used by 32-bit -fPIC. */
9685 if (rs6000_pcrel_p ())
9686 got = const0_rtx;
9687 else if (TARGET_64BIT)
9688 got = gen_rtx_REG (Pmode, 2);
9689 else
9690 {
9691 if (flag_pic == 1)
9692 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9693 else
9694 {
9695 rtx gsym = rs6000_got_sym ();
9696 got = gen_reg_rtx (Pmode);
9697 if (flag_pic == 0)
9698 rs6000_emit_move (got, gsym, Pmode);
9699 else
9700 {
9701 rtx mem, lab;
9702
9703 tmp1 = gen_reg_rtx (Pmode);
9704 tmp2 = gen_reg_rtx (Pmode);
9705 mem = gen_const_mem (Pmode, tmp1);
9706 lab = gen_label_rtx ();
9707 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9708 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9709 if (TARGET_LINK_STACK)
9710 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9711 emit_move_insn (tmp2, mem);
9712 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9713 set_unique_reg_note (last, REG_EQUAL, gsym);
9714 }
9715 }
9716 }
9717
9718 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9719 {
9720 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9721 UNSPEC_TLSGD);
9722 tga = rs6000_tls_get_addr ();
9723 rtx argreg = gen_rtx_REG (Pmode, 3);
9724 emit_insn (gen_rtx_SET (argreg, arg));
9725 global_tlsarg = arg;
9726 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9727 global_tlsarg = NULL_RTX;
9728
9729 /* Make a note so that the result of this call can be CSEd. */
9730 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9731 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9732 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9733 }
9734 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9735 {
9736 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9737 tga = rs6000_tls_get_addr ();
9738 tmp1 = gen_reg_rtx (Pmode);
9739 rtx argreg = gen_rtx_REG (Pmode, 3);
9740 emit_insn (gen_rtx_SET (argreg, arg));
9741 global_tlsarg = arg;
9742 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9743 global_tlsarg = NULL_RTX;
9744
9745 /* Make a note so that the result of this call can be CSEd. */
9746 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9747 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9748 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9749
9750 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9751 {
9752 if (TARGET_64BIT)
9753 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9754 else
9755 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9756 }
9757 else if (rs6000_tls_size == 32)
9758 {
9759 tmp2 = gen_reg_rtx (Pmode);
9760 if (TARGET_64BIT)
9761 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9762 else
9763 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9764 emit_insn (insn);
9765 if (TARGET_64BIT)
9766 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9767 else
9768 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9769 }
9770 else
9771 {
9772 tmp2 = gen_reg_rtx (Pmode);
9773 if (TARGET_64BIT)
9774 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9775 else
9776 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9777 emit_insn (insn);
9778 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9779 }
9780 emit_insn (insn);
9781 }
9782 else
9783 {
9784 /* IE (initial-exec), or 64-bit offset LE (local-exec). */
9785 tmp2 = gen_reg_rtx (Pmode);
9786 if (TARGET_64BIT)
9787 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9788 else
9789 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9790 emit_insn (insn);
9791 if (rs6000_pcrel_p ())
9792 {
9793 if (TARGET_64BIT)
9794 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9795 else
9796 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9797 }
9798 else if (TARGET_64BIT)
9799 insn = gen_tls_tls_64 (dest, tmp2, addr);
9800 else
9801 insn = gen_tls_tls_32 (dest, tmp2, addr);
9802 emit_insn (insn);
9803 }
9804 }
9805
9806 return dest;
9807 }
9808
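/* Roughly, the sequences generated above correspond to the following
   64-bit ELF assembly (an illustrative summary, not exhaustive):

     local-exec, -mtls-size=16:  addi 3,13,x@tprel
     local-exec, -mtls-size=32:  addis 9,13,x@tprel@ha
                                 addi 3,9,x@tprel@l
     initial-exec:               ld 9,x@got@tprel(2)
                                 add 3,9,x@tls
     global-dynamic:             addi 3,2,x@got@tlsgd
                                 bl __tls_get_addr(x@tlsgd)

   with r13 the thread pointer and r2 the TOC pointer.  */
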
9809 /* Only create the global variable for the stack protect guard if we are using
9810 the global flavor of that guard. */
9811 static tree
9812 rs6000_init_stack_protect_guard (void)
9813 {
9814 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9815 return default_stack_protect_guard ();
9816
9817 return NULL_TREE;
9818 }
9819
9820 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9821
9822 static bool
9823 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9824 {
9825 /* If GET_CODE (x) is HIGH, the 'X' represents the high part of a symbol_ref.
9826 It cannot be put into a constant pool, e.g.
9827 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9828 (high:DI (symbol_ref:DI ("var")..)). */
9829 if (GET_CODE (x) == HIGH)
9830 return true;
9831
9832 /* A TLS symbol in the TOC cannot contain a sum. */
9833 if (GET_CODE (x) == CONST
9834 && GET_CODE (XEXP (x, 0)) == PLUS
9835 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9836 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9837 return true;
9838
9839 /* Allow AIX TOC TLS symbols in the constant pool,
9840 but not ELF TLS symbols. */
9841 return TARGET_ELF && tls_referenced_p (x);
9842 }
9843
9844 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9845 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9846 can be addressed relative to the toc pointer. */
9847
9848 static bool
9849 use_toc_relative_ref (rtx sym, machine_mode mode)
9850 {
9851 return ((constant_pool_expr_p (sym)
9852 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9853 get_pool_mode (sym)))
9854 || (TARGET_CMODEL == CMODEL_MEDIUM
9855 && SYMBOL_REF_LOCAL_P (sym)
9856 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9857 }
9858
9859 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9860 that is a valid memory address for an instruction.
9861 The MODE argument is the machine mode for the MEM expression
9862 that wants to use this address.
9863
9864 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9865 refers to a constant pool entry of an address (or the sum of it
9866 plus a constant), a short (16-bit signed) constant plus a register,
9867 the sum of two registers, or a register indirect, possibly with an
9868 auto-increment. For DFmode, DDmode and DImode with a constant plus
9869 register, we must ensure that both words are addressable or PowerPC64
9870 with offset word aligned.
9871
9872 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9873 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9874 because adjacent memory cells are accessed by adding word-sized offsets
9875 during assembly output. */
9876 static bool
9877 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9878 code_helper ch = ERROR_MARK)
9879 {
9880 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9881 bool quad_offset_p = mode_supports_dq_form (mode);
9882
9883 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9884 return 0;
9885
9886 /* lxvl and stxvl don't support any addressing modes with PLUS. */
9887 if (ch.is_internal_fn ()
9888 && (ch == IFN_LEN_LOAD || ch == IFN_LEN_STORE)
9889 && GET_CODE (x) == PLUS)
9890 return 0;
9891
9892 /* Handle unaligned altivec lvx/stvx type addresses. */
9893 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9894 && GET_CODE (x) == AND
9895 && CONST_INT_P (XEXP (x, 1))
9896 && INTVAL (XEXP (x, 1)) == -16)
9897 {
9898 x = XEXP (x, 0);
9899 return (legitimate_indirect_address_p (x, reg_ok_strict)
9900 || legitimate_indexed_address_p (x, reg_ok_strict)
9901 || virtual_stack_registers_memory_p (x));
9902 }
9903
9904 if (legitimate_indirect_address_p (x, reg_ok_strict))
9905 return 1;
9906 if (TARGET_UPDATE
9907 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9908 && mode_supports_pre_incdec_p (mode)
9909 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9910 return 1;
9911
9912 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9913 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9914 return 1;
9915
9916 /* Handle restricted vector d-form offsets in ISA 3.0. */
9917 if (quad_offset_p)
9918 {
9919 if (quad_address_p (x, mode, reg_ok_strict))
9920 return 1;
9921 }
9922 else if (virtual_stack_registers_memory_p (x))
9923 return 1;
9924
9925 else if (reg_offset_p)
9926 {
9927 if (legitimate_small_data_p (mode, x))
9928 return 1;
9929 if (legitimate_constant_pool_address_p (x, mode,
9930 reg_ok_strict || lra_in_progress))
9931 return 1;
9932 }
9933
9934 /* For TImode, if we have TImode in VSX registers, only allow register
9935 indirect addresses. This will allow the values to go in either GPRs
9936 or VSX registers without reloading. The vector types would tend to
9937 go into VSX registers, so we allow REG+REG, while TImode seems
9938 somewhat split, in that some uses are GPR based, and some VSX based. */
9939 /* FIXME: We could loosen this by changing the following to
9940 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9941 but currently we cannot allow REG+REG addressing for TImode. See
9942 PR72827 for complete details on how this ends up hoodwinking DSE. */
9943 if (mode == TImode && TARGET_VSX)
9944 return 0;
9945 /* If not REG_OK_STRICT (before reload), let any stack offset pass. */
9946 if (! reg_ok_strict
9947 && reg_offset_p
9948 && GET_CODE (x) == PLUS
9949 && REG_P (XEXP (x, 0))
9950 && (XEXP (x, 0) == virtual_stack_vars_rtx
9951 || XEXP (x, 0) == arg_pointer_rtx)
9952 && CONST_INT_P (XEXP (x, 1)))
9953 return 1;
9954 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9955 return 1;
9956 if (!FLOAT128_2REG_P (mode)
9957 && (TARGET_HARD_FLOAT
9958 || TARGET_POWERPC64
9959 || (mode != DFmode && mode != DDmode))
9960 && (TARGET_POWERPC64 || mode != DImode)
9961 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9962 && mode != PTImode
9963 && !avoiding_indexed_address_p (mode)
9964 && legitimate_indexed_address_p (x, reg_ok_strict))
9965 return 1;
9966 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9967 && mode_supports_pre_modify_p (mode)
9968 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9969 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9970 reg_ok_strict, false)
9971 || (!avoiding_indexed_address_p (mode)
9972 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9973 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9974 {
9975 /* There is no prefixed version of the load/store with update. */
9976 rtx addr = XEXP (x, 1);
9977 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9978 }
9979 if (reg_offset_p && !quad_offset_p
9980 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9981 return 1;
9982 return 0;
9983 }
9984
9985 /* Debug version of rs6000_legitimate_address_p. */
9986 static bool
9987 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9988 code_helper ch)
9989 {
9990 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict, ch);
9991 fprintf (stderr,
9992 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9993 "strict = %d, reload = %s, code = %s\n",
9994 ret ? "true" : "false",
9995 GET_MODE_NAME (mode),
9996 reg_ok_strict,
9997 (reload_completed ? "after" : "before"),
9998 GET_RTX_NAME (GET_CODE (x)));
9999 debug_rtx (x);
10000
10001 return ret;
10002 }
10003
10004 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10005
10006 static bool
10007 rs6000_mode_dependent_address_p (const_rtx addr,
10008 addr_space_t as ATTRIBUTE_UNUSED)
10009 {
10010 return rs6000_mode_dependent_address_ptr (addr);
10011 }
10012
10013 /* Go to LABEL if ADDR (a legitimate address expression)
10014 has an effect that depends on the machine mode it is used for.
10015
10016 On the RS/6000 this is true of all integral offsets (since AltiVec
10017 and VSX modes don't allow them) or is a pre-increment or decrement.
10018
10019 ??? Except that due to conceptual problems in offsettable_address_p
10020 we can't really report the problems of integral offsets. So leave
10021 this assuming that the adjustable offset must be valid for the
10022 sub-words of a TFmode operand, which is what we had before. */
10023
10024 static bool
10025 rs6000_mode_dependent_address (const_rtx addr)
10026 {
10027 switch (GET_CODE (addr))
10028 {
10029 case PLUS:
10030 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10031 is considered a legitimate address before reload, so there
10032 are no offset restrictions in that case. Note that this
10033 condition is safe in strict mode because any address involving
10034 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10035 been rejected as illegitimate. */
10036 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10037 && XEXP (addr, 0) != arg_pointer_rtx
10038 && CONST_INT_P (XEXP (addr, 1)))
10039 {
10040 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10041 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
10042 if (TARGET_PREFIXED)
10043 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
10044 else
10045 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
10046 }
10047 break;
10048
10049 case LO_SUM:
10050 /* Anything in the constant pool is sufficiently aligned that
10051 all bytes have the same high part address. */
10052 return !legitimate_constant_pool_address_p (addr, QImode, false);
10053
10054 /* Auto-increment cases are now treated generically in recog.cc. */
10055 case PRE_MODIFY:
10056 return TARGET_UPDATE;
10057
10058 /* AND is only allowed in Altivec loads. */
10059 case AND:
10060 return true;
10061
10062 default:
10063 break;
10064 }
10065
10066 return false;
10067 }
10068
10069 /* Debug version of rs6000_mode_dependent_address. */
10070 static bool
10071 rs6000_debug_mode_dependent_address (const_rtx addr)
10072 {
10073 bool ret = rs6000_mode_dependent_address (addr);
10074
10075 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10076 ret ? "true" : "false");
10077 debug_rtx (addr);
10078
10079 return ret;
10080 }
10081
10082 /* Implement FIND_BASE_TERM. */
10083
10084 rtx
10085 rs6000_find_base_term (rtx op)
10086 {
10087 rtx base;
10088
10089 base = op;
10090 if (GET_CODE (base) == CONST)
10091 base = XEXP (base, 0);
10092 if (GET_CODE (base) == PLUS)
10093 base = XEXP (base, 0);
10094 if (GET_CODE (base) == UNSPEC)
10095 switch (XINT (base, 1))
10096 {
10097 case UNSPEC_TOCREL:
10098 case UNSPEC_MACHOPIC_OFFSET:
10099 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10100 for aliasing purposes. */
10101 return XVECEXP (base, 0, 0);
10102 }
10103
10104 return op;
10105 }
10106
10107 /* More elaborate version of recog's offsettable_memref_p predicate
10108 that works around the ??? note of rs6000_mode_dependent_address.
10109 In particular it accepts
10110
10111 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10112
10113 in 32-bit mode, which the recog predicate rejects. */
10114
10115 static bool
10116 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10117 {
10118 bool worst_case;
10119
10120 if (!MEM_P (op))
10121 return false;
10122
10123 /* First mimic offsettable_memref_p. */
10124 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10125 return true;
10126
10127 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10128 the latter predicate knows nothing about the mode of the memory
10129 reference and, therefore, assumes that it is the largest supported
10130 mode (TFmode). As a consequence, legitimate offsettable memory
10131 references are rejected. rs6000_legitimate_offset_address_p contains
10132 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10133 at least with a little bit of help here given that we know the
10134 actual registers used. */
10135 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10136 || GET_MODE_SIZE (reg_mode) == 4);
10137 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10138 strict, worst_case);
10139 }
10140
10141 /* Determine the reassociation width to be used in reassociate_bb.
10142 This takes into account how many parallel operations we
10143 can actually do of a given type, and also the latency.
10144 P8:
10145 int add/sub 6/cycle
10146 mul 2/cycle
10147 vect add/sub/mul 2/cycle
10148 fp add/sub/mul 2/cycle
10149 dfp 1/cycle
10150 */
10151
10152 static int
10153 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10154 machine_mode mode)
10155 {
10156 switch (rs6000_tune)
10157 {
10158 case PROCESSOR_POWER8:
10159 case PROCESSOR_POWER9:
10160 case PROCESSOR_POWER10:
10161 case PROCESSOR_POWER11:
10162 if (DECIMAL_FLOAT_MODE_P (mode))
10163 return 1;
10164 if (VECTOR_MODE_P (mode))
10165 return 4;
10166 if (INTEGRAL_MODE_P (mode))
10167 return 1;
10168 if (FLOAT_MODE_P (mode))
10169 return 4;
10170 break;
10171 default:
10172 break;
10173 }
10174 return 1;
10175 }
10176
10177 /* Change register usage conditional on target flags. */
10178 static void
10179 rs6000_conditional_register_usage (void)
10180 {
10181 int i;
10182
10183 if (TARGET_DEBUG_TARGET)
10184 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10185
10186 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10187 if (TARGET_64BIT)
10188 fixed_regs[13] = call_used_regs[13] = 1;
10189
10190 /* Conditionally disable FPRs. */
10191 if (TARGET_SOFT_FLOAT)
10192 for (i = 32; i < 64; i++)
10193 fixed_regs[i] = call_used_regs[i] = 1;
10194
10195 /* For non PC-relative code, GPR2 is unavailable for register allocation. */
10196 if (FIXED_R2 && !rs6000_pcrel_p ())
10197 fixed_regs[2] = 1;
10198
10199 /* The TOC register is not killed across calls in a way that is
10200 visible to the compiler. */
10201 if (fixed_regs[2] && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2))
10202 call_used_regs[2] = 0;
10203
10204 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10205 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10206
10207 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10208 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10209 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10210
10211 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10212 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10213 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10214
10215 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10216 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10217
10218 if (!TARGET_ALTIVEC && !TARGET_VSX)
10219 {
10220 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10221 fixed_regs[i] = call_used_regs[i] = 1;
10222 call_used_regs[VRSAVE_REGNO] = 1;
10223 }
10224
10225 if (TARGET_ALTIVEC || TARGET_VSX)
10226 global_regs[VSCR_REGNO] = 1;
10227
10228 if (TARGET_ALTIVEC_ABI)
10229 {
10230 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10231 call_used_regs[i] = 1;
10232
10233 /* AIX reserves VR20:31 in non-extended ABI mode. */
10234 if (TARGET_XCOFF && !rs6000_aix_extabi)
10235 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10236 fixed_regs[i] = call_used_regs[i] = 1;
10237 }
10238 }
10239
10240 \f
10241 /* Output insns to set DEST equal to the constant SOURCE as a series of
10242 lis, ori and shl instructions and return TRUE. */
10243
10244 bool
10245 rs6000_emit_set_const (rtx dest, rtx source)
10246 {
10247 machine_mode mode = GET_MODE (dest);
10248 rtx temp, set;
10249 rtx_insn *insn;
10250 HOST_WIDE_INT c;
10251
10252 gcc_checking_assert (CONST_INT_P (source));
10253 c = INTVAL (source);
10254 switch (mode)
10255 {
10256 case E_QImode:
10257 case E_HImode:
10258 emit_insn (gen_rtx_SET (dest, source));
10259 return true;
10260
10261 case E_SImode:
10262 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10263
10264 emit_insn (gen_rtx_SET (temp, GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10265 emit_insn (gen_rtx_SET (dest,
10266 gen_rtx_IOR (SImode, temp,
10267 GEN_INT (c & 0xffff))));
10268 break;
10269
10270 case E_DImode:
10271 if (!TARGET_POWERPC64)
10272 {
10273 rtx hi, lo;
10274
10275 hi = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0, DImode);
10276 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, DImode);
10277 emit_move_insn (hi, GEN_INT (c >> 32));
10278 c = sext_hwi (c, 32);
10279 emit_move_insn (lo, GEN_INT (c));
10280 }
10281 else
10282 rs6000_emit_set_long_const (dest, c);
10283 break;
10284
10285 default:
10286 gcc_unreachable ();
10287 }
10288
10289 insn = get_last_insn ();
10290 set = single_set (insn);
10291 if (! CONSTANT_P (SET_SRC (set)))
10292 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10293
10294 return true;
10295 }
10296
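/* A worked SImode example (illustrative): for c == 0x12345678 this emits

     (set (reg temp) (const_int 0x12340000))          ; lis 9,0x1234
     (set (reg dest)
          (ior:SI (reg temp) (const_int 0x5678)))     ; ori 3,9,0x5678

   and then attaches a REG_EQUAL note carrying the full constant so that
   later passes still see the value.  */
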
10297 /* Check if C can be rotated to a negative value which the 'lis' instruction
10298 is able to load: 1..1xx0..0. If so, set *ROT to the number by which C is
10299 rotated, and return true. Return false otherwise. */
10300
10301 static bool
10302 can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
10303 {
10304 /* case a. 1..1xxx0..01..1: up to 15 x's, at least 16 0's. */
10305 int leading_ones = clz_hwi (~c);
10306 int tailing_ones = ctz_hwi (~c);
10307 int middle_zeros = ctz_hwi (c >> tailing_ones);
10308 if (middle_zeros >= 16 && leading_ones + tailing_ones >= 33)
10309 {
10310 *rot = HOST_BITS_PER_WIDE_INT - tailing_ones;
10311 return true;
10312 }
10313
10314 /* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
10315 rotated over the highest bit. */
10316 int pos_one = clz_hwi ((c << 16) >> 16);
10317 middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
10318 int middle_ones = clz_hwi (~(c << pos_one));
10319 if (middle_zeros >= 16 && middle_ones >= 33)
10320 {
10321 *rot = pos_one;
10322 return true;
10323 }
10324
10325 return false;
10326 }
10327
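/* A worked example of case a (illustrative): for c == 0xfffffffff0000fff
   we have tailing_ones == 12, middle_zeros == 16 and leading_ones == 36,
   so *ROT is set to 64 - 12 == 52.  C rotated left by 52 (i.e. right by
   12) is 0xffffffffffff0000, which "lis rD,-1" loads, after which
   "rotldi rD,rD,12" restores C.  */
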
10328 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10329 another is rotldi.
10330
10331 If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
10332 is set to the mask operand of rotldi(rldicl), and return true.
10333 Return false otherwise. */
10334
10335 static bool
10336 can_be_built_by_li_lis_and_rotldi (HOST_WIDE_INT c, int *shift,
10337 HOST_WIDE_INT *mask)
10338 {
10339 /* If C or ~C contains at least 49 successive zeros, then C can be rotated
10340 to/from a positive or negative value that 'li' is able to load. */
10341 int n;
10342 if (can_be_rotated_to_lowbits (c, 15, &n)
10343 || can_be_rotated_to_lowbits (~c, 15, &n)
10344 || can_be_rotated_to_negative_lis (c, &n))
10345 {
10346 *mask = HOST_WIDE_INT_M1;
10347 *shift = HOST_BITS_PER_WIDE_INT - n;
10348 return true;
10349 }
10350
10351 return false;
10352 }
10353
10354 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10355 another is rldicl.
10356
10357 If so, *SHIFT is set to the shift operand of rldicl, and *MASK is set to
10358 the mask operand of rldicl, and return true.
10359 Return false otherwise. */
10360
10361 static bool
10362 can_be_built_by_li_lis_and_rldicl (HOST_WIDE_INT c, int *shift,
10363 HOST_WIDE_INT *mask)
10364 {
10365 /* Leading zeros may be cleaned by rldicl with a mask. Change leading zeros
10366 to ones and then recheck it. */
10367 int lz = clz_hwi (c);
10368
10369 /* If lz == 0, the left shift is undefined. */
10370 if (!lz)
10371 return false;
10372
10373 HOST_WIDE_INT unmask_c
10374 = c | (HOST_WIDE_INT_M1U << (HOST_BITS_PER_WIDE_INT - lz));
10375 int n;
10376 if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
10377 || can_be_rotated_to_negative_lis (unmask_c, &n))
10378 {
10379 *mask = HOST_WIDE_INT_M1U >> lz;
10380 *shift = n == 0 ? 0 : HOST_BITS_PER_WIDE_INT - n;
10381 return true;
10382 }
10383
10384 return false;
10385 }
10386
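/* For instance (illustrative): c == 0xffff has lz == 48, so unmask_c ==
   c | (HOST_WIDE_INT_M1U << 16) == -1 and ~unmask_c == 0, which trivially
   rotates to its low bits with n == 0.  We return *MASK == 0xffff and
   *SHIFT == 0, giving the two-instruction sequence "li rD,-1" followed by
   "rldicl rD,rD,0,48".  */
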
10387 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10388 another is rldicr.
10389
10390 If so, *SHIFT is set to the shift operand of rldicr, and *MASK is set to
10391 the mask operand of rldicr, and return true.
10392 Return false otherwise. */
10393
10394 static bool
10395 can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT c, int *shift,
10396 HOST_WIDE_INT *mask)
10397 {
10398 /* Trailing zeros may be cleaned by rldicr with a mask. Change trailing zeros
10399 to ones and then recheck it. */
10400 int tz = ctz_hwi (c);
10401
10402 /* If tz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10403 if (tz >= HOST_BITS_PER_WIDE_INT)
10404 return false;
10405
10406 HOST_WIDE_INT unmask_c = c | ((HOST_WIDE_INT_1U << tz) - 1);
10407 int n;
10408 if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
10409 || can_be_rotated_to_negative_lis (unmask_c, &n))
10410 {
10411 *mask = HOST_WIDE_INT_M1U << tz;
10412 *shift = HOST_BITS_PER_WIDE_INT - n;
10413 return true;
10414 }
10415
10416 return false;
10417 }
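
/* For example, for c = 0x00FFFFFF00000000 the 32 trailing zeros are turned
   into ones, giving 0x00FFFFFFFFFFFFFF, whose complement rotates down to
   0xFF; this returns *SHIFT = 56 and *MASK = 0xFFFFFFFF00000000, i.e.
   roughly "li rT,-0x100; rldicr rD,rT,56,31". */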
10418
10419 /* Check if value C can be built by 2 instructions: one is 'li', another is
10420 rldic.
10421
10422 If so, *SHIFT is set to the 'shift' operand of rldic; and *MASK is set
10423 to the mask value about the 'mb' operand of rldic; and return true.
10424 Return false otherwise. */
10425
10426 static bool
10427 can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
10428 {
10429 /* A negative value loaded by 'li' has at least 49 successive leading ones. */
10430 int ones = 49;
10431
10432 /* 1..1xx1..1: negative value of li --> 0..01..1xx0..0: the right bits
10433 are shifted in as 0's, and the left 1's (and x's) are cleared. */
10434 int tz = ctz_hwi (c);
10435 int lz = clz_hwi (c);
10436
10437 /* If lz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10438 if (lz >= HOST_BITS_PER_WIDE_INT)
10439 return false;
10440
10441 int middle_ones = clz_hwi (~(c << lz));
10442 if (tz + lz + middle_ones >= ones
10443 && (tz - lz) < HOST_BITS_PER_WIDE_INT
10444 && tz < HOST_BITS_PER_WIDE_INT)
10445 {
10446 *mask = ((1LL << (HOST_BITS_PER_WIDE_INT - tz - lz)) - 1LL) << tz;
10447 *shift = tz;
10448 return true;
10449 }
10450
10451 /* 1..1xx1..1 --> 1..1xx0..01..1: some 1's (following the x's) are cleared. */
10452 int leading_ones = clz_hwi (~c);
10453 int tailing_ones = ctz_hwi (~c);
10454 int middle_zeros = ctz_hwi (c >> tailing_ones);
10455 if (leading_ones + tailing_ones + middle_zeros >= ones
10456 && middle_zeros < HOST_BITS_PER_WIDE_INT)
10457 {
10458 *mask = ~(((1ULL << middle_zeros) - 1ULL) << tailing_ones);
10459 *shift = tailing_ones + middle_zeros;
10460 return true;
10461 }
10462
10463 /* xx1..1xx: --> xx0..01..1xx: some 1's (following the x's) are cleared. */
10464 /* Get the position of the first bit of the run of successive 1's.
10465 Bit 24 lies inside either the run of 0's or the run of 1's. */
10466 HOST_WIDE_INT low_mask = (HOST_WIDE_INT_1U << 24) - HOST_WIDE_INT_1U;
10467 int pos_first_1 = ((c & (low_mask + 1)) == 0)
10468 ? clz_hwi (c & low_mask)
10469 : HOST_BITS_PER_WIDE_INT - ctz_hwi (~(c | low_mask));
10470
10471 /* Make sure the left and right shifts are defined. */
10472 if (!IN_RANGE (pos_first_1, 1, HOST_BITS_PER_WIDE_INT - 1))
10473 return false;
10474
10475 middle_ones = clz_hwi (~c << pos_first_1);
10476 middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
10477 if (pos_first_1 < HOST_BITS_PER_WIDE_INT
10478 && middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
10479 && middle_ones + middle_zeros >= ones)
10480 {
10481 *mask = ~(((1ULL << middle_zeros) - 1LL)
10482 << (HOST_BITS_PER_WIDE_INT - pos_first_1));
10483 *shift = HOST_BITS_PER_WIDE_INT - pos_first_1 + middle_zeros;
10484 return true;
10485 }
10486
10487 return false;
10488 }
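
/* For example, c = 0x0000F8F0F0000000 (tz = 28, lz = 16, 5 leading ones in
   the middle run, 49 in total) matches the first pattern above, giving
   *SHIFT = 28 and a mask covering bits 28..47; the sequence is roughly
   "li rT,-0x70F1; rldic rD,rT,28,16". */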
10489
10490 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10491 Output insns to set DEST equal to the constant C as a series of
10492 lis, ori and shl instructions. If NUM_INSNS is not NULL, then
10493 only increase *NUM_INSNS as the number of insns, and do not emit
10494 any insns. */
10495
10496 static void
10497 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
10498 {
10499 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10500
10501 ud1 = c & 0xffff;
10502 ud2 = (c >> 16) & 0xffff;
10503 ud3 = (c >> 32) & 0xffff;
10504 ud4 = (c >> 48) & 0xffff;
10505
10506 /* This lambda either emits one insn or just increases the insn count.
10507 When only counting insns, there is no need to emit them. */
10508 auto count_or_emit_insn = [&num_insns] (rtx dest_or_insn, rtx src = nullptr) {
10509 if (num_insns)
10510 {
10511 (*num_insns)++;
10512 return;
10513 }
10514
10515 if (src)
10516 emit_move_insn (dest_or_insn, src);
10517 else
10518 emit_insn (dest_or_insn);
10519 };
10520
10521 if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
10522 {
10523 /* li/lis/pli */
10524 count_or_emit_insn (dest, GEN_INT (c));
10525 return;
10526 }
10527
10528 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10529 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
10530 {
10531 /* li */
10532 count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
10533 return;
10534 }
10535
10536 rtx temp
10537 = (num_insns || !can_create_pseudo_p ()) ? dest : gen_reg_rtx (DImode);
10538
10539 if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10540 || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
10541 {
10542 /* lis[; ori] */
10543 count_or_emit_insn (ud1 != 0 ? temp : dest,
10544 GEN_INT (sext_hwi (ud2 << 16, 32)));
10545 if (ud1 != 0)
10546 count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10547 return;
10548 }
10549
10550 if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
10551 {
10552 /* lis; xoris */
10553 count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
10554 count_or_emit_insn (dest,
10555 gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
10556 return;
10557 }
10558
10559 if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
10560 {
10561 /* li; xoris */
10562 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
10563 count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp,
10564 GEN_INT ((ud2 ^ 0xffff) << 16)));
10565 return;
10566 }
10567
10568 int shift;
10569 HOST_WIDE_INT mask;
10570 if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
10571 || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
10572 || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
10573 || can_be_built_by_li_and_rldic (c, &shift, &mask))
10574 {
10575 /* li/lis; rldicX */
10576 unsigned HOST_WIDE_INT imm = (c | ~mask);
10577 imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
10578
10579 count_or_emit_insn (temp, GEN_INT (imm));
10580 if (shift != 0)
10581 temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
10582 if (mask != HOST_WIDE_INT_M1)
10583 temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
10584 count_or_emit_insn (dest, temp);
10585
10586 return;
10587 }
10588
10589 if (ud3 == 0 && ud4 == 0)
10590 {
10591 gcc_assert ((ud2 & 0x8000) && ud1 != 0);
10592 if (!(ud1 & 0x8000))
10593 {
10594 /* li; oris */
10595 count_or_emit_insn (temp, GEN_INT (ud1));
10596 count_or_emit_insn (dest,
10597 gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
10598 return;
10599 }
10600
10601 /* lis; ori; rldicl */
10602 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10603 count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10604 count_or_emit_insn (dest,
10605 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10606 return;
10607 }
10608
10609 if (ud1 == ud3 && ud2 == ud4)
10610 {
10611 /* Load the low 32 bits first, e.g. "lis; ori", then copy them into the high half with "rldimi". */
10612 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10613 rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
10614
10615 rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp,
10616 GEN_INT (0xffffffff));
10617 count_or_emit_insn (rldimi);
10618 return;
10619 }
10620
10621 if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
10622 {
10623 /* lis; [ori;] sldi [; ori]. */
10624 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
10625 if (ud2 != 0)
10626 count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
10627 count_or_emit_insn (ud1 != 0 ? temp : dest,
10628 gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
10629 if (ud1 != 0)
10630 count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10631 return;
10632 }
10633
10634 if (TARGET_PREFIXED)
10635 {
10636 if (can_create_pseudo_p ())
10637 {
10638 /* pli A,L; pli B,H; rldimi A,B,32,0. */
10639 rtx temp1 = num_insns ? nullptr : gen_reg_rtx (DImode);
10640 count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3));
10641 count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
10642 rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
10643 GEN_INT (0xffffffff));
10644 count_or_emit_insn (rldimi);
10645 return;
10646 }
10647
10648 /* pli A,H; sldi A,32; paddi A,A,L. */
10649 count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3));
10650 count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10651
10652 bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false;
10653 /* Use paddi for the low 32 bits. */
10654 if (ud2 != 0 && ud1 != 0 && can_use_paddi)
10655 count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest,
10656 GEN_INT ((ud2 << 16) | ud1)));
10657 /* Use oris and/or ori for the low 32 bits. */
10658 if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
10659 count_or_emit_insn (dest,
10660 gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10661 if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
10662 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10663 return;
10664 }
10665
10666 if (can_create_pseudo_p ())
10667 {
10668 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10669 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
10670 rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
10671 rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
10672 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10673 rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
10674 num = (ud4 << 16) | ud3;
10675 rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
10676
10677 rtx rldimi = gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
10678 GEN_INT (0xffffffff));
10679 count_or_emit_insn (rldimi);
10680 return;
10681 }
10682
10683 /* lis DEST,UD4 ; ori DEST,UD3 ; sldi DEST,32 ;
10684 oris DEST,UD2 ; ori DEST,UD1. */
10685 count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
10686 if (ud3 != 0)
10687 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
10688
10689 count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10690 if (ud2 != 0)
10691 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10692 if (ud1 != 0)
10693 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10694
10695 return;
10696 }
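
/* A worked example: for c = 0x123456789ABCDEF0 none of the special cases
   above apply, so with pseudos available this emits the five-insn sequence
   "lis rH,0x1234; ori rH,rH,0x5678; lis rD,0x9ABC; ori rD,rD,0xDEF0;
   rldimi rD,rH,32,0" (without TARGET_PREFIXED; with it, two pli plus one
   rldimi suffice). */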
10697
10698 /* Helper for the following. Get rid of [r+r] memory refs
10699 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10700
10701 static void
10702 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10703 {
10704 if (MEM_P (operands[0])
10705 && !REG_P (XEXP (operands[0], 0))
10706 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10707 GET_MODE (operands[0]), false))
10708 operands[0]
10709 = replace_equiv_address (operands[0],
10710 copy_addr_to_reg (XEXP (operands[0], 0)));
10711
10712 if (MEM_P (operands[1])
10713 && !REG_P (XEXP (operands[1], 0))
10714 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10715 GET_MODE (operands[1]), false))
10716 operands[1]
10717 = replace_equiv_address (operands[1],
10718 copy_addr_to_reg (XEXP (operands[1], 0)));
10719 }
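
/* For example, (mem:TI (plus:DI (reg:DI r9) (reg:DI r10))) is rewritten as
   (mem:TI (reg:DI rTMP)) after copying the address r9+r10 into a fresh
   pseudo rTMP (register names here are only illustrative). */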
10720
10721 /* Generate a vector of constants to permute MODE for a little-endian
10722 storage operation by swapping the two halves of a vector. */
10723 static rtvec
10724 rs6000_const_vec (machine_mode mode)
10725 {
10726 int i, subparts;
10727 rtvec v;
10728
10729 switch (mode)
10730 {
10731 case E_V1TImode:
10732 subparts = 1;
10733 break;
10734 case E_V2DFmode:
10735 case E_V2DImode:
10736 subparts = 2;
10737 break;
10738 case E_V4SFmode:
10739 case E_V4SImode:
10740 subparts = 4;
10741 break;
10742 case E_V8HImode:
10743 subparts = 8;
10744 break;
10745 case E_V16QImode:
10746 subparts = 16;
10747 break;
10748 default:
10749 gcc_unreachable ();
10750 }
10751
10752 v = rtvec_alloc (subparts);
10753
10754 for (i = 0; i < subparts / 2; ++i)
10755 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10756 for (i = subparts / 2; i < subparts; ++i)
10757 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10758
10759 return v;
10760 }
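
/* For example, for V4SImode this returns the permutation {2, 3, 0, 1}:
   selecting the elements in that order swaps the two 64-bit halves of the
   vector, which compensates for the doubleword swap done by lxvd2x and
   stxvd2x on little-endian. */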
10761
10762 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10763 store operation. */
10764 void
10765 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10766 {
10767 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10768 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10769
10770 /* Scalar permutations are easier to express in integer modes rather than
10771 floating-point modes, so cast them here. We use V1TImode instead
10772 of TImode to ensure that the values don't go through GPRs. */
10773 if (FLOAT128_VECTOR_P (mode))
10774 {
10775 dest = gen_lowpart (V1TImode, dest);
10776 source = gen_lowpart (V1TImode, source);
10777 mode = V1TImode;
10778 }
10779
10780 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10781 scalar. */
10782 if (mode == TImode || mode == V1TImode)
10783 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10784 GEN_INT (64))));
10785 else
10786 {
10787 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10788 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10789 }
10790 }
10791
10792 /* Emit a little-endian load from vector memory location SOURCE to VSX
10793 register DEST in mode MODE. The load is done with two permuting
10794 insns that represent an lxvd2x and an xxpermdi. */
10795 void
10796 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10797 {
10798 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10799 V1TImode). */
10800 if (mode == TImode || mode == V1TImode)
10801 {
10802 mode = V2DImode;
10803 dest = gen_lowpart (V2DImode, dest);
10804 source = adjust_address (source, V2DImode, 0);
10805 }
10806
10807 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10808 rs6000_emit_le_vsx_permute (tmp, source, mode);
10809 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10810 }
10811
10812 /* Emit a little-endian store to vector memory location DEST from VSX
10813 register SOURCE in mode MODE. The store is done with two permuting
10814 insns that represent an xxpermdi and an stxvd2x. */
10815 void
10816 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10817 {
10818 /* This should never be called after LRA. */
10819 gcc_assert (can_create_pseudo_p ());
10820
10821 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10822 V1TImode). */
10823 if (mode == TImode || mode == V1TImode)
10824 {
10825 mode = V2DImode;
10826 dest = adjust_address (dest, V2DImode, 0);
10827 source = gen_lowpart (V2DImode, source);
10828 }
10829
10830 rtx tmp = gen_reg_rtx_and_attrs (source);
10831 rs6000_emit_le_vsx_permute (tmp, source, mode);
10832 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10833 }
10834
10835 /* Emit a sequence representing a little-endian VSX load or store,
10836 moving data from SOURCE to DEST in mode MODE. This is done
10837 separately from rs6000_emit_move to ensure it is called only
10838 during expand. LE VSX loads and stores introduced later are
10839 handled with a split. The expand-time RTL generation allows
10840 us to optimize away redundant pairs of register-permutes. */
10841 void
10842 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10843 {
10844 gcc_assert (!BYTES_BIG_ENDIAN
10845 && VECTOR_MEM_VSX_P (mode)
10846 && !TARGET_P9_VECTOR
10847 && !gpr_or_gpr_p (dest, source)
10848 && (MEM_P (source) ^ MEM_P (dest)));
10849
10850 if (MEM_P (source))
10851 {
10852 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10853 rs6000_emit_le_vsx_load (dest, source, mode);
10854 }
10855 else
10856 {
10857 if (!REG_P (source))
10858 source = force_reg (mode, source);
10859 rs6000_emit_le_vsx_store (dest, source, mode);
10860 }
10861 }
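
/* As a sketch of the net effect: on little-endian without TARGET_P9_VECTOR,
   a V4SImode load ends up as "lxvd2x vD,...; xxpermdi vD,vD,vD,2" once the
   two permutes emitted above are combined into real instructions, and a
   store as the mirror-image "xxpermdi; stxvd2x" pair. */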
10862
10863 /* Return whether a SFmode or SImode move can be done without converting one
10864 mode to another. This arises when we have:
10865
10866 (SUBREG:SF (REG:SI ...))
10867 (SUBREG:SI (REG:SF ...))
10868
10869 and one of the values is in a floating point/vector register, where SFmode
10870 scalars are stored in DFmode format. */
10871
10872 bool
10873 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10874 {
10875 if (TARGET_ALLOW_SF_SUBREG)
10876 return true;
10877
10878 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10879 return true;
10880
10881 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10882 return true;
10883
10884 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10885 if (SUBREG_P (dest))
10886 {
10887 rtx dest_subreg = SUBREG_REG (dest);
10888 rtx src_subreg = SUBREG_REG (src);
10889 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10890 }
10891
10892 return false;
10893 }
10894
10895
10896 /* Helper function to change moves with:
10897
10898 (SUBREG:SF (REG:SI)) and
10899 (SUBREG:SI (REG:SF))
10900
10901 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10902 values are stored as DFmode values in the VSX registers. We need to convert
10903 the bits before we can use a direct move or operate on the bits in the
10904 vector register as an integer type.
10905
10906 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10907
10908 static bool
10909 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10910 {
10911 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10912 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10913 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10914 {
10915 rtx inner_source = SUBREG_REG (source);
10916 machine_mode inner_mode = GET_MODE (inner_source);
10917
10918 if (mode == SImode && inner_mode == SFmode)
10919 {
10920 emit_insn (gen_movsi_from_sf (dest, inner_source));
10921 return true;
10922 }
10923
10924 if (mode == SFmode && inner_mode == SImode)
10925 {
10926 emit_insn (gen_movsf_from_si (dest, inner_source));
10927 return true;
10928 }
10929 }
10930
10931 return false;
10932 }
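
/* For example, (set (reg:SI r3) (subreg:SI (reg:SF f1) 0)) is replaced by
   the movsi_from_sf pattern, which converts the DFmode-format value held in
   the VSX register to SFmode layout before moving the bits to the GPR. */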
10933
10934 /* Emit a move from SOURCE to DEST in mode MODE. */
10935 void
10936 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10937 {
10938 rtx operands[2];
10939 operands[0] = dest;
10940 operands[1] = source;
10941
10942 if (TARGET_DEBUG_ADDR)
10943 {
10944 fprintf (stderr,
10945 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10946 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10947 GET_MODE_NAME (mode),
10948 lra_in_progress,
10949 reload_completed,
10950 can_create_pseudo_p ());
10951 debug_rtx (dest);
10952 fprintf (stderr, "source:\n");
10953 debug_rtx (source);
10954 }
10955
10956 /* Check that we get CONST_WIDE_INT only when we should. */
10957 if (CONST_WIDE_INT_P (operands[1])
10958 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10959 gcc_unreachable ();
10960
10961 #ifdef HAVE_AS_GNU_ATTRIBUTE
10962 /* If we use a long double type, set the flags in .gnu_attribute that say
10963 what the long double type is. This is to allow the linker's warning
10964 message for the wrong long double to be useful, even if the function does
10965 not do a call (for example, doing a 128-bit add on power9 if the long
10966 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
10967 is used when it is not the default long double type. */
10968 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10969 {
10970 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10971 rs6000_passes_float = rs6000_passes_long_double = true;
10972
10973 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10974 rs6000_passes_float = rs6000_passes_long_double = true;
10975 }
10976 #endif
10977
10978 /* See if we need to special case SImode/SFmode SUBREG moves. */
10979 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10980 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10981 return;
10982
10983 /* Check if GCC is setting up a block move that will end up using FP
10984 registers as temporaries. We must make sure this is acceptable. */
10985 if (MEM_P (operands[0])
10986 && MEM_P (operands[1])
10987 && mode == DImode
10988 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10989 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10990 && ! (rs6000_slow_unaligned_access (SImode,
10991 (MEM_ALIGN (operands[0]) > 32
10992 ? 32 : MEM_ALIGN (operands[0])))
10993 || rs6000_slow_unaligned_access (SImode,
10994 (MEM_ALIGN (operands[1]) > 32
10995 ? 32 : MEM_ALIGN (operands[1]))))
10996 && ! MEM_VOLATILE_P (operands [0])
10997 && ! MEM_VOLATILE_P (operands [1]))
10998 {
10999 emit_move_insn (adjust_address (operands[0], SImode, 0),
11000 adjust_address (operands[1], SImode, 0));
11001 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
11002 adjust_address (copy_rtx (operands[1]), SImode, 4));
11003 return;
11004 }
11005
11006 if (can_create_pseudo_p () && MEM_P (operands[0])
11007 && !gpc_reg_operand (operands[1], mode))
11008 operands[1] = force_reg (mode, operands[1]);
11009
11010 /* Recognize the case where operand[1] is a reference to thread-local
11011 data and load its address to a register. */
11012 if (tls_referenced_p (operands[1]))
11013 {
11014 enum tls_model model;
11015 rtx tmp = operands[1];
11016 rtx addend = NULL;
11017
11018 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
11019 {
11020 addend = XEXP (XEXP (tmp, 0), 1);
11021 tmp = XEXP (XEXP (tmp, 0), 0);
11022 }
11023
11024 gcc_assert (SYMBOL_REF_P (tmp));
11025 model = SYMBOL_REF_TLS_MODEL (tmp);
11026 gcc_assert (model != 0);
11027
11028 tmp = rs6000_legitimize_tls_address (tmp, model);
11029 if (addend)
11030 {
11031 tmp = gen_rtx_PLUS (mode, tmp, addend);
11032 tmp = force_operand (tmp, operands[0]);
11033 }
11034 operands[1] = tmp;
11035 }
11036
11037 /* 128-bit constant floating-point values on Darwin should really be loaded
11038 as two parts. However, this premature splitting is a problem when DFmode
11039 values can go into Altivec registers. */
11040 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
11041 && !reg_addr[DFmode].scalar_in_vmx_p)
11042 {
11043 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
11044 simplify_gen_subreg (DFmode, operands[1], mode, 0),
11045 DFmode);
11046 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
11047 GET_MODE_SIZE (DFmode)),
11048 simplify_gen_subreg (DFmode, operands[1], mode,
11049 GET_MODE_SIZE (DFmode)),
11050 DFmode);
11051 return;
11052 }
11053
11054 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11055 p1:SD) if p1 is not of floating point class and p0 is spilled as
11056 we can have no analogous movsd_store for this. */
11057 if (lra_in_progress && mode == DDmode
11058 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
11059 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11060 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
11061 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
11062 {
11063 enum reg_class cl;
11064 int regno = REGNO (SUBREG_REG (operands[1]));
11065
11066 if (!HARD_REGISTER_NUM_P (regno))
11067 {
11068 cl = reg_preferred_class (regno);
11069 regno = reg_renumber[regno];
11070 if (regno < 0)
11071 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
11072 }
11073 if (regno >= 0 && ! FP_REGNO_P (regno))
11074 {
11075 mode = SDmode;
11076 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
11077 operands[1] = SUBREG_REG (operands[1]);
11078 }
11079 }
11080 if (lra_in_progress
11081 && mode == SDmode
11082 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
11083 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11084 && (REG_P (operands[1])
11085 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
11086 {
11087 int regno = reg_or_subregno (operands[1]);
11088 enum reg_class cl;
11089
11090 if (!HARD_REGISTER_NUM_P (regno))
11091 {
11092 cl = reg_preferred_class (regno);
11093 gcc_assert (cl != NO_REGS);
11094 regno = reg_renumber[regno];
11095 if (regno < 0)
11096 regno = ira_class_hard_regs[cl][0];
11097 }
11098 if (FP_REGNO_P (regno))
11099 {
11100 if (GET_MODE (operands[0]) != DDmode)
11101 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
11102 emit_insn (gen_movsd_store (operands[0], operands[1]));
11103 }
11104 else if (INT_REGNO_P (regno))
11105 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11106 else
11107 gcc_unreachable ();
11108 return;
11109 }
11110 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11111 p1:DD)) if p0 is not of floating point class and p1 is spilled as
11112 we can have no analogous movsd_load for this. */
11113 if (lra_in_progress && mode == DDmode
11114 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
11115 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
11116 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
11117 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11118 {
11119 enum reg_class cl;
11120 int regno = REGNO (SUBREG_REG (operands[0]));
11121
11122 if (!HARD_REGISTER_NUM_P (regno))
11123 {
11124 cl = reg_preferred_class (regno);
11125 regno = reg_renumber[regno];
11126 if (regno < 0)
11127 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
11128 }
11129 if (regno >= 0 && ! FP_REGNO_P (regno))
11130 {
11131 mode = SDmode;
11132 operands[0] = SUBREG_REG (operands[0]);
11133 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
11134 }
11135 }
11136 if (lra_in_progress
11137 && mode == SDmode
11138 && (REG_P (operands[0])
11139 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
11140 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
11141 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11142 {
11143 int regno = reg_or_subregno (operands[0]);
11144 enum reg_class cl;
11145
11146 if (!HARD_REGISTER_NUM_P (regno))
11147 {
11148 cl = reg_preferred_class (regno);
11149 gcc_assert (cl != NO_REGS);
11150 regno = reg_renumber[regno];
11151 if (regno < 0)
11152 regno = ira_class_hard_regs[cl][0];
11153 }
11154 if (FP_REGNO_P (regno))
11155 {
11156 if (GET_MODE (operands[1]) != DDmode)
11157 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
11158 emit_insn (gen_movsd_load (operands[0], operands[1]));
11159 }
11160 else if (INT_REGNO_P (regno))
11161 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11162 else
11163 gcc_unreachable ();
11164 return;
11165 }
11166
11167 /* FIXME: In the long term, this switch statement should go away
11168 and be replaced by a sequence of tests based on things like
11169 mode == Pmode. */
11170 switch (mode)
11171 {
11172 case E_HImode:
11173 case E_QImode:
11174 if (CONSTANT_P (operands[1])
11175 && !CONST_INT_P (operands[1]))
11176 operands[1] = force_const_mem (mode, operands[1]);
11177 break;
11178
11179 case E_TFmode:
11180 case E_TDmode:
11181 case E_IFmode:
11182 case E_KFmode:
11183 if (FLOAT128_2REG_P (mode))
11184 rs6000_eliminate_indexed_memrefs (operands);
11185 /* fall through */
11186
11187 case E_DFmode:
11188 case E_DDmode:
11189 case E_SFmode:
11190 case E_SDmode:
11191 if (CONSTANT_P (operands[1])
11192 && ! easy_fp_constant (operands[1], mode))
11193 operands[1] = force_const_mem (mode, operands[1]);
11194 break;
11195
11196 case E_V16QImode:
11197 case E_V8HImode:
11198 case E_V4SFmode:
11199 case E_V4SImode:
11200 case E_V2DFmode:
11201 case E_V2DImode:
11202 case E_V1TImode:
11203 if (CONSTANT_P (operands[1])
11204 && !easy_vector_constant (operands[1], mode))
11205 operands[1] = force_const_mem (mode, operands[1]);
11206 break;
11207
11208 case E_OOmode:
11209 case E_XOmode:
11210 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
11211 error ("%qs is an opaque type, and you cannot set it to other values",
11212 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
11213 break;
11214
11215 case E_SImode:
11216 case E_DImode:
11217 /* Use the default pattern for the address of ELF small data. */
11218 if (TARGET_ELF
11219 && mode == Pmode
11220 && DEFAULT_ABI == ABI_V4
11221 && (SYMBOL_REF_P (operands[1])
11222 || GET_CODE (operands[1]) == CONST)
11223 && small_data_operand (operands[1], mode))
11224 {
11225 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11226 return;
11227 }
11228
11229 /* Use the default pattern for loading up PC-relative addresses. */
11230 if (TARGET_PCREL && mode == Pmode
11231 && pcrel_local_or_external_address (operands[1], Pmode))
11232 {
11233 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11234 return;
11235 }
11236
11237 if (DEFAULT_ABI == ABI_V4
11238 && mode == Pmode && mode == SImode
11239 && flag_pic == 1 && got_operand (operands[1], mode))
11240 {
11241 emit_insn (gen_movsi_got (operands[0], operands[1]));
11242 return;
11243 }
11244
11245 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11246 && TARGET_NO_TOC_OR_PCREL
11247 && ! flag_pic
11248 && mode == Pmode
11249 && CONSTANT_P (operands[1])
11250 && GET_CODE (operands[1]) != HIGH
11251 && !CONST_INT_P (operands[1]))
11252 {
11253 rtx target = (!can_create_pseudo_p ()
11254 ? operands[0]
11255 : gen_reg_rtx (mode));
11256
11257 /* If this is a function address on -mcall-aixdesc,
11258 convert it to the address of the descriptor. */
11259 if (DEFAULT_ABI == ABI_AIX
11260 && SYMBOL_REF_P (operands[1])
11261 && XSTR (operands[1], 0)[0] == '.')
11262 {
11263 const char *name = XSTR (operands[1], 0);
11264 rtx new_ref;
11265 while (*name == '.')
11266 name++;
11267 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11268 CONSTANT_POOL_ADDRESS_P (new_ref)
11269 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11270 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11271 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11272 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11273 operands[1] = new_ref;
11274 }
11275
11276 if (DEFAULT_ABI == ABI_DARWIN)
11277 {
11278 #if TARGET_MACHO
11279 /* This is not PIC code, but could require the subset of
11280 indirections used by mdynamic-no-pic. */
11281 if (MACHO_DYNAMIC_NO_PIC_P)
11282 {
11283 /* Take care of any required data indirection. */
11284 operands[1] = rs6000_machopic_legitimize_pic_address (
11285 operands[1], mode, operands[0]);
11286 if (operands[0] != operands[1])
11287 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11288 return;
11289 }
11290 #endif
11291 emit_insn (gen_macho_high (Pmode, target, operands[1]));
11292 emit_insn (gen_macho_low (Pmode, operands[0],
11293 target, operands[1]));
11294 return;
11295 }
11296
11297 emit_insn (gen_elf_high (target, operands[1]));
11298 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11299 return;
11300 }
11301
11302 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11303 and we have put it in the TOC, we just need to make a TOC-relative
11304 reference to it. */
11305 if (TARGET_TOC
11306 && SYMBOL_REF_P (operands[1])
11307 && use_toc_relative_ref (operands[1], mode))
11308 operands[1] = create_TOC_reference (operands[1], operands[0]);
11309 else if (mode == Pmode
11310 && CONSTANT_P (operands[1])
11311 && GET_CODE (operands[1]) != HIGH
11312 && ((REG_P (operands[0])
11313 && FP_REGNO_P (REGNO (operands[0])))
11314 || !CONST_INT_P (operands[1])
11315 || (num_insns_constant (operands[1], mode)
11316 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11317 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
11318 && (TARGET_CMODEL == CMODEL_SMALL
11319 || can_create_pseudo_p ()
11320 || (REG_P (operands[0])
11321 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11322 {
11323
11324 #if TARGET_MACHO
11325 /* Darwin uses a special PIC legitimizer. */
11326 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11327 {
11328 operands[1] =
11329 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11330 operands[0]);
11331 if (operands[0] != operands[1])
11332 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11333 return;
11334 }
11335 #endif
11336
11337 /* If we are to limit the number of things we put in the TOC and
11338 this is a symbol plus a constant we can add in one insn,
11339 just put the symbol in the TOC and add the constant. */
11340 if (GET_CODE (operands[1]) == CONST
11341 && TARGET_NO_SUM_IN_TOC
11342 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11343 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11344 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11345 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
11346 && ! side_effects_p (operands[0]))
11347 {
11348 rtx sym =
11349 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11350 rtx other = XEXP (XEXP (operands[1], 0), 1);
11351
11352 sym = force_reg (mode, sym);
11353 emit_insn (gen_add3_insn (operands[0], sym, other));
11354 return;
11355 }
11356
11357 operands[1] = force_const_mem (mode, operands[1]);
11358
11359 if (TARGET_TOC
11360 && SYMBOL_REF_P (XEXP (operands[1], 0))
11361 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11362 {
11363 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11364 operands[0]);
11365 operands[1] = gen_const_mem (mode, tocref);
11366 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11367 }
11368 }
11369 break;
11370
11371 case E_TImode:
11372 if (!VECTOR_MEM_VSX_P (TImode))
11373 rs6000_eliminate_indexed_memrefs (operands);
11374 break;
11375
11376 case E_PTImode:
11377 rs6000_eliminate_indexed_memrefs (operands);
11378 break;
11379
11380 default:
11381 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11382 }
11383
11384 /* Above, we may have called force_const_mem which may have returned
11385 an invalid address. If we can, fix this up; otherwise, reload will
11386 have to deal with it. */
11387 if (MEM_P (operands[1]))
11388 operands[1] = validize_mem (operands[1]);
11389
11390 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11391 }
11392 \f
11393
11394 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11395 static void
11396 init_float128_ibm (machine_mode mode)
11397 {
11398 if (!TARGET_XL_COMPAT)
11399 {
11400 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
11401 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
11402 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
11403 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
11404
11405 if (!TARGET_HARD_FLOAT)
11406 {
11407 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
11408 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
11409 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
11410 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
11411 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
11412 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
11413 set_optab_libfunc (le_optab, mode, "__gcc_qle");
11414 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
11415
11416 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
11417 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
11418 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
11419 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
11420 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
11421 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
11422 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
11423 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
11424 }
11425 }
11426 else
11427 {
11428 set_optab_libfunc (add_optab, mode, "_xlqadd");
11429 set_optab_libfunc (sub_optab, mode, "_xlqsub");
11430 set_optab_libfunc (smul_optab, mode, "_xlqmul");
11431 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11432 }
11433
11434 /* Add various conversions for IFmode to use the traditional TFmode
11435 names. */
11436 if (mode == IFmode)
11437 {
11438 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11439 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11440 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11441 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11442 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11443 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11444
11445 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
11446 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
11447
11448 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
11449 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
11450
11451 if (TARGET_POWERPC64)
11452 {
11453 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11454 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11455 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11456 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11457 }
11458 }
11459 }
11460
11461 /* Set up IEEE 128-bit floating point routines. Use different names if the
11462 arguments can be passed in a vector register. The historical PowerPC
11463 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11464 continue to use that if we aren't using vector registers to pass IEEE
11465 128-bit floating point. */
11466
11467 static void
11468 init_float128_ieee (machine_mode mode)
11469 {
11470 if (FLOAT128_VECTOR_P (mode))
11471 {
11472 set_optab_libfunc (add_optab, mode, "__addkf3");
11473 set_optab_libfunc (sub_optab, mode, "__subkf3");
11474 set_optab_libfunc (neg_optab, mode, "__negkf2");
11475 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11476 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11477 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11478 set_optab_libfunc (abs_optab, mode, "__abskf2");
11479 set_optab_libfunc (powi_optab, mode, "__powikf2");
11480
11481 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11482 set_optab_libfunc (ne_optab, mode, "__nekf2");
11483 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11484 set_optab_libfunc (ge_optab, mode, "__gekf2");
11485 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11486 set_optab_libfunc (le_optab, mode, "__lekf2");
11487 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11488
11489 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11490 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11491 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11492 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11493
11494 set_conv_libfunc (trunc_optab, mode, IFmode, "__trunctfkf2");
11495 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11496 set_conv_libfunc (trunc_optab, mode, TFmode, "__trunctfkf2");
11497
11498 set_conv_libfunc (sext_optab, IFmode, mode, "__extendkftf2");
11499 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11500 set_conv_libfunc (sext_optab, TFmode, mode, "__extendkftf2");
11501
11502 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11503 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11504 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11505 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11506 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11507 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11508
11509 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11510 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11511 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11512 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11513
11514 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11515 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11516 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11517 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11518
11519 if (TARGET_POWERPC64)
11520 {
11521 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11522 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11523 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11524 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11525 }
11526 }
11527
11528 else
11529 {
11530 set_optab_libfunc (add_optab, mode, "_q_add");
11531 set_optab_libfunc (sub_optab, mode, "_q_sub");
11532 set_optab_libfunc (neg_optab, mode, "_q_neg");
11533 set_optab_libfunc (smul_optab, mode, "_q_mul");
11534 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11535 if (TARGET_PPC_GPOPT)
11536 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11537
11538 set_optab_libfunc (eq_optab, mode, "_q_feq");
11539 set_optab_libfunc (ne_optab, mode, "_q_fne");
11540 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11541 set_optab_libfunc (ge_optab, mode, "_q_fge");
11542 set_optab_libfunc (lt_optab, mode, "_q_flt");
11543 set_optab_libfunc (le_optab, mode, "_q_fle");
11544
11545 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11546 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11547 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11548 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11549 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11550 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11551 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11552 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11553 }
11554 }
11555
11556 static void
11557 rs6000_init_libfuncs (void)
11558 {
11559 /* __float128 support. */
11560 if (TARGET_FLOAT128_TYPE)
11561 {
11562 init_float128_ibm (IFmode);
11563 init_float128_ieee (KFmode);
11564 }
11565
11566 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11567 if (TARGET_LONG_DOUBLE_128)
11568 {
11569 if (!TARGET_IEEEQUAD)
11570 init_float128_ibm (TFmode);
11571
11572 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11573 else
11574 init_float128_ieee (TFmode);
11575 }
11576 }
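
/* For example, when TFmode is IEEE 128-bit (-mabi=ieeelongdouble), a long
   double addition that is not open-coded is compiled to a call to __addkf3
   via the table set up in init_float128_ieee above. */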
11577
11578 /* Emit a potentially record-form instruction, setting DST from SRC.
11579 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11580 signed comparison of DST with zero. If DOT is 1, the generated RTL
11581 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11582 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11583 a separate COMPARE. */
11584
11585 void
11586 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11587 {
11588 if (dot == 0)
11589 {
11590 emit_move_insn (dst, src);
11591 return;
11592 }
11593
11594 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11595 {
11596 emit_move_insn (dst, src);
11597 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11598 return;
11599 }
11600
11601 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11602 if (dot == 1)
11603 {
11604 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11605 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11606 }
11607 else
11608 {
11609 rtx set = gen_rtx_SET (dst, src);
11610 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11611 }
11612 }
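
/* For example, with DOT == 1, CCREG == cr0 and SRC an AND, this emits a
   PARALLEL of the CC-setting compare plus a clobber of DST, which matches a
   record-form insn such as "and. rT,rA,rB". */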
11613
11614 \f
11615 /* A validation routine: say whether CODE, a condition code, and MODE
11616 match. The other alternatives either don't make sense or should
11617 never be generated. */
11618
11619 void
11620 validate_condition_mode (enum rtx_code code, machine_mode mode)
11621 {
11622 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11623 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11624 && GET_MODE_CLASS (mode) == MODE_CC);
11625
11626 /* These don't make sense. */
11627 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11628 || mode != CCUNSmode);
11629
11630 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11631 || mode == CCUNSmode);
11632
11633 gcc_assert (mode == CCFPmode
11634 || (code != ORDERED && code != UNORDERED
11635 && code != UNEQ && code != LTGT
11636 && code != UNGT && code != UNLT
11637 && code != UNGE && code != UNLE));
11638
11639 /* These are invalid; the information is not there. */
11640 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11641 }
11642
11643 \f
11644 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11645 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11646 not zero, store there the bit offset (counted from the right) where
11647 the single stretch of 1 bits begins; and similarly for B, the bit
11648 offset where it ends. */
11649
11650 bool
11651 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11652 {
11653 unsigned HOST_WIDE_INT val = INTVAL (mask);
11654 unsigned HOST_WIDE_INT bit;
11655 int nb, ne;
11656 int n = GET_MODE_PRECISION (mode);
11657
11658 if (mode != DImode && mode != SImode)
11659 return false;
11660
11661 if (INTVAL (mask) >= 0)
11662 {
11663 bit = val & -val;
11664 ne = exact_log2 (bit);
11665 nb = exact_log2 (val + bit);
11666 }
11667 else if (val + 1 == 0)
11668 {
11669 nb = n;
11670 ne = 0;
11671 }
11672 else if (val & 1)
11673 {
11674 val = ~val;
11675 bit = val & -val;
11676 nb = exact_log2 (bit);
11677 ne = exact_log2 (val + bit);
11678 }
11679 else
11680 {
11681 bit = val & -val;
11682 ne = exact_log2 (bit);
11683 if (val + bit == 0)
11684 nb = n;
11685 else
11686 nb = 0;
11687 }
11688
11689 nb--;
11690
11691 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11692 return false;
11693
11694 if (b)
11695 *b = nb;
11696 if (e)
11697 *e = ne;
11698
11699 return true;
11700 }
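
/* For example, MASK == 0x00ffff00 yields *B == 23 and *E == 8: the single
   stretch of 1 bits occupies bits 8..23. A wrap-around mask such as
   0xff000000000000ff in DImode yields *B == 7 and *E == 56. */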
11701
11702 bool
11703 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11704 {
11705 int nb, ne;
11706 if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0)
11707 {
11708 if (TARGET_64BIT)
11709 return true;
11710 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11711 <= 0x7fffffff. */
11712 return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff;
11713 }
11714
11715 return false;
11716 }
11717
11718 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11719 or rldicr instruction, to implement an AND with it in mode MODE. */
11720
11721 bool
11722 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11723 {
11724 int nb, ne;
11725
11726 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11727 return false;
11728
11729 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11730 does not wrap. */
11731 if (mode == DImode)
11732 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11733
11734 /* For SImode, rlwinm can do everything. */
11735 if (mode == SImode)
11736 return (nb < 32 && ne < 32);
11737
11738 return false;
11739 }
11740
11741 /* Return the instruction template for an AND with mask in mode MODE, with
11742 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11743
11744 const char *
11745 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11746 {
11747 int nb, ne;
11748
11749 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11750 gcc_unreachable ();
11751
11752 if (mode == DImode && ne == 0)
11753 {
11754 operands[3] = GEN_INT (63 - nb);
11755 if (dot)
11756 return "rldicl. %0,%1,0,%3";
11757 return "rldicl %0,%1,0,%3";
11758 }
11759
11760 if (mode == DImode && nb == 63)
11761 {
11762 operands[3] = GEN_INT (63 - ne);
11763 if (dot)
11764 return "rldicr. %0,%1,0,%3";
11765 return "rldicr %0,%1,0,%3";
11766 }
11767
11768 if (nb < 32 && ne < 32)
11769 {
11770 operands[3] = GEN_INT (31 - nb);
11771 operands[4] = GEN_INT (31 - ne);
11772 if (dot)
11773 return "rlwinm. %0,%1,0,%3,%4";
11774 return "rlwinm %0,%1,0,%3,%4";
11775 }
11776
11777 gcc_unreachable ();
11778 }
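
/* For example, an SImode AND with 0xfffffff0 (nb = 31, ne = 4) produces
   "rlwinm %0,%1,0,0,27", and a DImode AND with 0x00000000ffffffff (ne = 0)
   produces "rldicl %0,%1,0,32". */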
11779
11780 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11781 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11782 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11783
11784 bool
11785 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11786 {
11787 int nb, ne;
11788
11789 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11790 return false;
11791
11792 int n = GET_MODE_PRECISION (mode);
11793 int sh = -1;
11794
11795 if (CONST_INT_P (XEXP (shift, 1)))
11796 {
11797 sh = INTVAL (XEXP (shift, 1));
11798 if (sh < 0 || sh >= n)
11799 return false;
11800 }
11801
11802 rtx_code code = GET_CODE (shift);
11803
11804 /* Convert any shift by 0 to a rotate, to simplify below code. */
11805 if (sh == 0)
11806 code = ROTATE;
11807
11808 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11809 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11810 code = ASHIFT;
11811 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11812 {
11813 code = LSHIFTRT;
11814 sh = n - sh;
11815 }
11816
11817 /* DImode rotates need rld*. */
11818 if (mode == DImode && code == ROTATE)
11819 return (nb == 63 || ne == 0 || ne == sh);
11820
11821 /* SImode rotates need rlw*. */
11822 if (mode == SImode && code == ROTATE)
11823 return (nb < 32 && ne < 32 && sh < 32);
11824
11825 /* Wrap-around masks are only okay for rotates. */
11826 if (ne > nb)
11827 return false;
11828
11829 /* Variable shifts are only okay for rotates. */
11830 if (sh < 0)
11831 return false;
11832
11833 /* Don't allow ASHIFT if the mask is wrong for that. */
11834 if (code == ASHIFT && ne < sh)
11835 return false;
11836
11837 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11838 if the mask is wrong for that. */
11839 if (nb < 32 && ne < 32 && sh < 32
11840 && !(code == LSHIFTRT && nb >= 32 - sh))
11841 return true;
11842
11843 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11844 if the mask is wrong for that. */
11845 if (code == LSHIFTRT)
11846 sh = 64 - sh;
11847 if (nb == 63 || ne == 0 || ne == sh)
11848 return !(code == LSHIFTRT && nb >= sh);
11849
11850 return false;
11851 }
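
/* For example, (ashift:DI x 16) under the mask 0xffffffffffff0000 gives
   nb = 63 and ne = 16, which the rld* test above accepts; the template
   chosen later is "rldicr %0,%1,16,47", i.e. an sldi by 16. */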
11852
11853 /* Return the instruction template for a shift with mask in mode MODE, with
11854 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11855
11856 const char *
11857 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11858 {
11859 int nb, ne;
11860
11861 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11862 gcc_unreachable ();
11863
11864 if (mode == DImode && ne == 0)
11865 {
11866 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11867 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11868 operands[3] = GEN_INT (63 - nb);
11869 if (dot)
11870 return "rld%I2cl. %0,%1,%2,%3";
11871 return "rld%I2cl %0,%1,%2,%3";
11872 }
11873
11874 if (mode == DImode && nb == 63)
11875 {
11876 operands[3] = GEN_INT (63 - ne);
11877 if (dot)
11878 return "rld%I2cr. %0,%1,%2,%3";
11879 return "rld%I2cr %0,%1,%2,%3";
11880 }
11881
11882 if (mode == DImode
11883 && GET_CODE (operands[4]) != LSHIFTRT
11884 && CONST_INT_P (operands[2])
11885 && ne == INTVAL (operands[2]))
11886 {
11887 operands[3] = GEN_INT (63 - nb);
11888 if (dot)
11889 return "rld%I2c. %0,%1,%2,%3";
11890 return "rld%I2c %0,%1,%2,%3";
11891 }
11892
11893 if (nb < 32 && ne < 32)
11894 {
11895 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11896 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11897 operands[3] = GEN_INT (31 - nb);
11898 operands[4] = GEN_INT (31 - ne);
11899 /* This insn can also be a 64-bit rotate with mask that really makes
11900 it just a shift right (with mask); the %h below are to adjust for
11901 that situation (shift count is >= 32 in that case). */
11902 if (dot)
11903 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11904 return "rlw%I2nm %0,%1,%h2,%3,%4";
11905 }
11906
11907 gcc_unreachable ();
11908 }
11909
11910 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11911 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11912 ASHIFT, or LSHIFTRT) in mode MODE. */
11913
11914 bool
11915 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11916 {
11917 int nb, ne;
11918
11919 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11920 return false;
11921
11922 int n = GET_MODE_PRECISION (mode);
11923
11924 int sh = INTVAL (XEXP (shift, 1));
11925 if (sh < 0 || sh >= n)
11926 return false;
11927
11928 rtx_code code = GET_CODE (shift);
11929
11930 /* Convert any shift by 0 to a rotate, to simplify below code. */
11931 if (sh == 0)
11932 code = ROTATE;
11933
11934 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11935 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11936 code = ASHIFT;
11937 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11938 {
11939 code = LSHIFTRT;
11940 sh = n - sh;
11941 }
11942
11943 /* DImode rotates need rldimi. */
11944 if (mode == DImode && code == ROTATE)
11945 return (ne == sh);
11946
11947 /* SImode rotates need rlwimi. */
11948 if (mode == SImode && code == ROTATE)
11949 return (nb < 32 && ne < 32 && sh < 32);
11950
11951 /* Wrap-around masks are only okay for rotates. */
11952 if (ne > nb)
11953 return false;
11954
11955 /* Don't allow ASHIFT if the mask is wrong for that. */
11956 if (code == ASHIFT && ne < sh)
11957 return false;
11958
11959 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11960 if the mask is wrong for that. */
11961 if (nb < 32 && ne < 32 && sh < 32
11962 && !(code == LSHIFTRT && nb >= 32 - sh))
11963 return true;
11964
11965 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11966 if the mask is wrong for that. */
11967 if (code == LSHIFTRT)
11968 sh = 64 - sh;
11969 if (ne == sh)
11970 return !(code == LSHIFTRT && nb >= sh);
11971
11972 return false;
11973 }
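
/* For example, a rotate by 16 under the mask 0xffff0000 in SImode is a
   valid insert: it becomes "rlwimi rD,rS,16,0,15" (or "rldimi rD,rS,16,32"
   on a 64-bit target), placing the low halfword of rS into the high
   halfword of rD. */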
11974
11975 /* Return the instruction template for an insert with mask in mode MODE, with
11976 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11977
11978 const char *
11979 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11980 {
11981 int nb, ne;
11982
11983 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11984 gcc_unreachable ();
11985
11986 /* Prefer rldimi because rlwimi is cracked. */
11987 if (TARGET_POWERPC64
11988 && (!dot || mode == DImode)
11989 && GET_CODE (operands[4]) != LSHIFTRT
11990 && ne == INTVAL (operands[2]))
11991 {
11992 operands[3] = GEN_INT (63 - nb);
11993 if (dot)
11994 return "rldimi. %0,%1,%2,%3";
11995 return "rldimi %0,%1,%2,%3";
11996 }
11997
11998 if (nb < 32 && ne < 32)
11999 {
12000 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
12001 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
12002 operands[3] = GEN_INT (31 - nb);
12003 operands[4] = GEN_INT (31 - ne);
12004 if (dot)
12005 return "rlwimi. %0,%1,%2,%3,%4";
12006 return "rlwimi %0,%1,%2,%3,%4";
12007 }
12008
12009 gcc_unreachable ();
12010 }
12011
12012 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
12013 using two machine instructions. */
12014
12015 bool
12016 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
12017 {
12018 /* There are two kinds of AND we can handle with two insns:
12019 1) those we can do with two rl* insn;
12020 2) ori[s];xori[s].
12021
12022 We do not handle that last case yet. */
12023
12024 /* If there is just one stretch of ones, we can do it. */
12025 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
12026 return true;
12027
12028 /* Otherwise, fill in the lowest "hole"; if we can do the result with
12029 one insn, we can do the whole thing with two. */
12030 unsigned HOST_WIDE_INT val = INTVAL (c);
12031 unsigned HOST_WIDE_INT bit1 = val & -val;
12032 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
12033 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
12034 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
12035 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
12036 }
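
/* For example, C == 0xff00ff00 is not a single stretch, but bit1 == 0x100,
   bit2 == 0x10000 and bit3 == 0x1000000, so the test above checks
   0xff00ff00 + 0x1000000 - 0x10000 == 0xffffff00, which is a valid single
   mask; hence two insns suffice. */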
12037
12038 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
12039 If EXPAND is true, split rotate-and-mask instructions we generate to
12040 their constituent parts as well (this is used during expand); if DOT
12041 is 1, make the last insn a record-form instruction clobbering the
12042 destination GPR and setting the CC reg (from operands[3]); if 2, set
12043 that GPR as well as the CC reg. */
12044
12045 void
12046 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
12047 {
12048 gcc_assert (!(expand && dot));
12049
12050 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
12051
12052 /* If it is one stretch of ones, it is DImode; shift left, mask, then
12053 shift right. This generates better code than doing the masks without
12054 shifts, or shifting first right and then left. */
12055 int nb, ne;
12056 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
12057 {
12058 gcc_assert (mode == DImode);
12059
12060 int shift = 63 - nb;
12061 if (expand)
12062 {
12063 rtx tmp1 = gen_reg_rtx (DImode);
12064 rtx tmp2 = gen_reg_rtx (DImode);
12065 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
12066 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
12067 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
12068 }
12069 else
12070 {
12071 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
12072 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
12073 emit_move_insn (operands[0], tmp);
12074 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
12075 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12076 }
12077 return;
12078 }
12079
12080 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
12081 that does the rest. */
12082 unsigned HOST_WIDE_INT bit1 = val & -val;
12083 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
12084 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
12085 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
12086
12087 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
12088 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
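
/* Continuing the illustrative val = 0xf0f0 example from
   rs6000_is_valid_2insn_and: mask2 = 0xfff0 fills the hole, and
   mask1 = -0x1000 + 0x0100 - 1 (all ones except bits 8..11) clears it
   again, so mask1 & mask2 == val and the two successive ANDs compute the
   original AND.  */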
12089
12090 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
12091
12092 /* Two "no-rotate"-and-mask instructions, for SImode. */
12093 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
12094 {
12095 gcc_assert (mode == SImode);
12096
12097 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
12098 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
12099 emit_move_insn (reg, tmp);
12100 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
12101 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12102 return;
12103 }
12104
12105 gcc_assert (mode == DImode);
12106
12107 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
12108 insns; we have to do the first in SImode, because it wraps. */
12109 if (mask2 <= 0xffffffff
12110 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
12111 {
12112 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
12113 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
12114 GEN_INT (mask1));
12115 rtx reg_low = gen_lowpart (SImode, reg);
12116 emit_move_insn (reg_low, tmp);
12117 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
12118 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12119 return;
12120 }
12121
12122 /* Two rld* insns: rotate, clear the hole in the middle (which now is
12123 at the top end), rotate back and clear the other hole. */
12124 int right = exact_log2 (bit3);
12125 int left = 64 - right;
12126
12127 /* Rotate the mask too. */
12128 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
12129
12130 if (expand)
12131 {
12132 rtx tmp1 = gen_reg_rtx (DImode);
12133 rtx tmp2 = gen_reg_rtx (DImode);
12134 rtx tmp3 = gen_reg_rtx (DImode);
12135 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
12136 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
12137 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
12138 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
12139 }
12140 else
12141 {
12142 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
12143 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
12144 emit_move_insn (operands[0], tmp);
12145 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
12146 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
12147 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12148 }
12149 }
12150 \f
12151 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
12152 for lfq and stfq insns, provided both are hard floating-point registers. */
12153
12154 int
12155 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
12156 {
12157 /* We might have been passed a SUBREG. */
12158 if (!REG_P (reg1) || !REG_P (reg2))
12159 return 0;
12160
12161 /* We might have been passed non-floating-point registers. */
12162 if (!FP_REGNO_P (REGNO (reg1))
12163 || !FP_REGNO_P (REGNO (reg2)))
12164 return 0;
12165
12166 return (REGNO (reg1) == REGNO (reg2) - 1);
12167 }
12168
12169 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
12170 addr1 and addr2 must be in consecutive memory locations
12171 (addr2 == addr1 + 8). */
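
/* For illustration (hypothetical RTL, not taken from a real dump): the pair
   (mem:DF (plus (reg 9) (const_int 8)))
   (mem:DF (plus (reg 9) (const_int 16)))
   passes the checks below, while a differing base register or a gap other
   than 8 bytes does not.  */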
12172
12173 int
12174 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
12175 {
12176 rtx addr1, addr2;
12177 unsigned int reg1, reg2;
12178 int offset1, offset2;
12179
12180 /* The mems cannot be volatile. */
12181 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
12182 return 0;
12183
12184 addr1 = XEXP (mem1, 0);
12185 addr2 = XEXP (mem2, 0);
12186
12187 /* Extract an offset (if used) from the first addr. */
12188 if (GET_CODE (addr1) == PLUS)
12189 {
12190 /* If not a REG, return zero. */
12191 if (!REG_P (XEXP (addr1, 0)))
12192 return 0;
12193 else
12194 {
12195 reg1 = REGNO (XEXP (addr1, 0));
12196 /* The offset must be constant! */
12197 if (!CONST_INT_P (XEXP (addr1, 1)))
12198 return 0;
12199 offset1 = INTVAL (XEXP (addr1, 1));
12200 }
12201 }
12202 else if (!REG_P (addr1))
12203 return 0;
12204 else
12205 {
12206 reg1 = REGNO (addr1);
12207 /* This was a simple (mem (reg)) expression. Offset is 0. */
12208 offset1 = 0;
12209 }
12210
12211 /* And now for the second addr. */
12212 if (GET_CODE (addr2) == PLUS)
12213 {
12214 /* If not a REG, return zero. */
12215 if (!REG_P (XEXP (addr2, 0)))
12216 return 0;
12217 else
12218 {
12219 reg2 = REGNO (XEXP (addr2, 0));
12220 /* The offset must be constant. */
12221 if (!CONST_INT_P (XEXP (addr2, 1)))
12222 return 0;
12223 offset2 = INTVAL (XEXP (addr2, 1));
12224 }
12225 }
12226 else if (!REG_P (addr2))
12227 return 0;
12228 else
12229 {
12230 reg2 = REGNO (addr2);
12231 /* This was a simple (mem (reg)) expression. Offset is 0. */
12232 offset2 = 0;
12233 }
12234
12235 /* Both of these must have the same base register. */
12236 if (reg1 != reg2)
12237 return 0;
12238
12239 /* The offset for the second addr must be 8 more than the first addr. */
12240 if (offset2 != offset1 + 8)
12241 return 0;
12242
12243 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
12244 instructions. */
12245 return 1;
12246 }
12247 \f
12248 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
12249 need to use DDmode, in all other cases we can use the same mode. */
12250 static machine_mode
12251 rs6000_secondary_memory_needed_mode (machine_mode mode)
12252 {
12253 if (lra_in_progress && mode == SDmode)
12254 return DDmode;
12255 return mode;
12256 }
12257
12258 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
12259 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
12260 only work on the traditional altivec registers, note if an altivec register
12261 was chosen. */
12262
12263 static enum rs6000_reg_type
12264 register_to_reg_type (rtx reg, bool *is_altivec)
12265 {
12266 HOST_WIDE_INT regno;
12267 enum reg_class rclass;
12268
12269 if (SUBREG_P (reg))
12270 reg = SUBREG_REG (reg);
12271
12272 if (!REG_P (reg))
12273 return NO_REG_TYPE;
12274
12275 regno = REGNO (reg);
12276 if (!HARD_REGISTER_NUM_P (regno))
12277 {
12278 if (!lra_in_progress && !reload_completed)
12279 return PSEUDO_REG_TYPE;
12280
12281 regno = true_regnum (reg);
12282 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
12283 return PSEUDO_REG_TYPE;
12284 }
12285
12286 gcc_assert (regno >= 0);
12287
12288 if (is_altivec && ALTIVEC_REGNO_P (regno))
12289 *is_altivec = true;
12290
12291 rclass = rs6000_regno_regclass[regno];
12292 return reg_class_to_reg_type[(int)rclass];
12293 }
12294
12295 /* Helper function to return the cost of adding a TOC entry address. */
12296
12297 static inline int
12298 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
12299 {
12300 int ret;
12301
12302 if (TARGET_CMODEL != CMODEL_SMALL)
12303 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
12304
12305 else
12306 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
12307
12308 return ret;
12309 }
12310
12311 /* Helper function for rs6000_secondary_reload to determine whether the memory
12312 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12313 needs reloading. Return negative if the memory is not handled by the
12314 memory helper functions and a different reload method should be tried,
12315 0 if no additional instructions are needed, and positive to give the
12316 extra cost of the memory access. */
12317
12318 static int
12319 rs6000_secondary_reload_memory (rtx addr,
12320 enum reg_class rclass,
12321 machine_mode mode)
12322 {
12323 int extra_cost = 0;
12324 rtx reg, and_arg, plus_arg0, plus_arg1;
12325 addr_mask_type addr_mask;
12326 const char *type = NULL;
12327 const char *fail_msg = NULL;
12328
12329 if (GPR_REG_CLASS_P (rclass))
12330 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12331
12332 else if (rclass == FLOAT_REGS)
12333 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12334
12335 else if (rclass == ALTIVEC_REGS)
12336 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12337
12338 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12339 else if (rclass == VSX_REGS)
12340 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
12341 & ~RELOAD_REG_AND_M16);
12342
12343 /* If the register allocator hasn't made up its mind yet on the register
12344 class to use, settle on reasonable defaults. */
12345 else if (rclass == NO_REGS)
12346 {
12347 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
12348 & ~RELOAD_REG_AND_M16);
12349
12350 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
12351 addr_mask &= ~(RELOAD_REG_INDEXED
12352 | RELOAD_REG_PRE_INCDEC
12353 | RELOAD_REG_PRE_MODIFY);
12354 }
12355
12356 else
12357 addr_mask = 0;
12358
12359 /* If the register isn't valid in this register class, just return now. */
12360 if ((addr_mask & RELOAD_REG_VALID) == 0)
12361 {
12362 if (TARGET_DEBUG_ADDR)
12363 {
12364 fprintf (stderr,
12365 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12366 "not valid in class\n",
12367 GET_MODE_NAME (mode), reg_class_names[rclass]);
12368 debug_rtx (addr);
12369 }
12370
12371 return -1;
12372 }
12373
12374 switch (GET_CODE (addr))
12375 {
12376 /* Does the register class support auto update forms for this mode? We
12377 don't need a scratch register, since the powerpc only supports
12378 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12379 case PRE_INC:
12380 case PRE_DEC:
12381 reg = XEXP (addr, 0);
12382 if (!base_reg_operand (reg, GET_MODE (reg)))
12383 {
12384 fail_msg = "no base register #1";
12385 extra_cost = -1;
12386 }
12387
12388 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12389 {
12390 extra_cost = 1;
12391 type = "update";
12392 }
12393 break;
12394
12395 case PRE_MODIFY:
12396 reg = XEXP (addr, 0);
12397 plus_arg1 = XEXP (addr, 1);
12398 if (!base_reg_operand (reg, GET_MODE (reg))
12399 || GET_CODE (plus_arg1) != PLUS
12400 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
12401 {
12402 fail_msg = "bad PRE_MODIFY";
12403 extra_cost = -1;
12404 }
12405
12406 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12407 {
12408 extra_cost = 1;
12409 type = "update";
12410 }
12411 break;
12412
12413 /* Do we need to simulate AND -16 to clear the bottom address bits used
12414 in VMX load/stores? Only allow the AND for vector sizes. */
12415 case AND:
12416 and_arg = XEXP (addr, 0);
12417 if (GET_MODE_SIZE (mode) != 16
12418 || !CONST_INT_P (XEXP (addr, 1))
12419 || INTVAL (XEXP (addr, 1)) != -16)
12420 {
12421 fail_msg = "bad Altivec AND #1";
12422 extra_cost = -1;
12423 }
12424
12425 if (rclass != ALTIVEC_REGS)
12426 {
12427 if (legitimate_indirect_address_p (and_arg, false))
12428 extra_cost = 1;
12429
12430 else if (legitimate_indexed_address_p (and_arg, false))
12431 extra_cost = 2;
12432
12433 else
12434 {
12435 fail_msg = "bad Altivec AND #2";
12436 extra_cost = -1;
12437 }
12438
12439 type = "and";
12440 }
12441 break;
12442
12443 /* If this is an indirect address, make sure it is a base register. */
12444 case REG:
12445 case SUBREG:
12446 if (!legitimate_indirect_address_p (addr, false))
12447 {
12448 extra_cost = 1;
12449 type = "move";
12450 }
12451 break;
12452
12453 /* If this is an indexed address, make sure the register class can handle
12454 indexed addresses for this mode. */
12455 case PLUS:
12456 plus_arg0 = XEXP (addr, 0);
12457 plus_arg1 = XEXP (addr, 1);
12458
12459 /* (plus (plus (reg) (constant)) (constant)) is generated during
12460 push_reload processing, so handle it now. */
12461 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12462 {
12463 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12464 {
12465 extra_cost = 1;
12466 type = "offset";
12467 }
12468 }
12469
12470 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12471 push_reload processing, so handle it now. */
12472 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12473 {
12474 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12475 {
12476 extra_cost = 1;
12477 type = "indexed #2";
12478 }
12479 }
12480
12481 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12482 {
12483 fail_msg = "no base register #2";
12484 extra_cost = -1;
12485 }
12486
12487 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12488 {
12489 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12490 || !legitimate_indexed_address_p (addr, false))
12491 {
12492 extra_cost = 1;
12493 type = "indexed";
12494 }
12495 }
12496
12497 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12498 && CONST_INT_P (plus_arg1))
12499 {
12500 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12501 {
12502 extra_cost = 1;
12503 type = "vector d-form offset";
12504 }
12505 }
12506
12507 /* Make sure the register class can handle offset addresses. */
12508 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12509 {
12510 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12511 {
12512 extra_cost = 1;
12513 type = "offset #2";
12514 }
12515 }
12516
12517 else
12518 {
12519 fail_msg = "bad PLUS";
12520 extra_cost = -1;
12521 }
12522
12523 break;
12524
12525 case LO_SUM:
12526 /* Quad offsets are restricted and can't handle normal addresses. */
12527 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12528 {
12529 extra_cost = -1;
12530 type = "vector d-form lo_sum";
12531 }
12532
12533 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12534 {
12535 fail_msg = "bad LO_SUM";
12536 extra_cost = -1;
12537 }
12538
12539 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12540 {
12541 extra_cost = 1;
12542 type = "lo_sum";
12543 }
12544 break;
12545
12546 /* Static addresses need to create a TOC entry. */
12547 case CONST:
12548 case SYMBOL_REF:
12549 case LABEL_REF:
12550 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12551 {
12552 extra_cost = -1;
12553 type = "vector d-form lo_sum #2";
12554 }
12555
12556 else
12557 {
12558 type = "address";
12559 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12560 }
12561 break;
12562
12563 /* TOC references look like offsettable memory. */
12564 case UNSPEC:
12565 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12566 {
12567 fail_msg = "bad UNSPEC";
12568 extra_cost = -1;
12569 }
12570
12571 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12572 {
12573 extra_cost = -1;
12574 type = "vector d-form lo_sum #3";
12575 }
12576
12577 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12578 {
12579 extra_cost = 1;
12580 type = "toc reference";
12581 }
12582 break;
12583
12584 default:
12585 {
12586 fail_msg = "bad address";
12587 extra_cost = -1;
12588 }
12589 }
12590
12591 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12592 {
12593 if (extra_cost < 0)
12594 fprintf (stderr,
12595 "rs6000_secondary_reload_memory error: mode = %s, "
12596 "class = %s, addr_mask = '%s', %s\n",
12597 GET_MODE_NAME (mode),
12598 reg_class_names[rclass],
12599 rs6000_debug_addr_mask (addr_mask, false),
12600 (fail_msg != NULL) ? fail_msg : "<bad address>");
12601
12602 else
12603 fprintf (stderr,
12604 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12605 "addr_mask = '%s', extra cost = %d, %s\n",
12606 GET_MODE_NAME (mode),
12607 reg_class_names[rclass],
12608 rs6000_debug_addr_mask (addr_mask, false),
12609 extra_cost,
12610 (type) ? type : "<none>");
12611
12612 debug_rtx (addr);
12613 }
12614
12615 return extra_cost;
12616 }
12617
12618 /* Helper function for rs6000_secondary_reload to return true if a move to a
12619 different register class is really a simple move. */
12620
12621 static bool
12622 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12623 enum rs6000_reg_type from_type,
12624 machine_mode mode)
12625 {
12626 int size = GET_MODE_SIZE (mode);
12627
12628 /* Add support for the various direct moves available. In this function, we
12629 only look at cases where we don't need any extra registers, and one or more
12630 simple move insns are issued. On older ISAs, small integers are not allowed
12631 in FPR/VSX registers. Single precision binary floating point is not a simple
12632 move because we need to convert to the single precision memory layout.
12633 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12634 need special direct move handling, which we do not support yet. */
12635 if (TARGET_DIRECT_MOVE
12636 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12637 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12638 {
12639 if (TARGET_POWERPC64)
12640 {
12641 /* ISA 2.07: MTVSRD or MFVSRD. */
12642 if (size == 8)
12643 return true;
12644
12645 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12646 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12647 return true;
12648 }
12649
12650 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12651 if (TARGET_P8_VECTOR)
12652 {
12653 if (mode == SImode)
12654 return true;
12655
12656 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12657 return true;
12658 }
12659
12660 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12661 if (mode == SDmode)
12662 return true;
12663 }
12664
12665 /* Move to/from SPR. */
12666 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12667 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12668 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12669 return true;
12670
12671 return false;
12672 }
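
/* For example (a sketch of the cases above, not an exhaustive list): on a
   64-bit ISA 2.07 target a DImode copy between a GPR and a VSX register is
   a single mtvsrd/mfvsrd, so it qualifies as a simple move, while an SFmode
   copy does not, since it also needs a format conversion.  */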
12673
12674 /* Direct move helper function for rs6000_secondary_reload. Handle the
12675 special direct moves that involve allocating an extra register. If a
12676 suitable helper insn exists, record its insn code and extra cost in SRI
12677 and return true; otherwise return false. */
12678
12679 static bool
12680 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12681 enum rs6000_reg_type from_type,
12682 machine_mode mode,
12683 secondary_reload_info *sri,
12684 bool altivec_p)
12685 {
12686 bool ret = false;
12687 enum insn_code icode = CODE_FOR_nothing;
12688 int cost = 0;
12689 int size = GET_MODE_SIZE (mode);
12690
12691 if (TARGET_POWERPC64 && size == 16)
12692 {
12693 /* Handle moving 128-bit values from GPRs to VSX registers on
12694 ISA 2.07 (power8, power9) when running in 64-bit mode using
12695 XXPERMDI to glue the two 64-bit values back together. */
12696 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12697 {
12698 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12699 icode = reg_addr[mode].reload_vsx_gpr;
12700 }
12701
12702 /* Handle moving 128-bit values from VSX registers to GPRs on
12703 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12704 bottom 64-bit value. */
12705 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12706 {
12707 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12708 icode = reg_addr[mode].reload_gpr_vsx;
12709 }
12710 }
12711
12712 else if (TARGET_POWERPC64 && mode == SFmode)
12713 {
12714 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12715 {
12716 cost = 3; /* xscvdpspn, mfvsrd, and. */
12717 icode = reg_addr[mode].reload_gpr_vsx;
12718 }
12719
12720 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12721 {
12722 cost = 2; /* mtvsrz, xscvspdpn. */
12723 icode = reg_addr[mode].reload_vsx_gpr;
12724 }
12725 }
12726
12727 else if (!TARGET_POWERPC64 && size == 8)
12728 {
12729 /* Handle moving 64-bit values from GPRs to floating point registers on
12730 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12731 32-bit values back together. Altivec register classes must be handled
12732 specially since a different instruction is used, and the secondary
12733 reload support requires a single instruction class in the scratch
12734 register constraint. However, right now TFmode is not allowed in
12735 Altivec registers, so the pattern will never match. */
12736 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12737 {
12738 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12739 icode = reg_addr[mode].reload_fpr_gpr;
12740 }
12741 }
12742
12743 if (icode != CODE_FOR_nothing)
12744 {
12745 ret = true;
12746 if (sri)
12747 {
12748 sri->icode = icode;
12749 sri->extra_cost = cost;
12750 }
12751 }
12752
12753 return ret;
12754 }
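
/* For instance, moving a 128-bit value from a pair of GPRs into a VSX
   register on a 64-bit ISA 2.07 target uses reg_addr[mode].reload_vsx_gpr
   above: two mtvsrd's glued together with one xxpermdi, hence the extra
   cost of 3.  */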
12755
12756 /* Return whether a move between two register classes can be done either
12757 directly (simple move) or via a pattern that uses a single extra temporary
12758 (using ISA 2.07's direct move in this case). */
12759
12760 static bool
12761 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12762 enum rs6000_reg_type from_type,
12763 machine_mode mode,
12764 secondary_reload_info *sri,
12765 bool altivec_p)
12766 {
12767 /* Fall back to load/store reloads if either type is not a register. */
12768 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12769 return false;
12770
12771 /* If we haven't allocated registers yet, assume the move can be done for the
12772 standard register types. */
12773 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12774 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12775 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12776 return true;
12777
12778 /* A move within the same set of registers is a simple move for
12779 non-specialized registers. */
12780 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12781 return true;
12782
12783 /* Check whether a simple move can be done directly. */
12784 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12785 {
12786 if (sri)
12787 {
12788 sri->icode = CODE_FOR_nothing;
12789 sri->extra_cost = 0;
12790 }
12791 return true;
12792 }
12793
12794 /* Now check if we can do it in a few steps. */
12795 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12796 altivec_p);
12797 }
12798
12799 /* Inform reload about cases where moving X with a mode MODE to a register in
12800 RCLASS requires an extra scratch or immediate register. Return the class
12801 needed for the immediate register.
12802
12803 For VSX and Altivec, we may need a register to convert sp+offset into
12804 reg+sp (an indexed address).
12805
12806 For misaligned 64-bit gpr loads and stores we need a register to
12807 convert an offset address to indirect. */
12808
12809 static reg_class_t
12810 rs6000_secondary_reload (bool in_p,
12811 rtx x,
12812 reg_class_t rclass_i,
12813 machine_mode mode,
12814 secondary_reload_info *sri)
12815 {
12816 enum reg_class rclass = (enum reg_class) rclass_i;
12817 reg_class_t ret = ALL_REGS;
12818 enum insn_code icode;
12819 bool default_p = false;
12820 bool done_p = false;
12821
12822 /* Allow subreg of memory before/during reload. */
12823 bool memory_p = (MEM_P (x)
12824 || (!reload_completed && SUBREG_P (x)
12825 && MEM_P (SUBREG_REG (x))));
12826
12827 sri->icode = CODE_FOR_nothing;
12828 sri->t_icode = CODE_FOR_nothing;
12829 sri->extra_cost = 0;
12830 icode = ((in_p)
12831 ? reg_addr[mode].reload_load
12832 : reg_addr[mode].reload_store);
12833
12834 if (REG_P (x) || register_operand (x, mode))
12835 {
12836 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12837 bool altivec_p = (rclass == ALTIVEC_REGS);
12838 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12839
12840 if (!in_p)
12841 std::swap (to_type, from_type);
12842
12843 /* Can we do a direct move of some sort? */
12844 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12845 altivec_p))
12846 {
12847 icode = (enum insn_code)sri->icode;
12848 default_p = false;
12849 done_p = true;
12850 ret = NO_REGS;
12851 }
12852 }
12853
12854 /* Make sure 0.0 is not reloaded or forced into memory. */
12855 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12856 {
12857 ret = NO_REGS;
12858 default_p = false;
12859 done_p = true;
12860 }
12861
12862 /* If this is a scalar floating point value and we want to load it into the
12863 traditional Altivec registers, do it via a move through a traditional
12864 floating point register, unless we have D-form addressing. Also make
12865 sure that non-zero constants use an FPR. */
12866 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12867 && !mode_supports_vmx_dform (mode)
12868 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12869 && (memory_p || CONST_DOUBLE_P (x)))
12870 {
12871 ret = FLOAT_REGS;
12872 default_p = false;
12873 done_p = true;
12874 }
12875
12876 /* Handle reload of load/stores if we have reload helper functions. */
12877 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12878 {
12879 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12880 mode);
12881
12882 if (extra_cost >= 0)
12883 {
12884 done_p = true;
12885 ret = NO_REGS;
12886 if (extra_cost > 0)
12887 {
12888 sri->extra_cost = extra_cost;
12889 sri->icode = icode;
12890 }
12891 }
12892 }
12893
12894 /* Handle unaligned loads and stores of integer registers. */
12895 if (!done_p && TARGET_POWERPC64
12896 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12897 && memory_p
12898 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12899 {
12900 rtx addr = XEXP (x, 0);
12901 rtx off = address_offset (addr);
12902
12903 if (off != NULL_RTX)
12904 {
12905 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12906 unsigned HOST_WIDE_INT offset = INTVAL (off);
12907
12908 /* We need a secondary reload when our legitimate_address_p
12909 says the address is good (as otherwise the entire address
12910 will be reloaded), and the offset is not a multiple of
12911 four or we have an address wrap. Address wrap will only
12912 occur for LO_SUMs since legitimate_offset_address_p
12913 rejects addresses for 16-byte mems that will wrap. */
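/* Illustrative case: a DImode access at offset 18 is accepted by
   legitimate_address_p, but 18 is not a multiple of four, so the
   reload helpers selected below must rewrite the address.  */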
12914 if (GET_CODE (addr) == LO_SUM
12915 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12916 && ((offset & 3) != 0
12917 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12918 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12919 && (offset & 3) != 0))
12920 {
12921 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12922 if (in_p)
12923 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12924 : CODE_FOR_reload_di_load);
12925 else
12926 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12927 : CODE_FOR_reload_di_store);
12928 sri->extra_cost = 2;
12929 ret = NO_REGS;
12930 done_p = true;
12931 }
12932 else
12933 default_p = true;
12934 }
12935 else
12936 default_p = true;
12937 }
12938
12939 if (!done_p && !TARGET_POWERPC64
12940 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12941 && memory_p
12942 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12943 {
12944 rtx addr = XEXP (x, 0);
12945 rtx off = address_offset (addr);
12946
12947 if (off != NULL_RTX)
12948 {
12949 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12950 unsigned HOST_WIDE_INT offset = INTVAL (off);
12951
12952 /* We need a secondary reload when our legitimate_address_p
12953 says the address is good (as otherwise the entire address
12954 will be reloaded), and we have a wrap.
12955
12956 legitimate_lo_sum_address_p allows LO_SUM addresses to
12957 have any offset so test for wrap in the low 16 bits.
12958
12959 legitimate_offset_address_p checks for the range
12960 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12961 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12962 [0x7ff4,0x7fff] respectively, so test for the
12963 intersection of these ranges, [0x7ffc,0x7fff] and
12964 [0x7ff4,0x7ff7] respectively.
12965
12966 Note that the address we see here may have been
12967 manipulated by legitimize_reload_address. */
12968 if (GET_CODE (addr) == LO_SUM
12969 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12970 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12971 {
12972 if (in_p)
12973 sri->icode = CODE_FOR_reload_si_load;
12974 else
12975 sri->icode = CODE_FOR_reload_si_store;
12976 sri->extra_cost = 2;
12977 ret = NO_REGS;
12978 done_p = true;
12979 }
12980 else
12981 default_p = true;
12982 }
12983 else
12984 default_p = true;
12985 }
12986
12987 if (!done_p)
12988 default_p = true;
12989
12990 if (default_p)
12991 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12992
12993 gcc_assert (ret != ALL_REGS);
12994
12995 if (TARGET_DEBUG_ADDR)
12996 {
12997 fprintf (stderr,
12998 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12999 "mode = %s",
13000 reg_class_names[ret],
13001 in_p ? "true" : "false",
13002 reg_class_names[rclass],
13003 GET_MODE_NAME (mode));
13004
13005 if (reload_completed)
13006 fputs (", after reload", stderr);
13007
13008 if (!done_p)
13009 fputs (", done_p not set", stderr);
13010
13011 if (default_p)
13012 fputs (", default secondary reload", stderr);
13013
13014 if (sri->icode != CODE_FOR_nothing)
13015 fprintf (stderr, ", reload func = %s, extra cost = %d",
13016 insn_data[sri->icode].name, sri->extra_cost);
13017
13018 else if (sri->extra_cost > 0)
13019 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
13020
13021 fputs ("\n", stderr);
13022 debug_rtx (x);
13023 }
13024
13025 return ret;
13026 }
13027
13028 /* Better tracing for rs6000_secondary_reload_inner. */
13029
13030 static void
13031 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
13032 bool store_p)
13033 {
13034 rtx set, clobber;
13035
13036 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
13037
13038 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
13039 store_p ? "store" : "load");
13040
13041 if (store_p)
13042 set = gen_rtx_SET (mem, reg);
13043 else
13044 set = gen_rtx_SET (reg, mem);
13045
13046 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
13047 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
13048 }
13049
13050 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
13051 ATTRIBUTE_NORETURN;
13052
13053 static void
13054 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
13055 bool store_p)
13056 {
13057 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
13058 gcc_unreachable ();
13059 }
13060
13061 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
13062 reload helper functions. These were identified in
13063 rs6000_secondary_reload_memory, and if reload decided to use the secondary
13064 reload, it calls the insns:
13065 reload_<RELOAD:mode>_<P:mptrsize>_store
13066 reload_<RELOAD:mode>_<P:mptrsize>_load
13067
13068 which in turn call this function to do whatever is necessary to create
13069 valid addresses. */
13070
13071 void
13072 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
13073 {
13074 int regno = true_regnum (reg);
13075 machine_mode mode = GET_MODE (reg);
13076 addr_mask_type addr_mask;
13077 rtx addr;
13078 rtx new_addr;
13079 rtx op_reg, op0, op1;
13080 rtx and_op;
13081 rtx cc_clobber;
13082 rtvec rv;
13083
13084 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
13085 || !base_reg_operand (scratch, GET_MODE (scratch)))
13086 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13087
13088 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
13089 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
13090
13091 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
13092 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
13093
13094 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
13095 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
13096
13097 else
13098 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13099
13100 /* Make sure the mode is valid in this register class. */
13101 if ((addr_mask & RELOAD_REG_VALID) == 0)
13102 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13103
13104 if (TARGET_DEBUG_ADDR)
13105 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
13106
13107 new_addr = addr = XEXP (mem, 0);
13108 switch (GET_CODE (addr))
13109 {
13110 /* Does the register class support auto update forms for this mode? If
13111 not, do the update now. We don't need a scratch register, since the
13112 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
13113 case PRE_INC:
13114 case PRE_DEC:
13115 op_reg = XEXP (addr, 0);
13116 if (!base_reg_operand (op_reg, Pmode))
13117 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13118
13119 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
13120 {
13121 int delta = GET_MODE_SIZE (mode);
13122 if (GET_CODE (addr) == PRE_DEC)
13123 delta = -delta;
13124 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
13125 new_addr = op_reg;
13126 }
13127 break;
13128
13129 case PRE_MODIFY:
13130 op0 = XEXP (addr, 0);
13131 op1 = XEXP (addr, 1);
13132 if (!base_reg_operand (op0, Pmode)
13133 || GET_CODE (op1) != PLUS
13134 || !rtx_equal_p (op0, XEXP (op1, 0)))
13135 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13136
13137 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
13138 {
13139 emit_insn (gen_rtx_SET (op0, op1));
13140 new_addr = op0;
13141 }
13142 break;
13143
13144 /* Do we need to simulate AND -16 to clear the bottom address bits used
13145 in VMX load/stores? */
13146 case AND:
13147 op0 = XEXP (addr, 0);
13148 op1 = XEXP (addr, 1);
13149 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
13150 {
13151 if (REG_P (op0) || SUBREG_P (op0))
13152 op_reg = op0;
13153
13154 else if (GET_CODE (op1) == PLUS)
13155 {
13156 emit_insn (gen_rtx_SET (scratch, op1));
13157 op_reg = scratch;
13158 }
13159
13160 else
13161 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13162
13163 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
13164 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
13165 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
13166 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
13167 new_addr = scratch;
13168 }
13169 break;
13170
13171 /* If this is an indirect address, make sure it is a base register. */
13172 case REG:
13173 case SUBREG:
13174 if (!base_reg_operand (addr, GET_MODE (addr)))
13175 {
13176 emit_insn (gen_rtx_SET (scratch, addr));
13177 new_addr = scratch;
13178 }
13179 break;
13180
13181 /* If this is an indexed address, make sure the register class can handle
13182 indexed addresses for this mode. */
13183 case PLUS:
13184 op0 = XEXP (addr, 0);
13185 op1 = XEXP (addr, 1);
13186 if (!base_reg_operand (op0, Pmode))
13187 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13188
13189 else if (int_reg_operand (op1, Pmode))
13190 {
13191 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13192 {
13193 emit_insn (gen_rtx_SET (scratch, addr));
13194 new_addr = scratch;
13195 }
13196 }
13197
13198 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
13199 {
13200 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
13201 || !quad_address_p (addr, mode, false))
13202 {
13203 emit_insn (gen_rtx_SET (scratch, addr));
13204 new_addr = scratch;
13205 }
13206 }
13207
13208 /* Make sure the register class can handle offset addresses. */
13209 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
13210 {
13211 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13212 {
13213 emit_insn (gen_rtx_SET (scratch, addr));
13214 new_addr = scratch;
13215 }
13216 }
13217
13218 else
13219 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13220
13221 break;
13222
13223 case LO_SUM:
13224 op0 = XEXP (addr, 0);
13225 op1 = XEXP (addr, 1);
13226 if (!base_reg_operand (op0, Pmode))
13227 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13228
13229 else if (int_reg_operand (op1, Pmode))
13230 {
13231 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13232 {
13233 emit_insn (gen_rtx_SET (scratch, addr));
13234 new_addr = scratch;
13235 }
13236 }
13237
13238 /* Quad offsets are restricted and can't handle normal addresses. */
13239 else if (mode_supports_dq_form (mode))
13240 {
13241 emit_insn (gen_rtx_SET (scratch, addr));
13242 new_addr = scratch;
13243 }
13244
13245 /* Make sure the register class can handle offset addresses. */
13246 else if (legitimate_lo_sum_address_p (mode, addr, false))
13247 {
13248 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13249 {
13250 emit_insn (gen_rtx_SET (scratch, addr));
13251 new_addr = scratch;
13252 }
13253 }
13254
13255 else
13256 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13257
13258 break;
13259
13260 case SYMBOL_REF:
13261 case CONST:
13262 case LABEL_REF:
13263 rs6000_emit_move (scratch, addr, Pmode);
13264 new_addr = scratch;
13265 break;
13266
13267 default:
13268 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13269 }
13270
13271 /* Adjust the address if it changed. */
13272 if (addr != new_addr)
13273 {
13274 mem = replace_equiv_address_nv (mem, new_addr);
13275 if (TARGET_DEBUG_ADDR)
13276 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13277 }
13278
13279 /* Now create the move. */
13280 if (store_p)
13281 emit_insn (gen_rtx_SET (mem, reg));
13282 else
13283 emit_insn (gen_rtx_SET (reg, mem));
13284
13285 return;
13286 }
13287
13288 /* Convert reloads involving 64-bit gprs and misaligned offset
13289 addressing, or multiple 32-bit gprs and offsets that are too large,
13290 to use indirect addressing. */
13291
13292 void
13293 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
13294 {
13295 int regno = true_regnum (reg);
13296 enum reg_class rclass;
13297 rtx addr;
13298 rtx scratch_or_premodify = scratch;
13299
13300 if (TARGET_DEBUG_ADDR)
13301 {
13302 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
13303 store_p ? "store" : "load");
13304 fprintf (stderr, "reg:\n");
13305 debug_rtx (reg);
13306 fprintf (stderr, "mem:\n");
13307 debug_rtx (mem);
13308 fprintf (stderr, "scratch:\n");
13309 debug_rtx (scratch);
13310 }
13311
13312 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
13313 gcc_assert (MEM_P (mem));
13314 rclass = REGNO_REG_CLASS (regno);
13315 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
13316 addr = XEXP (mem, 0);
13317
13318 if (GET_CODE (addr) == PRE_MODIFY)
13319 {
13320 gcc_assert (REG_P (XEXP (addr, 0))
13321 && GET_CODE (XEXP (addr, 1)) == PLUS
13322 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
13323 scratch_or_premodify = XEXP (addr, 0);
13324 addr = XEXP (addr, 1);
13325 }
13326 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
13327
13328 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
13329
13330 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
13331
13332 /* Now create the move. */
13333 if (store_p)
13334 emit_insn (gen_rtx_SET (mem, reg));
13335 else
13336 emit_insn (gen_rtx_SET (reg, mem));
13337
13338 return;
13339 }
13340
13341 /* Given an rtx X being reloaded into a reg required to be
13342 in class CLASS, return the class of reg to actually use.
13343 In general this is just CLASS; but on some machines
13344 in some cases it is preferable to use a more restrictive class.
13345
13346 On the RS/6000, we have to return NO_REGS when we want to reload a
13347 floating-point CONST_DOUBLE to force it to be copied to memory.
13348
13349 We also don't want to reload integer values into floating-point
13350 registers if we can at all help it. In fact, this can
13351 cause reload to die, if it tries to generate a reload of CTR
13352 into a FP register and discovers it doesn't have the memory location
13353 required.
13354
13355 ??? Would it be a good idea to have reload do the converse, that is
13356 try to reload floating modes into FP registers if possible?
13357 */
13358
13359 static enum reg_class
13360 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
13361 {
13362 machine_mode mode = GET_MODE (x);
13363 bool is_constant = CONSTANT_P (x);
13364
13365 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13366 reload class for it. */
13367 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13368 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
13369 return NO_REGS;
13370
13371 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
13372 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
13373 return NO_REGS;
13374
13375 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13376 the reloading of address expressions using PLUS into floating point
13377 registers. */
13378 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
13379 {
13380 if (is_constant)
13381 {
13382 /* Zero is always allowed in all VSX registers. */
13383 if (x == CONST0_RTX (mode))
13384 return rclass;
13385
13386 /* If this is a vector constant that can be formed with a few Altivec
13387 instructions, we want altivec registers. */
13388 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
13389 return ALTIVEC_REGS;
13390
13391 /* If this is an integer constant that can easily be loaded into
13392 vector registers, allow it. */
13393 if (CONST_INT_P (x))
13394 {
13395 HOST_WIDE_INT value = INTVAL (x);
13396
13397 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13398 2.06 can generate it in the Altivec registers with
13399 VSPLTI<x>. */
13400 if (value == -1)
13401 {
13402 if (TARGET_P8_VECTOR)
13403 return rclass;
13404 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13405 return ALTIVEC_REGS;
13406 else
13407 return NO_REGS;
13408 }
13409
13410 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13411 a sign extend in the Altivec registers. */
13412 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
13413 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
13414 return ALTIVEC_REGS;
13415 }
13416
13417 /* Force constant to memory. */
13418 return NO_REGS;
13419 }
13420
13421 /* D-form addressing can easily reload the value. */
13422 if (mode_supports_vmx_dform (mode)
13423 || mode_supports_dq_form (mode))
13424 return rclass;
13425
13426 /* If this is a scalar floating point value and we don't have D-form
13427 addressing, prefer the traditional floating point registers so that we
13428 can use D-form (register+offset) addressing. */
13429 if (rclass == VSX_REGS
13430 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
13431 return FLOAT_REGS;
13432
13433 /* Prefer the Altivec registers if Altivec is handling the vector
13434 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13435 loads. */
13436 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13437 || mode == V1TImode)
13438 return ALTIVEC_REGS;
13439
13440 return rclass;
13441 }
13442
13443 if (is_constant || GET_CODE (x) == PLUS)
13444 {
13445 if (reg_class_subset_p (GENERAL_REGS, rclass))
13446 return GENERAL_REGS;
13447 if (reg_class_subset_p (BASE_REGS, rclass))
13448 return BASE_REGS;
13449 return NO_REGS;
13450 }
13451
13452 /* For the vector pair and vector quad modes, prefer their natural register
13453 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13454 the GPR registers. */
13455 if (rclass == GEN_OR_FLOAT_REGS)
13456 {
13457 if (mode == OOmode)
13458 return VSX_REGS;
13459
13460 if (mode == XOmode)
13461 return FLOAT_REGS;
13462
13463 if (GET_MODE_CLASS (mode) == MODE_INT)
13464 return GENERAL_REGS;
13465 }
13466
13467 return rclass;
13468 }
13469
13470 /* Debug version of rs6000_preferred_reload_class. */
13471 static enum reg_class
13472 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13473 {
13474 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13475
13476 fprintf (stderr,
13477 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13478 "mode = %s, x:\n",
13479 reg_class_names[ret], reg_class_names[rclass],
13480 GET_MODE_NAME (GET_MODE (x)));
13481 debug_rtx (x);
13482
13483 return ret;
13484 }
13485
13486 /* If we are copying between FP or AltiVec registers and anything else, we need
13487 a memory location. The exception is when we are targeting ppc64 and
13488 direct moves between FPRs and GPRs are available. Also, under VSX, you
13489 can copy vector registers from the FP register set to the Altivec register
13490 set and vice versa. */
13491
13492 static bool
13493 rs6000_secondary_memory_needed (machine_mode mode,
13494 reg_class_t from_class,
13495 reg_class_t to_class)
13496 {
13497 enum rs6000_reg_type from_type, to_type;
13498 bool altivec_p = ((from_class == ALTIVEC_REGS)
13499 || (to_class == ALTIVEC_REGS));
13500
13501 /* If a simple/direct move is available, we don't need secondary memory. */
13502 from_type = reg_class_to_reg_type[(int)from_class];
13503 to_type = reg_class_to_reg_type[(int)to_class];
13504
13505 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13506 (secondary_reload_info *)0, altivec_p))
13507 return false;
13508
13509 /* If we have a floating point or vector register class, we need to use
13510 memory to transfer the data. */
13511 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13512 return true;
13513
13514 return false;
13515 }
13516
13517 /* Debug version of rs6000_secondary_memory_needed. */
13518 static bool
13519 rs6000_debug_secondary_memory_needed (machine_mode mode,
13520 reg_class_t from_class,
13521 reg_class_t to_class)
13522 {
13523 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13524
13525 fprintf (stderr,
13526 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13527 "to_class = %s, mode = %s\n",
13528 ret ? "true" : "false",
13529 reg_class_names[from_class],
13530 reg_class_names[to_class],
13531 GET_MODE_NAME (mode));
13532
13533 return ret;
13534 }
13535
13536 /* Return the register class of a scratch register needed to copy IN into
13537 or out of a register in RCLASS in MODE. If it can be done directly,
13538 NO_REGS is returned. */
13539
13540 static enum reg_class
13541 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13542 rtx in)
13543 {
13544 int regno;
13545
13546 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13547 #if TARGET_MACHO
13548 && MACHOPIC_INDIRECT
13549 #endif
13550 ))
13551 {
13552 /* We cannot copy a symbolic operand directly into anything
13553 other than BASE_REGS for TARGET_ELF. So indicate that a
13554 register from BASE_REGS is needed as an intermediate
13555 register.
13556
13557 On Darwin, pic addresses require a load from memory, which
13558 needs a base register. */
13559 if (rclass != BASE_REGS
13560 && (SYMBOL_REF_P (in)
13561 || GET_CODE (in) == HIGH
13562 || GET_CODE (in) == LABEL_REF
13563 || GET_CODE (in) == CONST))
13564 return BASE_REGS;
13565 }
13566
13567 if (REG_P (in))
13568 {
13569 regno = REGNO (in);
13570 if (!HARD_REGISTER_NUM_P (regno))
13571 {
13572 regno = true_regnum (in);
13573 if (!HARD_REGISTER_NUM_P (regno))
13574 regno = -1;
13575 }
13576 }
13577 else if (SUBREG_P (in))
13578 {
13579 regno = true_regnum (in);
13580 if (!HARD_REGISTER_NUM_P (regno))
13581 regno = -1;
13582 }
13583 else
13584 regno = -1;
13585
13586 /* If we have VSX register moves, prefer moving scalar values between
13587 Altivec registers and GPR by going via an FPR (and then via memory)
13588 instead of reloading the secondary memory address for Altivec moves. */
13589 if (TARGET_VSX
13590 && GET_MODE_SIZE (mode) < 16
13591 && !mode_supports_vmx_dform (mode)
13592 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13593 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13594 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13595 && (regno >= 0 && INT_REGNO_P (regno)))))
13596 return FLOAT_REGS;
13597
13598 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13599 into anything. */
13600 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13601 || (regno >= 0 && INT_REGNO_P (regno)))
13602 return NO_REGS;
13603
13604 /* Constants, memory, and VSX registers can go into VSX registers (both the
13605 traditional floating point and the altivec registers). */
13606 if (rclass == VSX_REGS
13607 && (regno == -1 || VSX_REGNO_P (regno)))
13608 return NO_REGS;
13609
13610 /* Constants, memory, and FP registers can go into FP registers. */
13611 if ((regno == -1 || FP_REGNO_P (regno))
13612 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13613 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13614
13615 /* Memory and AltiVec registers can go into AltiVec registers. */
13616 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13617 && rclass == ALTIVEC_REGS)
13618 return NO_REGS;
13619
13620 /* We can copy among the CR registers. */
13621 if ((rclass == CR_REGS || rclass == CR0_REGS)
13622 && regno >= 0 && CR_REGNO_P (regno))
13623 return NO_REGS;
13624
13625 /* Otherwise, we need GENERAL_REGS. */
13626 return GENERAL_REGS;
13627 }
13628
13629 /* Debug version of rs6000_secondary_reload_class. */
13630 static enum reg_class
13631 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13632 machine_mode mode, rtx in)
13633 {
13634 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13635 fprintf (stderr,
13636 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13637 "mode = %s, input rtx:\n",
13638 reg_class_names[ret], reg_class_names[rclass],
13639 GET_MODE_NAME (mode));
13640 debug_rtx (in);
13641
13642 return ret;
13643 }
13644
13645 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13646
13647 static bool
13648 rs6000_can_change_mode_class (machine_mode from,
13649 machine_mode to,
13650 reg_class_t rclass)
13651 {
13652 unsigned from_size = GET_MODE_SIZE (from);
13653 unsigned to_size = GET_MODE_SIZE (to);
13654
13655 if (from_size != to_size)
13656 {
13657 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13658
13659 if (reg_classes_intersect_p (xclass, rclass))
13660 {
13661 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13662 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13663 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13664 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13665
13666 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13667 single register under VSX because the scalar part of the register
13668 is in the upper 64-bits, and not the lower 64-bits. Types like
13669 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13670 IEEE floating point can't overlap, and neither can small
13671 values. */
13672
13673 if (to_float128_vector_p && from_float128_vector_p)
13674 return true;
13675
13676 else if (to_float128_vector_p || from_float128_vector_p)
13677 return false;
13678
13679 /* TDmode in floating-mode registers must always go into a register
13680 pair with the most significant word in the even-numbered register
13681 to match ISA requirements. In little-endian mode, this does not
13682 match subreg numbering, so we cannot allow subregs. */
13683 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13684 return false;
13685
13686 /* Allow SD<->DD changes, since SDmode values are stored in
13687 the low half of the DDmode, just like target-independent
13688 code expects. We need to allow at least SD->DD since
13689 rs6000_secondary_memory_needed_mode asks for that change
13690 to be made for SD reloads. */
13691 if ((to == DDmode && from == SDmode)
13692 || (to == SDmode && from == DDmode))
13693 return true;
13694
13695 if (from_size < 8 || to_size < 8)
13696 return false;
13697
13698 if (from_size == 8 && (8 * to_nregs) != to_size)
13699 return false;
13700
13701 if (to_size == 8 && (8 * from_nregs) != from_size)
13702 return false;
13703
13704 return true;
13705 }
13706 else
13707 return true;
13708 }
13709
13710 /* Since the VSX register set includes traditional floating point registers
13711 and altivec registers, just check for the size being different instead of
13712 trying to check whether the modes are vector modes. Otherwise it won't
13713 allow, say, DF and DI to change classes. For types like TFmode and TDmode
13714 that take 2 64-bit registers, rather than a single 128-bit register, don't
13715 allow subregs of those types to other 128-bit types. */
13716 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13717 {
13718 unsigned num_regs = (from_size + 15) / 16;
13719 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13720 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13721 return false;
13722
13723 return (from_size == 8 || from_size == 16);
13724 }
13725
13726 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13727 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13728 return false;
13729
13730 return true;
13731 }
13732
13733 /* Debug version of rs6000_can_change_mode_class. */
13734 static bool
13735 rs6000_debug_can_change_mode_class (machine_mode from,
13736 machine_mode to,
13737 reg_class_t rclass)
13738 {
13739 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13740
13741 fprintf (stderr,
13742 "rs6000_can_change_mode_class, return %s, from = %s, "
13743 "to = %s, rclass = %s\n",
13744 ret ? "true" : "false",
13745 GET_MODE_NAME (from), GET_MODE_NAME (to),
13746 reg_class_names[rclass]);
13747
13748 return ret;
13749 }
13750 \f
13751 /* Return a string to do a move operation of 128 bits of data. */
13752
13753 const char *
13754 rs6000_output_move_128bit (rtx operands[])
13755 {
13756 rtx dest = operands[0];
13757 rtx src = operands[1];
13758 machine_mode mode = GET_MODE (dest);
13759 int dest_regno;
13760 int src_regno;
13761 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13762 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13763
13764 if (REG_P (dest))
13765 {
13766 dest_regno = REGNO (dest);
13767 dest_gpr_p = INT_REGNO_P (dest_regno);
13768 dest_fp_p = FP_REGNO_P (dest_regno);
13769 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13770 dest_vsx_p = dest_fp_p | dest_vmx_p;
13771 }
13772 else
13773 {
13774 dest_regno = -1;
13775 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13776 }
13777
13778 if (REG_P (src))
13779 {
13780 src_regno = REGNO (src);
13781 src_gpr_p = INT_REGNO_P (src_regno);
13782 src_fp_p = FP_REGNO_P (src_regno);
13783 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13784 src_vsx_p = src_fp_p | src_vmx_p;
13785 }
13786 else
13787 {
13788 src_regno = -1;
13789 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13790 }
13791
13792 /* Register moves. */
13793 if (dest_regno >= 0 && src_regno >= 0)
13794 {
13795 if (dest_gpr_p)
13796 {
13797 if (src_gpr_p)
13798 return "#";
13799
13800 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13801 return (WORDS_BIG_ENDIAN
13802 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13803 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13804
13805 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13806 return "#";
13807 }
13808
13809 else if (TARGET_VSX && dest_vsx_p)
13810 {
13811 if (src_vsx_p)
13812 return "xxlor %x0,%x1,%x1";
13813
13814 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13815 return (WORDS_BIG_ENDIAN
13816 ? "mtvsrdd %x0,%1,%L1"
13817 : "mtvsrdd %x0,%L1,%1");
13818
13819 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13820 return "#";
13821 }
13822
13823 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13824 return "vor %0,%1,%1";
13825
13826 else if (dest_fp_p && src_fp_p)
13827 return "#";
13828 }
13829
13830 /* Loads. */
13831 else if (dest_regno >= 0 && MEM_P (src))
13832 {
13833 if (dest_gpr_p)
13834 {
13835 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13836 return "lq %0,%1";
13837 else
13838 return "#";
13839 }
13840
13841 else if (TARGET_ALTIVEC && dest_vmx_p
13842 && altivec_indexed_or_indirect_operand (src, mode))
13843 return "lvx %0,%y1";
13844
13845 else if (TARGET_VSX && dest_vsx_p)
13846 {
13847 if (mode_supports_dq_form (mode)
13848 && quad_address_p (XEXP (src, 0), mode, true))
13849 return "lxv %x0,%1";
13850
13851 else if (TARGET_P9_VECTOR)
13852 return "lxvx %x0,%y1";
13853
13854 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13855 return "lxvw4x %x0,%y1";
13856
13857 else
13858 return "lxvd2x %x0,%y1";
13859 }
13860
13861 else if (TARGET_ALTIVEC && dest_vmx_p)
13862 return "lvx %0,%y1";
13863
13864 else if (dest_fp_p)
13865 return "#";
13866 }
13867
13868 /* Stores. */
13869 else if (src_regno >= 0 && MEM_P (dest))
13870 {
13871 if (src_gpr_p)
13872 {
13873 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13874 return "stq %1,%0";
13875 else
13876 return "#";
13877 }
13878
13879 else if (TARGET_ALTIVEC && src_vmx_p
13880 && altivec_indexed_or_indirect_operand (dest, mode))
13881 return "stvx %1,%y0";
13882
13883 else if (TARGET_VSX && src_vsx_p)
13884 {
13885 if (mode_supports_dq_form (mode)
13886 && quad_address_p (XEXP (dest, 0), mode, true))
13887 return "stxv %x1,%0";
13888
13889 else if (TARGET_P9_VECTOR)
13890 return "stxvx %x1,%y0";
13891
13892 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13893 return "stxvw4x %x1,%y0";
13894
13895 else
13896 return "stxvd2x %x1,%y0";
13897 }
13898
13899 else if (TARGET_ALTIVEC && src_vmx_p)
13900 return "stvx %1,%y0";
13901
13902 else if (src_fp_p)
13903 return "#";
13904 }
13905
13906 /* Constants. */
13907 else if (dest_regno >= 0
13908 && (CONST_INT_P (src)
13909 || CONST_WIDE_INT_P (src)
13910 || CONST_DOUBLE_P (src)
13911 || GET_CODE (src) == CONST_VECTOR))
13912 {
13913 if (dest_gpr_p)
13914 return "#";
13915
13916 else if ((dest_vmx_p && TARGET_ALTIVEC)
13917 || (dest_vsx_p && TARGET_VSX))
13918 return output_vec_const_move (operands);
13919 }
13920
13921 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13922 }
13923
13924 /* Validate a 128-bit move. */
13925 bool
13926 rs6000_move_128bit_ok_p (rtx operands[])
13927 {
13928 machine_mode mode = GET_MODE (operands[0]);
13929 return (gpc_reg_operand (operands[0], mode)
13930 || gpc_reg_operand (operands[1], mode));
13931 }
13932
13933 /* Return true if a 128-bit move needs to be split. */
13934 bool
13935 rs6000_split_128bit_ok_p (rtx operands[])
13936 {
13937 if (!reload_completed)
13938 return false;
13939
13940 if (!gpr_or_gpr_p (operands[0], operands[1]))
13941 return false;
13942
13943 if (quad_load_store_p (operands[0], operands[1]))
13944 return false;
13945
13946 return true;
13947 }
13948
13949 \f
/* Given a comparison operation, return the bit number in CCR to test.

SCC_P is 1 if this is for an scc.  That means that %D will have been
used instead of %C, so the bits will be in different places.

Return -1 if OP isn't a valid comparison for some reason. */
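/* For example, a GT test on CR field 1 gives base_bit = 4 (each CR field
is four bits wide), so GT returns base_bit + 1 = 5, the GT bit of CR1. */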
13957
13958 int
13959 ccr_bit (rtx op, int scc_p)
13960 {
13961 enum rtx_code code = GET_CODE (op);
13962 machine_mode cc_mode;
13963 int cc_regnum;
13964 int base_bit;
13965 rtx reg;
13966
13967 if (!COMPARISON_P (op))
13968 return -1;
13969
13970 reg = XEXP (op, 0);
13971
13972 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13973 return -1;
13974
13975 cc_mode = GET_MODE (reg);
13976 cc_regnum = REGNO (reg);
13977 base_bit = 4 * (cc_regnum - CR0_REGNO);
13978
13979 validate_condition_mode (code, cc_mode);
13980
13981 /* When generating a sCOND operation, only positive conditions are
13982 allowed. */
13983 if (scc_p)
13984 switch (code)
13985 {
13986 case EQ:
13987 case GT:
13988 case LT:
13989 case UNORDERED:
13990 case GTU:
13991 case LTU:
13992 break;
13993 default:
13994 return -1;
13995 }
13996
13997 switch (code)
13998 {
13999 case NE:
14000 return scc_p ? base_bit + 3 : base_bit + 2;
14001 case EQ:
14002 return base_bit + 2;
14003 case GT: case GTU: case UNLE:
14004 return base_bit + 1;
14005 case LT: case LTU: case UNGE:
14006 return base_bit;
14007 case ORDERED: case UNORDERED:
14008 return base_bit + 3;
14009
14010 case GE: case GEU:
14011 /* If scc, we will have done a cror to put the bit in the
14012 unordered position. So test that bit. For integer, this is ! LT
14013 unless this is an scc insn. */
14014 return scc_p ? base_bit + 3 : base_bit;
14015
14016 case LE: case LEU:
14017 return scc_p ? base_bit + 3 : base_bit + 1;
14018
14019 default:
14020 return -1;
14021 }
14022 }
14023 \f
14024 /* Return the GOT register. */
14025
14026 rtx
14027 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
14028 {
14029 /* The second flow pass currently (June 1999) can't update
14030 regs_ever_live without disturbing other parts of the compiler, so
14031 update it here to make the prolog/epilogue code happy. */
14032 if (!can_create_pseudo_p ()
14033 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
14034 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
14035
14036 crtl->uses_pic_offset_table = 1;
14037
14038 return pic_offset_table_rtx;
14039 }
14040 \f
14041 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
14042
14043 /* Write out a function code label. */
14044
14045 void
14046 rs6000_output_function_entry (FILE *file, const char *fname)
14047 {
14048 if (fname[0] != '.')
14049 {
14050 switch (DEFAULT_ABI)
14051 {
14052 default:
14053 gcc_unreachable ();
14054
14055 case ABI_AIX:
14056 if (DOT_SYMBOLS)
14057 putc ('.', file);
14058 else
14059 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
14060 break;
14061
14062 case ABI_ELFv2:
14063 case ABI_V4:
14064 case ABI_DARWIN:
14065 break;
14066 }
14067 }
14068
14069 RS6000_OUTPUT_BASENAME (file, fname);
14070 }
14071
14072 /* Print an operand. Recognize special options, documented below. */
14073
14074 #if TARGET_ELF
14075 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
14076 only introduced by the linker, when applying the sda21
14077 relocation. */
14078 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
14079 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
14080 #else
14081 #define SMALL_DATA_RELOC "sda21"
14082 #define SMALL_DATA_REG 0
14083 #endif
14084
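/* For instance (an illustrative sketch, not an exhaustive list): with
operands[1] = (reg:DI 5), "%1" prints the register name, "%L1" the name of
the second register of the pair, and "%x1" the VSX register number (valid
only for FP or Altivec registers). */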
14085 void
14086 print_operand (FILE *file, rtx x, int code)
14087 {
14088 int i;
14089 unsigned HOST_WIDE_INT uval;
14090
14091 switch (code)
14092 {
14093 /* %a is output_address. */
14094
14095 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
14096 output_operand. */
14097
14098 case 'A':
14099 /* Write the MMA accumulator number associated with VSX register X. */
14100 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
14101 output_operand_lossage ("invalid %%A value");
14102 else
14103 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
14104 return;
14105
14106 case 'D':
14107 /* Like 'J' but get to the GT bit only. */
14108 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14109 {
14110 output_operand_lossage ("invalid %%D value");
14111 return;
14112 }
14113
14114 /* Bit 1 is GT bit. */
14115 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
14116
14117 /* Add one for shift count in rlinm for scc. */
14118 fprintf (file, "%d", i + 1);
14119 return;
14120
14121 case 'e':
14122 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
14123 if (! INT_P (x))
14124 {
14125 output_operand_lossage ("invalid %%e value");
14126 return;
14127 }
14128
14129 uval = INTVAL (x);
14130 if ((uval & 0xffff) == 0 && uval != 0)
14131 putc ('s', file);
14132 return;
14133
14134 case 'E':
/* X is a CR register.  Print the number of the EQ bit of the CR. */
14136 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14137 output_operand_lossage ("invalid %%E value");
14138 else
14139 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
14140 return;
14141
14142 case 'f':
14143 /* X is a CR register. Print the shift count needed to move it
14144 to the high-order four bits. */
14145 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14146 output_operand_lossage ("invalid %%f value");
14147 else
14148 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
14149 return;
14150
14151 case 'F':
14152 /* Similar, but print the count for the rotate in the opposite
14153 direction. */
14154 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14155 output_operand_lossage ("invalid %%F value");
14156 else
14157 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
14158 return;
14159
14160 case 'G':
14161 /* X is a constant integer. If it is negative, print "m",
14162 otherwise print "z". This is to make an aze or ame insn. */
14163 if (!CONST_INT_P (x))
14164 output_operand_lossage ("invalid %%G value");
14165 else if (INTVAL (x) >= 0)
14166 putc ('z', file);
14167 else
14168 putc ('m', file);
14169 return;
14170
14171 case 'h':
14172 /* If constant, output low-order five bits. Otherwise, write
14173 normally. */
14174 if (INT_P (x))
14175 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
14176 else
14177 print_operand (file, x, 0);
14178 return;
14179
14180 case 'H':
14181 /* If constant, output low-order six bits. Otherwise, write
14182 normally. */
14183 if (INT_P (x))
14184 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
14185 else
14186 print_operand (file, x, 0);
14187 return;
14188
14189 case 'I':
14190 /* Print `i' if this is a constant, else nothing. */
14191 if (INT_P (x))
14192 putc ('i', file);
14193 return;
14194
14195 case 'j':
14196 /* Write the bit number in CCR for jump. */
14197 i = ccr_bit (x, 0);
14198 if (i == -1)
14199 output_operand_lossage ("invalid %%j code");
14200 else
14201 fprintf (file, "%d", i);
14202 return;
14203
14204 case 'J':
14205 /* Similar, but add one for shift count in rlinm for scc and pass
14206 scc flag to `ccr_bit'. */
14207 i = ccr_bit (x, 1);
14208 if (i == -1)
14209 output_operand_lossage ("invalid %%J code");
14210 else
14211 /* If we want bit 31, write a shift count of zero, not 32. */
14212 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14213 return;
14214
14215 case 'k':
14216 /* X must be a constant. Write the 1's complement of the
14217 constant. */
14218 if (! INT_P (x))
14219 output_operand_lossage ("invalid %%k value");
14220 else
14221 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
14222 return;
14223
14224 case 'K':
14225 /* X must be a symbolic constant on ELF. Write an
14226 expression suitable for an 'addi' that adds in the low 16
14227 bits of the MEM. */
14228 if (GET_CODE (x) == CONST)
14229 {
14230 if (GET_CODE (XEXP (x, 0)) != PLUS
14231 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
14232 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
14233 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
14234 output_operand_lossage ("invalid %%K value");
14235 }
14236 print_operand_address (file, x);
14237 fputs ("@l", file);
14238 return;
14239
14240 /* %l is output_asm_label. */
14241
14242 case 'L':
14243 /* Write second word of DImode or DFmode reference. Works on register
14244 or non-indexed memory only. */
14245 if (REG_P (x))
14246 fputs (reg_names[REGNO (x) + 1], file);
14247 else if (MEM_P (x))
14248 {
14249 machine_mode mode = GET_MODE (x);
/* Handle possible auto-increment.  Since it is pre-increment and
we have already done it, we can just use an offset of one word. */
14252 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14253 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14254 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14255 UNITS_PER_WORD));
14256 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14257 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14258 UNITS_PER_WORD));
14259 else
14260 output_address (mode, XEXP (adjust_address_nv (x, SImode,
14261 UNITS_PER_WORD),
14262 0));
14263
14264 if (small_data_operand (x, GET_MODE (x)))
14265 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14266 reg_names[SMALL_DATA_REG]);
14267 }
14268 return;
14269
14270 case 'N': /* Unused */
14271 /* Write the number of elements in the vector times 4. */
14272 if (GET_CODE (x) != PARALLEL)
14273 output_operand_lossage ("invalid %%N value");
14274 else
14275 fprintf (file, "%d", XVECLEN (x, 0) * 4);
14276 return;
14277
14278 case 'O': /* Unused */
14279 /* Similar, but subtract 1 first. */
14280 if (GET_CODE (x) != PARALLEL)
14281 output_operand_lossage ("invalid %%O value");
14282 else
14283 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
14284 return;
14285
14286 case 'p':
14287 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14288 if (! INT_P (x)
14289 || INTVAL (x) < 0
14290 || (i = exact_log2 (INTVAL (x))) < 0)
14291 output_operand_lossage ("invalid %%p value");
14292 else
14293 fprintf (file, "%d", i);
14294 return;
14295
14296 case 'P':
14297 /* The operand must be an indirect memory reference. The result
14298 is the register name. */
14299 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
14300 || REGNO (XEXP (x, 0)) >= 32)
14301 output_operand_lossage ("invalid %%P value");
14302 else
14303 fputs (reg_names[REGNO (XEXP (x, 0))], file);
14304 return;
14305
14306 case 'q':
14307 /* This outputs the logical code corresponding to a boolean
14308 expression. The expression may have one or both operands
14309 negated (if one, only the first one). For condition register
14310 logical operations, it will also treat the negated
14311 CR codes as NOTs, but not handle NOTs of them. */
14312 {
14313 const char *const *t = 0;
14314 const char *s;
14315 enum rtx_code code = GET_CODE (x);
14316 static const char * const tbl[3][3] = {
14317 { "and", "andc", "nor" },
14318 { "or", "orc", "nand" },
14319 { "xor", "eqv", "xor" } };
14320
14321 if (code == AND)
14322 t = tbl[0];
14323 else if (code == IOR)
14324 t = tbl[1];
14325 else if (code == XOR)
14326 t = tbl[2];
14327 else
14328 output_operand_lossage ("invalid %%q value");
14329
14330 if (GET_CODE (XEXP (x, 0)) != NOT)
14331 s = t[0];
14332 else
14333 {
14334 if (GET_CODE (XEXP (x, 1)) == NOT)
14335 s = t[2];
14336 else
14337 s = t[1];
14338 }
14339
14340 fputs (s, file);
14341 }
14342 return;
14343
14344 case 'Q':
14345 if (! TARGET_MFCRF)
14346 return;
14347 fputc (',', file);
14348 /* FALLTHRU */
14349
14350 case 'R':
14351 /* X is a CR register. Print the mask for `mtcrf'. */
14352 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14353 output_operand_lossage ("invalid %%R value");
14354 else
14355 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
14356 return;
14357
14358 case 's':
14359 /* Low 5 bits of 32 - value */
14360 if (! INT_P (x))
14361 output_operand_lossage ("invalid %%s value");
14362 else
14363 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
14364 return;
14365
14366 case 't':
14367 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14368 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14369 {
14370 output_operand_lossage ("invalid %%t value");
14371 return;
14372 }
14373
14374 /* Bit 3 is OV bit. */
14375 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
14376
14377 /* If we want bit 31, write a shift count of zero, not 32. */
14378 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14379 return;
14380
14381 case 'T':
14382 /* Print the symbolic name of a branch target register. */
14383 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14384 x = XVECEXP (x, 0, 0);
14385 if (!REG_P (x) || (REGNO (x) != LR_REGNO
14386 && REGNO (x) != CTR_REGNO))
14387 output_operand_lossage ("invalid %%T value");
14388 else if (REGNO (x) == LR_REGNO)
14389 fputs ("lr", file);
14390 else
14391 fputs ("ctr", file);
14392 return;
14393
14394 case 'u':
14395 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14396 for use in unsigned operand. */
14397 if (! INT_P (x))
14398 {
14399 output_operand_lossage ("invalid %%u value");
14400 return;
14401 }
14402
14403 uval = INTVAL (x);
14404 if ((uval & 0xffff) == 0)
14405 uval >>= 16;
14406
14407 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
14408 return;
14409
14410 case 'v':
14411 /* High-order 16 bits of constant for use in signed operand. */
14412 if (! INT_P (x))
14413 output_operand_lossage ("invalid %%v value");
14414 else
14415 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
14416 (INTVAL (x) >> 16) & 0xffff);
14417 return;
14418
14419 case 'U':
14420 /* Print `u' if this has an auto-increment or auto-decrement. */
14421 if (MEM_P (x)
14422 && (GET_CODE (XEXP (x, 0)) == PRE_INC
14423 || GET_CODE (XEXP (x, 0)) == PRE_DEC
14424 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
14425 putc ('u', file);
14426 return;
14427
14428 case 'V':
14429 /* Print the trap code for this operand. */
14430 switch (GET_CODE (x))
14431 {
14432 case EQ:
14433 fputs ("eq", file); /* 4 */
14434 break;
14435 case NE:
14436 fputs ("ne", file); /* 24 */
14437 break;
14438 case LT:
14439 fputs ("lt", file); /* 16 */
14440 break;
14441 case LE:
14442 fputs ("le", file); /* 20 */
14443 break;
14444 case GT:
14445 fputs ("gt", file); /* 8 */
14446 break;
14447 case GE:
14448 fputs ("ge", file); /* 12 */
14449 break;
14450 case LTU:
14451 fputs ("llt", file); /* 2 */
14452 break;
14453 case LEU:
14454 fputs ("lle", file); /* 6 */
14455 break;
14456 case GTU:
14457 fputs ("lgt", file); /* 1 */
14458 break;
14459 case GEU:
14460 fputs ("lge", file); /* 5 */
14461 break;
14462 default:
14463 output_operand_lossage ("invalid %%V value");
14464 }
14465 break;
14466
14467 case 'w':
14468 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14469 normally. */
14470 if (INT_P (x))
14471 fprintf (file, HOST_WIDE_INT_PRINT_DEC, sext_hwi (INTVAL (x), 16));
14472 else
14473 print_operand (file, x, 0);
14474 return;
14475
14476 case 'x':
14477 /* X is a FPR or Altivec register used in a VSX context. */
14478 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14479 output_operand_lossage ("invalid %%x value");
14480 else
14481 {
14482 int reg = REGNO (x);
14483 int vsx_reg = (FP_REGNO_P (reg)
14484 ? reg - 32
14485 : reg - FIRST_ALTIVEC_REGNO + 32);
14486
14487 #ifdef TARGET_REGNAMES
14488 if (TARGET_REGNAMES)
14489 fprintf (file, "%%vs%d", vsx_reg);
14490 else
14491 #endif
14492 fprintf (file, "%d", vsx_reg);
14493 }
14494 return;
14495
14496 case 'X':
14497 if (MEM_P (x)
14498 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14499 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14500 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14501 putc ('x', file);
14502 return;
14503
14504 case 'Y':
14505 /* Like 'L', for third word of TImode/PTImode */
14506 if (REG_P (x))
14507 fputs (reg_names[REGNO (x) + 2], file);
14508 else if (MEM_P (x))
14509 {
14510 machine_mode mode = GET_MODE (x);
14511 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14512 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14513 output_address (mode, plus_constant (Pmode,
14514 XEXP (XEXP (x, 0), 0), 8));
14515 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14516 output_address (mode, plus_constant (Pmode,
14517 XEXP (XEXP (x, 0), 0), 8));
14518 else
14519 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14520 if (small_data_operand (x, GET_MODE (x)))
14521 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14522 reg_names[SMALL_DATA_REG]);
14523 }
14524 return;
14525
14526 case 'z':
14527 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14528 x = XVECEXP (x, 0, 1);
14529 /* X is a SYMBOL_REF. Write out the name preceded by a
14530 period and without any trailing data in brackets. Used for function
14531 names. If we are configured for System V (or the embedded ABI) on
14532 the PowerPC, do not emit the period, since those systems do not use
14533 TOCs and the like. */
14534 if (!SYMBOL_REF_P (x))
14535 {
14536 output_operand_lossage ("invalid %%z value");
14537 return;
14538 }
14539
14540 /* For macho, check to see if we need a stub. */
14541 if (TARGET_MACHO)
14542 {
14543 const char *name = XSTR (x, 0);
14544 #if TARGET_MACHO
14545 if (darwin_symbol_stubs
14546 && MACHOPIC_INDIRECT
14547 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14548 name = machopic_indirection_name (x, /*stub_p=*/true);
14549 #endif
14550 assemble_name (file, name);
14551 }
14552 else if (!DOT_SYMBOLS)
14553 assemble_name (file, XSTR (x, 0));
14554 else
14555 rs6000_output_function_entry (file, XSTR (x, 0));
14556 return;
14557
14558 case 'Z':
14559 /* Like 'L', for last word of TImode/PTImode. */
14560 if (REG_P (x))
14561 fputs (reg_names[REGNO (x) + 3], file);
14562 else if (MEM_P (x))
14563 {
14564 machine_mode mode = GET_MODE (x);
14565 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14566 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14567 output_address (mode, plus_constant (Pmode,
14568 XEXP (XEXP (x, 0), 0), 12));
14569 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14570 output_address (mode, plus_constant (Pmode,
14571 XEXP (XEXP (x, 0), 0), 12));
14572 else
14573 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14574 if (small_data_operand (x, GET_MODE (x)))
14575 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14576 reg_names[SMALL_DATA_REG]);
14577 }
14578 return;
14579
14580 /* Print AltiVec memory operand. */
14581 case 'y':
14582 {
14583 rtx tmp;
14584
14585 gcc_assert (MEM_P (x));
14586
14587 tmp = XEXP (x, 0);
14588
14589 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14590 && GET_CODE (tmp) == AND
14591 && CONST_INT_P (XEXP (tmp, 1))
14592 && INTVAL (XEXP (tmp, 1)) == -16)
14593 tmp = XEXP (tmp, 0);
14594 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14595 && GET_CODE (tmp) == PRE_MODIFY)
14596 tmp = XEXP (tmp, 1);
14597 if (REG_P (tmp))
14598 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14599 else
14600 {
14601 if (GET_CODE (tmp) != PLUS
14602 || !REG_P (XEXP (tmp, 0))
14603 || !REG_P (XEXP (tmp, 1)))
14604 {
14605 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14606 break;
14607 }
14608
14609 if (REGNO (XEXP (tmp, 0)) == 0)
14610 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14611 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14612 else
14613 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14614 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14615 }
14616 break;
14617 }
14618
14619 case 0:
14620 if (REG_P (x))
14621 fprintf (file, "%s", reg_names[REGNO (x)]);
14622 else if (MEM_P (x))
14623 {
14624 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14625 know the width from the mode. */
14626 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14627 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14628 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14629 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14630 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14631 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14632 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14633 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14634 else
14635 output_address (GET_MODE (x), XEXP (x, 0));
14636 }
14637 else if (toc_relative_expr_p (x, false,
14638 &tocrel_base_oac, &tocrel_offset_oac))
14639 /* This hack along with a corresponding hack in
14640 rs6000_output_addr_const_extra arranges to output addends
14641 where the assembler expects to find them. eg.
14642 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14643 without this hack would be output as "x@toc+4". We
14644 want "x+4@toc". */
14645 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14646 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14647 output_addr_const (file, XVECEXP (x, 0, 0));
14648 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14649 output_addr_const (file, XVECEXP (x, 0, 1));
14650 else
14651 output_addr_const (file, x);
14652 return;
14653
14654 case '&':
14655 if (const char *name = get_some_local_dynamic_name ())
14656 assemble_name (file, name);
14657 else
14658 output_operand_lossage ("'%%&' used without any "
14659 "local dynamic TLS references");
14660 return;
14661
14662 default:
14663 output_operand_lossage ("invalid %%xn code");
14664 }
14665 }
14666 \f
14667 /* Print the address of an operand. */
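/* For example, (reg 9) prints as "0(9)", (plus (reg 9) (const_int 16)) as
"16(9)", and a pc-relative symbol as "sym@pcrel" (with "@got" inserted
first when the symbol is not local, giving "sym@got@pcrel"). */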
14668
14669 void
14670 print_operand_address (FILE *file, rtx x)
14671 {
14672 if (REG_P (x))
14673 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14674
14675 /* Is it a PC-relative address? */
14676 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14677 {
14678 HOST_WIDE_INT offset;
14679
14680 if (GET_CODE (x) == CONST)
14681 x = XEXP (x, 0);
14682
14683 if (GET_CODE (x) == PLUS)
14684 {
14685 offset = INTVAL (XEXP (x, 1));
14686 x = XEXP (x, 0);
14687 }
14688 else
14689 offset = 0;
14690
14691 output_addr_const (file, x);
14692
14693 if (offset)
14694 fprintf (file, "%+" PRId64, offset);
14695
14696 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14697 fprintf (file, "@got");
14698
14699 fprintf (file, "@pcrel");
14700 }
14701 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14702 || GET_CODE (x) == LABEL_REF)
14703 {
14704 output_addr_const (file, x);
14705 if (small_data_operand (x, GET_MODE (x)))
14706 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14707 reg_names[SMALL_DATA_REG]);
14708 else
14709 {
14710 /* Do not support getting address directly from TOC, emit error.
14711 No more work is needed for !TARGET_TOC. */
14712 if (TARGET_TOC)
14713 output_operand_lossage ("%%a requires an address of memory");
14714 }
14715 }
14716 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14717 && REG_P (XEXP (x, 1)))
14718 {
14719 if (REGNO (XEXP (x, 0)) == 0)
14720 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14721 reg_names[ REGNO (XEXP (x, 0)) ]);
14722 else
14723 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14724 reg_names[ REGNO (XEXP (x, 1)) ]);
14725 }
14726 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14727 && CONST_INT_P (XEXP (x, 1)))
14728 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14729 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14730 #if TARGET_MACHO
14731 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14732 && CONSTANT_P (XEXP (x, 1)))
14733 {
14734 fprintf (file, "lo16(");
14735 output_addr_const (file, XEXP (x, 1));
14736 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14737 }
14738 #endif
14739 #if TARGET_ELF
14740 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14741 && CONSTANT_P (XEXP (x, 1)))
14742 {
14743 output_addr_const (file, XEXP (x, 1));
14744 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14745 }
14746 #endif
14747 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14748 {
14749 /* This hack along with a corresponding hack in
14750 rs6000_output_addr_const_extra arranges to output addends
14751 where the assembler expects to find them. eg.
14752 (lo_sum (reg 9)
14753 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14754 without this hack would be output as "x@toc+8@l(9)". We
14755 want "x+8@toc@l(9)". */
14756 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14757 if (GET_CODE (x) == LO_SUM)
14758 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14759 else
14760 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14761 }
14762 else
14763 output_addr_const (file, x);
14764 }
14765 \f
14766 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14767
14768 bool
14769 rs6000_output_addr_const_extra (FILE *file, rtx x)
14770 {
14771 if (GET_CODE (x) == UNSPEC)
14772 switch (XINT (x, 1))
14773 {
14774 case UNSPEC_TOCREL:
14775 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14776 && REG_P (XVECEXP (x, 0, 1))
14777 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14778 output_addr_const (file, XVECEXP (x, 0, 0));
14779 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14780 {
14781 if (INTVAL (tocrel_offset_oac) >= 0)
14782 fprintf (file, "+");
14783 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14784 }
14785 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14786 {
14787 putc ('-', file);
14788 assemble_name (file, toc_label_name);
14789 need_toc_init = 1;
14790 }
14791 else if (TARGET_ELF)
14792 fputs ("@toc", file);
14793 return true;
14794
14795 #if TARGET_MACHO
14796 case UNSPEC_MACHOPIC_OFFSET:
14797 output_addr_const (file, XVECEXP (x, 0, 0));
14798 putc ('-', file);
14799 machopic_output_function_base_name (file);
14800 return true;
14801 #endif
14802 }
14803 return false;
14804 }
14805 \f
14806 /* Target hook for assembling integer objects. The PowerPC version has
14807 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14808 is defined. It also needs to handle DI-mode objects on 64-bit
14809 targets. */
14810
14811 static bool
14812 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14813 {
14814 #ifdef RELOCATABLE_NEEDS_FIXUP
14815 /* Special handling for SI values. */
14816 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14817 {
14818 static int recurse = 0;
14819
14820 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14821 the .fixup section. Since the TOC section is already relocated, we
14822 don't need to mark it here. We used to skip the text section, but it
14823 should never be valid for relocated addresses to be placed in the text
14824 section. */
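/* As an illustrative sketch (the label name depends on the target's
ASM_GENERATE_INTERNAL_LABEL), for a constant address "sym" the code below
emits roughly:
.LCP0:
.long (sym)@fixup
.section ".fixup","aw"
.align 2
.long .LCP0
.previous */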
14825 if (DEFAULT_ABI == ABI_V4
14826 && (TARGET_RELOCATABLE || flag_pic > 1)
14827 && in_section != toc_section
14828 && !recurse
14829 && !CONST_SCALAR_INT_P (x)
14830 && CONSTANT_P (x))
14831 {
14832 char buf[256];
14833
14834 recurse = 1;
14835 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14836 fixuplabelno++;
14837 ASM_OUTPUT_LABEL (asm_out_file, buf);
14838 fprintf (asm_out_file, "\t.long\t(");
14839 output_addr_const (asm_out_file, x);
14840 fprintf (asm_out_file, ")@fixup\n");
14841 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14842 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14843 fprintf (asm_out_file, "\t.long\t");
14844 assemble_name (asm_out_file, buf);
14845 fprintf (asm_out_file, "\n\t.previous\n");
14846 recurse = 0;
14847 return true;
14848 }
14849 /* Remove initial .'s to turn a -mcall-aixdesc function
14850 address into the address of the descriptor, not the function
14851 itself. */
14852 else if (SYMBOL_REF_P (x)
14853 && XSTR (x, 0)[0] == '.'
14854 && DEFAULT_ABI == ABI_AIX)
14855 {
14856 const char *name = XSTR (x, 0);
14857 while (*name == '.')
14858 name++;
14859
14860 fprintf (asm_out_file, "\t.long\t%s\n", name);
14861 return true;
14862 }
14863 }
14864 #endif /* RELOCATABLE_NEEDS_FIXUP */
14865 return default_assemble_integer (x, size, aligned_p);
14866 }
14867
14868 /* Return a template string for assembly to emit when making an
14869 external call. FUNOP is the call mem argument operand number. */
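/* A sketch of the templates built below, for FUNOP = 0 and no TLS arg:
"bl %z0@notoc" when pc-relative, "bl %z0" followed by a "nop" for
AIX/ELFv2, and "bl %z0@plt" for V4 with -fPIC (with "+32768" appended to
the %z part under the secure PLT). */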
14870
14871 static const char *
14872 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14873 {
14874 /* -Wformat-overflow workaround, without which gcc thinks that %u
14875 might produce 10 digits. */
14876 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14877
14878 char arg[12];
14879 arg[0] = 0;
14880 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14881 {
14882 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14883 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14884 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14885 sprintf (arg, "(%%&@tlsld)");
14886 }
14887
14888 /* The magic 32768 offset here corresponds to the offset of
14889 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14890 char z[11];
14891 sprintf (z, "%%z%u%s", funop,
14892 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14893 ? "+32768" : ""));
14894
14895 static char str[32]; /* 1 spare */
14896 if (rs6000_pcrel_p ())
14897 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14898 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14899 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14900 sibcall ? "" : "\n\tnop");
14901 else if (DEFAULT_ABI == ABI_V4)
14902 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14903 flag_pic ? "@plt" : "");
14904 #if TARGET_MACHO
/* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14906 else if (DEFAULT_ABI == ABI_DARWIN)
14907 {
14908 /* The cookie is in operand func+2. */
14909 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14910 int cookie = INTVAL (operands[funop + 2]);
14911 if (cookie & CALL_LONG)
14912 {
14913 tree funname = get_identifier (XSTR (operands[funop], 0));
14914 tree labelname = get_prev_label (funname);
14915 gcc_checking_assert (labelname && !sibcall);
14916
14917 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14918 instruction will reach 'foo', otherwise link as 'bl L42'".
14919 "L42" should be a 'branch island', that will do a far jump to
14920 'foo'. Branch islands are generated in
14921 macho_branch_islands(). */
14922 sprintf (str, "jbsr %%z%u,%.10s", funop,
14923 IDENTIFIER_POINTER (labelname));
14924 }
14925 else
/* Same as AIX or ELFv2, except that, for backwards compatibility, no
nop is emitted after the call. */
14928 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14929 }
14930 #endif
14931 else
14932 gcc_unreachable ();
14933 return str;
14934 }
14935
14936 const char *
14937 rs6000_call_template (rtx *operands, unsigned int funop)
14938 {
14939 return rs6000_call_template_1 (operands, funop, false);
14940 }
14941
14942 const char *
14943 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14944 {
14945 return rs6000_call_template_1 (operands, funop, true);
14946 }
14947
14948 /* As above, for indirect calls. */
14949
14950 static const char *
14951 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14952 bool sibcall)
14953 {
14954 /* -Wformat-overflow workaround, without which gcc thinks that %u
14955 might produce 10 digits. Note that -Wformat-overflow will not
14956 currently warn here for str[], so do not rely on a warning to
14957 ensure str[] is correctly sized. */
14958 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14959
14960 /* Currently, funop is either 0 or 1. The maximum string is always
14961 a !speculate 64-bit __tls_get_addr call.
14962
14963 ABI_ELFv2, pcrel:
14964 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14965 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14966 . 9 crset 2\n\t
14967 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14968 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14969 . 8 beq%T1l-
14970 .---
14971 .142
14972
14973 ABI_AIX:
14974 . 9 ld 2,%3\n\t
14975 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14976 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14977 . 9 crset 2\n\t
14978 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14979 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14980 . 10 beq%T1l-\n\t
14981 . 10 ld 2,%4(1)
14982 .---
14983 .151
14984
14985 ABI_ELFv2:
14986 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14987 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14988 . 9 crset 2\n\t
14989 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14990 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14991 . 10 beq%T1l-\n\t
14992 . 10 ld 2,%3(1)
14993 .---
14994 .142
14995
14996 ABI_V4:
14997 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14998 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14999 . 9 crset 2\n\t
15000 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
15001 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
15002 . 8 beq%T1l-
15003 .---
15004 .141 */
15005 static char str[160]; /* 8 spare */
15006 char *s = str;
15007 const char *ptrload = TARGET_64BIT ? "d" : "wz";
15008
15009 if (DEFAULT_ABI == ABI_AIX)
15010 s += sprintf (s,
15011 "l%s 2,%%%u\n\t",
15012 ptrload, funop + 3);
15013
15014 /* We don't need the extra code to stop indirect call speculation if
15015 calling via LR. */
15016 bool speculate = (TARGET_MACHO
15017 || rs6000_speculate_indirect_jumps
15018 || (REG_P (operands[funop])
15019 && REGNO (operands[funop]) == LR_REGNO));
15020
15021 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
15022 {
15023 const char *rel64 = TARGET_64BIT ? "64" : "";
15024 char tls[29];
15025 tls[0] = 0;
15026 if (GET_CODE (operands[funop + 1]) == UNSPEC)
15027 {
15028 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
15029 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
15030 rel64, funop + 1);
15031 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
15032 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
15033 rel64);
15034 }
15035
15036 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
15037 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
15038 && flag_pic == 2 ? "+32768" : "");
15039 if (!speculate)
15040 {
15041 s += sprintf (s,
15042 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
15043 tls, rel64, notoc, funop, addend);
15044 s += sprintf (s, "crset 2\n\t");
15045 }
15046 s += sprintf (s,
15047 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
15048 tls, rel64, notoc, funop, addend);
15049 }
15050 else if (!speculate)
15051 s += sprintf (s, "crset 2\n\t");
15052
15053 if (rs6000_pcrel_p ())
15054 {
15055 if (speculate)
15056 sprintf (s, "b%%T%ul", funop);
15057 else
15058 sprintf (s, "beq%%T%ul-", funop);
15059 }
15060 else if (DEFAULT_ABI == ABI_AIX)
15061 {
15062 if (speculate)
15063 sprintf (s,
15064 "b%%T%ul\n\t"
15065 "l%s 2,%%%u(1)",
15066 funop, ptrload, funop + 4);
15067 else
15068 sprintf (s,
15069 "beq%%T%ul-\n\t"
15070 "l%s 2,%%%u(1)",
15071 funop, ptrload, funop + 4);
15072 }
15073 else if (DEFAULT_ABI == ABI_ELFv2)
15074 {
15075 if (speculate)
15076 sprintf (s,
15077 "b%%T%ul\n\t"
15078 "l%s 2,%%%u(1)",
15079 funop, ptrload, funop + 3);
15080 else
15081 sprintf (s,
15082 "beq%%T%ul-\n\t"
15083 "l%s 2,%%%u(1)",
15084 funop, ptrload, funop + 3);
15085 }
15086 else
15087 {
15088 if (speculate)
15089 sprintf (s,
15090 "b%%T%u%s",
15091 funop, sibcall ? "" : "l");
15092 else
15093 sprintf (s,
15094 "beq%%T%u%s-%s",
15095 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
15096 }
15097 return str;
15098 }
15099
15100 const char *
15101 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
15102 {
15103 return rs6000_indirect_call_template_1 (operands, funop, false);
15104 }
15105
15106 const char *
15107 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
15108 {
15109 return rs6000_indirect_call_template_1 (operands, funop, true);
15110 }
15111
15112 #if HAVE_AS_PLTSEQ
15113 /* Output indirect call insns. WHICH identifies the type of sequence. */
15114 const char *
15115 rs6000_pltseq_template (rtx *operands, int which)
15116 {
15117 const char *rel64 = TARGET_64BIT ? "64" : "";
15118 char tls[30];
15119 tls[0] = 0;
15120 if (GET_CODE (operands[3]) == UNSPEC)
15121 {
15122 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
15123 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
15124 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
15125 off, rel64);
15126 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
15127 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
15128 off, rel64);
15129 }
15130
15131 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
15132 static char str[96]; /* 10 spare */
15133 char off = WORDS_BIG_ENDIAN ? '2' : '4';
15134 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
15135 && flag_pic == 2 ? "+32768" : "");
15136 switch (which)
15137 {
15138 case RS6000_PLTSEQ_TOCSAVE:
15139 sprintf (str,
15140 "st%s\n\t"
15141 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
15142 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
15143 tls, rel64);
15144 break;
15145 case RS6000_PLTSEQ_PLT16_HA:
15146 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
15147 sprintf (str,
15148 "lis %%0,0\n\t"
15149 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
15150 tls, off, rel64);
15151 else
15152 sprintf (str,
15153 "addis %%0,%%1,0\n\t"
15154 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
15155 tls, off, rel64, addend);
15156 break;
15157 case RS6000_PLTSEQ_PLT16_LO:
15158 sprintf (str,
15159 "l%s %%0,0(%%1)\n\t"
15160 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
15161 TARGET_64BIT ? "d" : "wz",
15162 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
15163 break;
15164 case RS6000_PLTSEQ_MTCTR:
15165 sprintf (str,
15166 "mtctr %%1\n\t"
15167 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
15168 tls, rel64, addend);
15169 break;
15170 case RS6000_PLTSEQ_PLT_PCREL34:
15171 sprintf (str,
15172 "pl%s %%0,0(0),1\n\t"
15173 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
15174 TARGET_64BIT ? "d" : "wz",
15175 tls, rel64);
15176 break;
15177 default:
15178 gcc_unreachable ();
15179 }
15180 return str;
15181 }
15182 #endif
15183 \f
15184 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
15185 /* Emit an assembler directive to set symbol visibility for DECL to
15186 VISIBILITY_TYPE. */
15187
15188 static void
15189 rs6000_assemble_visibility (tree decl, int vis)
15190 {
15191 if (TARGET_XCOFF)
15192 return;
15193
15194 /* Functions need to have their entry point symbol visibility set as
15195 well as their descriptor symbol visibility. */
15196 if (DEFAULT_ABI == ABI_AIX
15197 && DOT_SYMBOLS
15198 && TREE_CODE (decl) == FUNCTION_DECL)
15199 {
15200 static const char * const visibility_types[] = {
15201 NULL, "protected", "hidden", "internal"
15202 };
15203
15204 const char *name, *type;
15205
15206 name = ((* targetm.strip_name_encoding)
15207 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
15208 type = visibility_types[vis];
15209
15210 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
15211 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
15212 }
15213 else
15214 default_assemble_visibility (decl, vis);
15215 }
15216 #endif
15217 \f
15218 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
15219 entry. If RECORD_P is true and the target supports named sections,
15220 the location of the NOPs will be recorded in a special object section
15221 called "__patchable_function_entries". This routine may be called
15222 twice per function to put NOPs before and after the function
15223 entry. */
15224
15225 void
15226 rs6000_print_patchable_function_entry (FILE *file,
15227 unsigned HOST_WIDE_INT patch_area_size,
15228 bool record_p)
15229 {
15230 bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
15231 /* For a function which needs global entry point, we will emit the
15232 patchable area before and after local entry point under the control of
15233 cfun->machine->global_entry_emitted, see the handling in function
15234 rs6000_output_function_prologue. */
15235 if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
15236 default_print_patchable_function_entry (file, patch_area_size, record_p);
15237 }
15238 \f
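/* Return the reverse of condition CODE for a comparison in mode MODE,
preserving ordered/unordered semantics for floating-point compares. */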
15239 enum rtx_code
15240 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
15241 {
15242 /* Reversal of FP compares takes care -- an ordered compare
15243 becomes an unordered compare and vice versa. */
15244 if (mode == CCFPmode
15245 && (!flag_finite_math_only
15246 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
15247 || code == UNEQ || code == LTGT))
15248 return reverse_condition_maybe_unordered (code);
15249 else
15250 return reverse_condition (code);
15251 }
15252
/* Check if C (as a 64-bit integer) can be rotated to a constant which has
nonzero bits in the low LOWBITS bits only.

Return true if C can be rotated to such a constant; if so, the number of
bits by which C is rotated is written to *ROT.  Return false otherwise. */
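/* Worked example: for c = 0xff00000000000000 and lowbits = 16, lz = 0 and
tz = 56, so case b below applies and *rot = 64 - 56 = 8; rotating c left
by 8 bits gives 0xff, which occupies the low 16 bits only. */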
15259
15260 bool
15261 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
15262 {
15263 int clz = HOST_BITS_PER_WIDE_INT - lowbits;
15264
15265 /* case a. 0..0xxx: already at least clz zeros. */
15266 int lz = clz_hwi (c);
15267 if (lz >= clz)
15268 {
15269 *rot = 0;
15270 return true;
15271 }
15272
15273 /* case b. 0..0xxx0..0: at least clz zeros. */
15274 int tz = ctz_hwi (c);
15275 if (lz + tz >= clz)
15276 {
15277 *rot = HOST_BITS_PER_WIDE_INT - tz;
15278 return true;
15279 }
15280
/* case c. xx10.....0xx: rotate left by 'clz - 1' bits first (equivalent
to rotating right by 'lowbits + 1' bits), giving 00...00xxx100 with the
zeros at the head or the tail, then check as in case b; this needs
'clz - 1' >= 'bits of xxxx'. */
15284 const int rot_bits = lowbits + 1;
15285 unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
15286 tz = ctz_hwi (rc);
15287 if (clz_hwi (rc) + tz >= clz)
15288 {
15289 *rot = HOST_BITS_PER_WIDE_INT - (tz + rot_bits);
15290 return true;
15291 }
15292
15293 return false;
15294 }
15295
/* Check if C (as a 64-bit integer) can be rotated to a positive 16-bit
constant, i.e. one with 48 leading zero bits and 16 bits of any value. */
15299 bool
15300 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c)
15301 {
15302 int rot = 0;
15303 bool res = can_be_rotated_to_lowbits (c, 16, &rot);
15304 return res && rot > 0;
15305 }
15306
/* Check if C (as a 64-bit integer) can be rotated to a negative 15-bit
constant, i.e. one with 49 leading one bits and 15 bits of any value. */
15310 bool
15311 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c)
15312 {
15313 int rot = 0;
15314 bool res = can_be_rotated_to_lowbits (~c, 15, &rot);
15315 return res && rot > 0;
15316 }
15317
15318 /* Generate a compare for CODE. Return a brand-new rtx that
15319 represents the result of the compare. */
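/* The result has the shape (CODE (reg) (const_int 0)), where the mode of
the CC register encodes the comparison class: unsigned compares such as
GTU get CCUNSmode, floating-point compares CCFPmode, and most others
CCmode. */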
15320
15321 static rtx
15322 rs6000_generate_compare (rtx cmp, machine_mode mode)
15323 {
15324 machine_mode comp_mode;
15325 rtx compare_result;
15326 enum rtx_code code = GET_CODE (cmp);
15327 rtx op0 = XEXP (cmp, 0);
15328 rtx op1 = XEXP (cmp, 1);
15329
15330 if (!TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))
15331 comp_mode = CCmode;
15332 else if (FLOAT_MODE_P (mode))
15333 comp_mode = CCFPmode;
15334 else if (code == GTU || code == LTU
15335 || code == GEU || code == LEU)
15336 comp_mode = CCUNSmode;
15337 else if ((code == EQ || code == NE)
15338 && unsigned_reg_p (op0)
15339 && (unsigned_reg_p (op1)
15340 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
15341 /* These are unsigned values, perhaps there will be a later
15342 ordering compare that can be shared with this one. */
15343 comp_mode = CCUNSmode;
15344 else
15345 comp_mode = CCmode;
15346
15347 /* If we have an unsigned compare, make sure we don't have a signed value as
15348 an immediate. */
15349 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
15350 && INTVAL (op1) < 0)
15351 {
15352 op0 = copy_rtx_if_shared (op0);
15353 op1 = force_reg (GET_MODE (op0), op1);
15354 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
15355 }
15356
15357 /* First, the compare. */
15358 compare_result = gen_reg_rtx (comp_mode);
15359
15360 /* IEEE 128-bit support in VSX registers when we do not have hardware
15361 support. */
15362 if (!TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))
15363 {
15364 rtx libfunc = NULL_RTX;
15365 bool check_nan = false;
15366 rtx dest;
15367
15368 switch (code)
15369 {
15370 case EQ:
15371 case NE:
15372 libfunc = optab_libfunc (eq_optab, mode);
15373 break;
15374
15375 case GT:
15376 case GE:
15377 libfunc = optab_libfunc (ge_optab, mode);
15378 break;
15379
15380 case LT:
15381 case LE:
15382 libfunc = optab_libfunc (le_optab, mode);
15383 break;
15384
15385 case UNORDERED:
15386 case ORDERED:
15387 libfunc = optab_libfunc (unord_optab, mode);
15388 code = (code == UNORDERED) ? NE : EQ;
15389 break;
15390
15391 case UNGE:
15392 case UNGT:
15393 check_nan = true;
15394 libfunc = optab_libfunc (ge_optab, mode);
15395 code = (code == UNGE) ? GE : GT;
15396 break;
15397
15398 case UNLE:
15399 case UNLT:
15400 check_nan = true;
15401 libfunc = optab_libfunc (le_optab, mode);
15402 code = (code == UNLE) ? LE : LT;
15403 break;
15404
15405 case UNEQ:
15406 case LTGT:
15407 check_nan = true;
15408 libfunc = optab_libfunc (eq_optab, mode);
code = (code == UNEQ) ? EQ : NE;
15410 break;
15411
15412 default:
15413 gcc_unreachable ();
15414 }
15415
15416 gcc_assert (libfunc);
15417
15418 if (!check_nan)
15419 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15420 SImode, op0, mode, op1, mode);
15421
15422 /* The library signals an exception for signalling NaNs, so we need to
15423 handle isgreater, etc. by first checking isordered. */
15424 else
15425 {
15426 rtx ne_rtx, normal_dest, unord_dest;
15427 rtx unord_func = optab_libfunc (unord_optab, mode);
15428 rtx join_label = gen_label_rtx ();
15429 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
rtx unord_cmp = gen_reg_rtx (comp_mode);

15433 /* Test for either value being a NaN. */
15434 gcc_assert (unord_func);
15435 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
15436 SImode, op0, mode, op1, mode);
15437
/* Set the value to 1 if either value is a NaN, and jump to the join
label. */
15440 dest = gen_reg_rtx (SImode);
15441 emit_move_insn (dest, const1_rtx);
15442 emit_insn (gen_rtx_SET (unord_cmp,
15443 gen_rtx_COMPARE (comp_mode, unord_dest,
15444 const0_rtx)));
15445
15446 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
15447 emit_jump_insn (gen_rtx_SET (pc_rtx,
15448 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
15449 join_ref,
15450 pc_rtx)));
15451
15452 /* Do the normal comparison, knowing that the values are not
15453 NaNs. */
15454 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15455 SImode, op0, mode, op1, mode);
15456
15457 emit_insn (gen_cstoresi4 (dest,
15458 gen_rtx_fmt_ee (code, SImode, normal_dest,
15459 const0_rtx),
15460 normal_dest, const0_rtx));
15461
/* Join the NaN and non-NaN paths.  Compare dest against 0. */
15463 emit_label (join_label);
15464 code = NE;
15465 }
15466
15467 emit_insn (gen_rtx_SET (compare_result,
15468 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
15469 }
15470
15471 else
15472 {
15473 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15474 CLOBBERs to match cmptf_internal2 pattern. */
15475 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
15476 && FLOAT128_IBM_P (GET_MODE (op0))
15477 && TARGET_HARD_FLOAT)
15478 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15479 gen_rtvec (10,
15480 gen_rtx_SET (compare_result,
15481 gen_rtx_COMPARE (comp_mode, op0, op1)),
15482 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15483 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15484 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15485 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15486 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15487 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15488 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15489 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15490 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15491 else if (GET_CODE (op1) == UNSPEC
15492 && XINT (op1, 1) == UNSPEC_SP_TEST)
15493 {
15494 rtx op1b = XVECEXP (op1, 0, 0);
15495 comp_mode = CCEQmode;
15496 compare_result = gen_reg_rtx (CCEQmode);
15497 if (TARGET_64BIT)
15498 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15499 else
15500 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15501 }
15502 else if (mode == V16QImode)
15503 {
15504 gcc_assert (code == EQ || code == NE);
15505
15506 rtx result_vector = gen_reg_rtx (V16QImode);
15507 rtx cc_bit = gen_reg_rtx (SImode);
15508 emit_insn (gen_altivec_vcmpequb_p (result_vector, op0, op1));
15509 emit_insn (gen_cr6_test_for_lt (cc_bit));
15510 emit_insn (gen_rtx_SET (compare_result,
15511 gen_rtx_COMPARE (comp_mode, cc_bit,
15512 const1_rtx)));
15513 }
15514 else
15515 emit_insn (gen_rtx_SET (compare_result,
15516 gen_rtx_COMPARE (comp_mode, op0, op1)));
15517 }
15518
15519 validate_condition_mode (code, GET_MODE (compare_result));
15520
15521 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15522 }
15523
15524 \f
15525 /* Return the diagnostic message string if the binary operation OP is
15526 not permitted on TYPE1 and TYPE2, NULL otherwise. */
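/* For example, an expression mixing a __float128 operand with a __ibm128
operand is rejected below unless -mfloat128-convert is in effect. */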
15527
15528 static const char*
15529 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15530 const_tree type1,
15531 const_tree type2)
15532 {
15533 machine_mode mode1 = TYPE_MODE (type1);
15534 machine_mode mode2 = TYPE_MODE (type2);
15535
15536 /* For complex modes, use the inner type. */
15537 if (COMPLEX_MODE_P (mode1))
15538 mode1 = GET_MODE_INNER (mode1);
15539
15540 if (COMPLEX_MODE_P (mode2))
15541 mode2 = GET_MODE_INNER (mode2);
15542
15543 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15544 double to intermix unless -mfloat128-convert. */
15545 if (mode1 == mode2)
15546 return NULL;
15547
15548 if (!TARGET_FLOAT128_CVT)
15549 {
15550 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15551 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15552 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15553 "point types");
15554 }
15555
15556 return NULL;
15557 }
15558
15559 \f
15560 /* Expand floating point conversion to/from __float128 and __ibm128. */
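/* The strategy below: a plain move when the underlying format is the
same, a hardware conversion insn (e.g. gen_extenddfkf2_hw) when
TARGET_FLOAT128_HW provides one, and otherwise a libcall looked up via
convert_optab_libfunc. */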
15561
15562 void
15563 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15564 {
15565 machine_mode dest_mode = GET_MODE (dest);
15566 machine_mode src_mode = GET_MODE (src);
15567 convert_optab cvt = unknown_optab;
15568 bool do_move = false;
15569 rtx libfunc = NULL_RTX;
15570 rtx dest2;
15571 typedef rtx (*rtx_2func_t) (rtx, rtx);
15572 rtx_2func_t hw_convert = (rtx_2func_t)0;
15573 size_t kf_or_tf;
15574
15575 struct hw_conv_t {
15576 rtx_2func_t from_df;
15577 rtx_2func_t from_sf;
15578 rtx_2func_t from_si_sign;
15579 rtx_2func_t from_si_uns;
15580 rtx_2func_t from_di_sign;
15581 rtx_2func_t from_di_uns;
15582 rtx_2func_t to_df;
15583 rtx_2func_t to_sf;
15584 rtx_2func_t to_si_sign;
15585 rtx_2func_t to_si_uns;
15586 rtx_2func_t to_di_sign;
15587 rtx_2func_t to_di_uns;
15588 } hw_conversions[2] = {
/* conversions to/from KFmode. */
15590 {
15591 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15592 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15593 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15594 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15595 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15596 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15597 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15598 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15599 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15600 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15601 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15602 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15603 },
15604
/* conversions to/from TFmode. */
15606 {
15607 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15608 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15609 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15610 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15611 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15612 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15613 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15614 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15615 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15616 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15617 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15618 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15619 },
15620 };
15621
15622 if (dest_mode == src_mode)
15623 gcc_unreachable ();
15624
15625 /* Eliminate memory operations. */
15626 if (MEM_P (src))
15627 src = force_reg (src_mode, src);
15628
15629 if (MEM_P (dest))
15630 {
15631 rtx tmp = gen_reg_rtx (dest_mode);
15632 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15633 rs6000_emit_move (dest, tmp, dest_mode);
15634 return;
15635 }
15636
15637 /* Convert to IEEE 128-bit floating point. */
15638 if (FLOAT128_IEEE_P (dest_mode))
15639 {
15640 if (dest_mode == KFmode)
15641 kf_or_tf = 0;
15642 else if (dest_mode == TFmode)
15643 kf_or_tf = 1;
15644 else
15645 gcc_unreachable ();
15646
15647 switch (src_mode)
15648 {
15649 case E_DFmode:
15650 cvt = sext_optab;
15651 hw_convert = hw_conversions[kf_or_tf].from_df;
15652 break;
15653
15654 case E_SFmode:
15655 cvt = sext_optab;
15656 hw_convert = hw_conversions[kf_or_tf].from_sf;
15657 break;
15658
15659 case E_KFmode:
15660 case E_IFmode:
15661 case E_TFmode:
15662 if (FLOAT128_IBM_P (src_mode))
15663 cvt = trunc_optab;
15664 else
15665 do_move = true;
15666 break;
15667
15668 case E_SImode:
15669 if (unsigned_p)
15670 {
15671 cvt = ufloat_optab;
15672 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15673 }
15674 else
15675 {
15676 cvt = sfloat_optab;
15677 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15678 }
15679 break;
15680
15681 case E_DImode:
15682 if (unsigned_p)
15683 {
15684 cvt = ufloat_optab;
15685 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15686 }
15687 else
15688 {
15689 cvt = sfloat_optab;
15690 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15691 }
15692 break;
15693
15694 default:
15695 gcc_unreachable ();
15696 }
15697 }
15698
15699 /* Convert from IEEE 128-bit floating point. */
15700 else if (FLOAT128_IEEE_P (src_mode))
15701 {
15702 if (src_mode == KFmode)
15703 kf_or_tf = 0;
15704 else if (src_mode == TFmode)
15705 kf_or_tf = 1;
15706 else
15707 gcc_unreachable ();
15708
15709 switch (dest_mode)
15710 {
15711 case E_DFmode:
15712 cvt = trunc_optab;
15713 hw_convert = hw_conversions[kf_or_tf].to_df;
15714 break;
15715
15716 case E_SFmode:
15717 cvt = trunc_optab;
15718 hw_convert = hw_conversions[kf_or_tf].to_sf;
15719 break;
15720
15721 case E_KFmode:
15722 case E_IFmode:
15723 case E_TFmode:
15724 if (FLOAT128_IBM_P (dest_mode))
15725 cvt = sext_optab;
15726 else
15727 do_move = true;
15728 break;
15729
15730 case E_SImode:
15731 if (unsigned_p)
15732 {
15733 cvt = ufix_optab;
15734 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15735 }
15736 else
15737 {
15738 cvt = sfix_optab;
15739 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15740 }
15741 break;
15742
15743 case E_DImode:
15744 if (unsigned_p)
15745 {
15746 cvt = ufix_optab;
15747 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15748 }
15749 else
15750 {
15751 cvt = sfix_optab;
15752 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15753 }
15754 break;
15755
15756 default:
15757 gcc_unreachable ();
15758 }
15759 }
15760
15761 /* Both IBM format. */
15762 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15763 do_move = true;
15764
15765 else
15766 gcc_unreachable ();
15767
15768 /* Handle conversion between TFmode/KFmode/IFmode. */
15769 if (do_move)
15770 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15771
15772 /* Handle conversion if we have hardware support. */
15773 else if (TARGET_FLOAT128_HW && hw_convert)
15774 emit_insn ((hw_convert) (dest, src));
15775
15776 /* Call an external function to do the conversion. */
15777 else if (cvt != unknown_optab)
15778 {
15779 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15780 gcc_assert (libfunc != NULL_RTX);
15781
15782 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15783 src, src_mode);
15784
15785 gcc_assert (dest2 != NULL_RTX);
15786 if (!rtx_equal_p (dest, dest2))
15787 emit_move_insn (dest, dest2);
15788 }
15789
15790 else
15791 gcc_unreachable ();
15792
15793 return;
15794 }
15795
15796 \f
15797 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15798 can be used as that dest register. Return the dest register. */
15799
15800 rtx
15801 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15802 {
15803 if (op2 == const0_rtx)
15804 return op1;
15805
15806 if (GET_CODE (scratch) == SCRATCH)
15807 scratch = gen_reg_rtx (mode);
15808
15809 if (logical_operand (op2, mode))
15810 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15811 else
15812 emit_insn (gen_rtx_SET (scratch,
15813 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15814
15815 return scratch;
15816 }
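
/* For example, for MODE == SImode with OP1 in r3 and OP2 == 16 (a
   logical operand), the function above emits roughly "xori 9,3,16",
   leaving a register that is zero iff OP1 == OP2 for the caller to
   test.  */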
15817
15818 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15819 requires this. The result is mode MODE. */
15820 rtx
15821 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15822 {
15823 rtx cond[2];
15824 int n = 0;
15825 if (code == LTGT || code == LE || code == UNLT)
15826 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15827 if (code == LTGT || code == GE || code == UNGT)
15828 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15829 if (code == LE || code == GE || code == UNEQ)
15830 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15831 if (code == UNLT || code == UNGT || code == UNEQ)
15832 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15833
15834 gcc_assert (n == 2);
15835
15836 rtx cc = gen_reg_rtx (CCEQmode);
15837 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15838 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15839
15840 return cc;
15841 }
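
/* For example, for CODE == LE the two condition bits above are LT and
   EQ, so a single cror combines them and the result is a CCEQmode
   register that reads as "equal" exactly when LT || EQ holds.  */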
15842
15843 void
15844 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15845 {
15846 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15847 rtx_code cond_code = GET_CODE (condition_rtx);
15848
15849 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15850 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15851 ;
15852 else if (cond_code == NE
15853 || cond_code == GE || cond_code == LE
15854 || cond_code == GEU || cond_code == LEU
15855 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15856 {
15857 rtx not_result = gen_reg_rtx (CCEQmode);
15858 rtx not_op, rev_cond_rtx;
15859 machine_mode cc_mode;
15860
15861 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15862
15863 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15864 SImode, XEXP (condition_rtx, 0), const0_rtx);
15865 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15866 emit_insn (gen_rtx_SET (not_result, not_op));
15867 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15868 }
15869
15870 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15871 if (op_mode == VOIDmode)
15872 op_mode = GET_MODE (XEXP (operands[1], 1));
15873
15874 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15875 {
15876 PUT_MODE (condition_rtx, DImode);
15877 convert_move (operands[0], condition_rtx, 0);
15878 }
15879 else
15880 {
15881 PUT_MODE (condition_rtx, SImode);
15882 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15883 }
15884 }
15885
/* Emit a conditional branch to the label in OPERANDS[3], testing the
   comparison in OPERANDS[0], whose operands have mode MODE.  */
15887
15888 void
15889 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15890 {
15891 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15892 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15893 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15894 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15895 }
15896
15897 /* Return the string to output a conditional branch to LABEL, which is
15898 the operand template of the label, or NULL if the branch is really a
15899 conditional return.
15900
15901 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15902 condition code register and its mode specifies what kind of
15903 comparison we made.
15904
15905 REVERSED is nonzero if we should reverse the sense of the comparison.
15906
15907 INSN is the insn. */
15908
15909 char *
15910 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15911 {
15912 static char string[64];
15913 enum rtx_code code = GET_CODE (op);
15914 rtx cc_reg = XEXP (op, 0);
15915 machine_mode mode = GET_MODE (cc_reg);
15916 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15917 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15918 int really_reversed = reversed ^ need_longbranch;
15919 char *s = string;
15920 const char *ccode;
15921 const char *pred;
15922 rtx note;
15923
15924 validate_condition_mode (code, mode);
15925
15926 /* Work out which way this really branches. We could use
15927 reverse_condition_maybe_unordered here always but this
15928 makes the resulting assembler clearer. */
15929 if (really_reversed)
15930 {
15931 /* Reversal of FP compares takes care -- an ordered compare
15932 becomes an unordered compare and vice versa. */
15933 if (mode == CCFPmode)
15934 code = reverse_condition_maybe_unordered (code);
15935 else
15936 code = reverse_condition (code);
15937 }
15938
15939 switch (code)
15940 {
15941 /* Not all of these are actually distinct opcodes, but
15942 we distinguish them for clarity of the resulting assembler. */
15943 case NE: case LTGT:
15944 ccode = "ne"; break;
15945 case EQ: case UNEQ:
15946 ccode = "eq"; break;
15947 case GE: case GEU:
15948 ccode = "ge"; break;
15949 case GT: case GTU: case UNGT:
15950 ccode = "gt"; break;
15951 case LE: case LEU:
15952 ccode = "le"; break;
15953 case LT: case LTU: case UNLT:
15954 ccode = "lt"; break;
15955 case UNORDERED: ccode = "un"; break;
15956 case ORDERED: ccode = "nu"; break;
15957 case UNGE: ccode = "nl"; break;
15958 case UNLE: ccode = "ng"; break;
15959 default:
15960 gcc_unreachable ();
15961 }
15962
15963 /* Maybe we have a guess as to how likely the branch is. */
15964 pred = "";
15965 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15966 if (note != NULL_RTX)
15967 {
15968 /* PROB is the difference from 50%. */
15969 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15970 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15971
15972 /* Only hint for highly probable/improbable branches on newer cpus when
15973 we have real profile data, as static prediction overrides processor
15974 dynamic prediction. For older cpus we may as well always hint, but
15975 assume not taken for branches that are very close to 50% as a
15976 mispredicted taken branch is more expensive than a
15977 mispredicted not-taken branch. */
15978 if (rs6000_always_hint
15979 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15980 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15981 && br_prob_note_reliable_p (note)))
15982 {
15983 if (abs (prob) > REG_BR_PROB_BASE / 20
15984 && ((prob > 0) ^ need_longbranch))
15985 pred = "+";
15986 else
15987 pred = "-";
15988 }
15989 }
15990
15991 if (label == NULL)
15992 s += sprintf (s, "b%slr%s ", ccode, pred);
15993 else
15994 s += sprintf (s, "b%s%s ", ccode, pred);
15995
15996 /* We need to escape any '%' characters in the reg_names string.
15997 Assume they'd only be the first character.... */
15998 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15999 *s++ = '%';
16000 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
16001
16002 if (label != NULL)
16003 {
16004 /* If the branch distance was too far, we may have to use an
16005 unconditional branch to go the distance. */
16006 if (need_longbranch)
16007 s += sprintf (s, ",$+8\n\tb %s", label);
16008 else
16009 s += sprintf (s, ",%s", label);
16010 }
16011
16012 return string;
16013 }
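
/* For example, a predicted-taken EQ branch on cr0 comes out roughly as
   "beq+ 0,.L5", while a branch whose target is out of range is
   inverted and emitted as "bne 0,$+8" followed by an unconditional
   "b .L5".  */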
16014
16015 /* Return insn for VSX or Altivec comparisons. */
16016
16017 static rtx
16018 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
16019 {
16020 rtx mask;
16021 machine_mode mode = GET_MODE (op0);
16022
16023 switch (code)
16024 {
16025 default:
16026 break;
16027
16028 case GE:
16029 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16030 return NULL_RTX;
16031 /* FALLTHRU */
16032
16033 case EQ:
16034 case GT:
16035 case GTU:
16036 case ORDERED:
16037 case UNORDERED:
16038 case UNEQ:
16039 case LTGT:
16040 mask = gen_reg_rtx (mode);
16041 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
16042 return mask;
16043 }
16044
16045 return NULL_RTX;
16046 }
16047
16048 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
DMODE is the expected destination mode.  This is a recursive function.  */
16050
16051 static rtx
16052 rs6000_emit_vector_compare (enum rtx_code rcode,
16053 rtx op0, rtx op1,
16054 machine_mode dmode)
16055 {
16056 rtx mask;
16057 bool swap_operands = false;
16058 bool try_again = false;
16059
16060 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
16061 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
16062
16063 /* See if the comparison works as is. */
16064 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
16065 if (mask)
16066 return mask;
16067
16068 switch (rcode)
16069 {
16070 case LT:
16071 rcode = GT;
16072 swap_operands = true;
16073 try_again = true;
16074 break;
16075 case LTU:
16076 rcode = GTU;
16077 swap_operands = true;
16078 try_again = true;
16079 break;
16080 case NE:
16081 case UNLE:
16082 case UNLT:
16083 case UNGE:
16084 case UNGT:
16085 /* Invert condition and try again.
16086 e.g., A != B becomes ~(A==B). */
16087 {
16088 enum rtx_code rev_code;
16089 enum insn_code nor_code;
16090 rtx mask2;
16091
16092 rev_code = reverse_condition_maybe_unordered (rcode);
16093 if (rev_code == UNKNOWN)
16094 return NULL_RTX;
16095
16096 nor_code = optab_handler (one_cmpl_optab, dmode);
16097 if (nor_code == CODE_FOR_nothing)
16098 return NULL_RTX;
16099
16100 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
16101 if (!mask2)
16102 return NULL_RTX;
16103
16104 mask = gen_reg_rtx (dmode);
16105 emit_insn (GEN_FCN (nor_code) (mask, mask2));
16106 return mask;
16107 }
16108 break;
16109 case GE:
16110 case GEU:
16111 case LE:
16112 case LEU:
16113 /* Try GT/GTU/LT/LTU OR EQ */
16114 {
16115 rtx c_rtx, eq_rtx;
16116 enum insn_code ior_code;
16117 enum rtx_code new_code;
16118
16119 switch (rcode)
16120 {
16121 case GE:
16122 new_code = GT;
16123 break;
16124
16125 case GEU:
16126 new_code = GTU;
16127 break;
16128
16129 case LE:
16130 new_code = LT;
16131 break;
16132
16133 case LEU:
16134 new_code = LTU;
16135 break;
16136
16137 default:
16138 gcc_unreachable ();
16139 }
16140
16141 ior_code = optab_handler (ior_optab, dmode);
16142 if (ior_code == CODE_FOR_nothing)
16143 return NULL_RTX;
16144
16145 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
16146 if (!c_rtx)
16147 return NULL_RTX;
16148
16149 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
16150 if (!eq_rtx)
16151 return NULL_RTX;
16152
16153 mask = gen_reg_rtx (dmode);
16154 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
16155 return mask;
16156 }
16157 break;
16158 default:
16159 return NULL_RTX;
16160 }
16161
16162 if (try_again)
16163 {
16164 if (swap_operands)
16165 std::swap (op0, op1);
16166
16167 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
16168 if (mask)
16169 return mask;
16170 }
16171
16172 /* You only get two chances. */
16173 return NULL_RTX;
16174 }
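
/* For example, integer LE has no direct AltiVec/VSX instruction, so it
   is built recursively as (LT OR EQ), where LT itself is handled by
   swapping the operands of a GT compare; NE is built as the one's
   complement of EQ.  */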
16175
16176 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
16177 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
16178 operands for the relation operation COND. */
16179
16180 static int
16181 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
16182 rtx cond, rtx cc_op0, rtx cc_op1)
16183 {
16184 machine_mode dest_mode = GET_MODE (dest);
16185 machine_mode mask_mode = GET_MODE (cc_op0);
16186 enum rtx_code rcode = GET_CODE (cond);
16187 rtx mask;
16188 bool invert_move = false;
16189
16190 if (VECTOR_UNIT_NONE_P (dest_mode))
16191 return 0;
16192
16193 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
16194 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
16195
16196 switch (rcode)
16197 {
/* Swap operands if we can, or else fall back to doing the operation as
specified and using a NOR to invert the test.  */
16200 case NE:
16201 case UNLE:
16202 case UNLT:
16203 case UNGE:
16204 case UNGT:
16205 /* Invert condition and try again.
16206 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
16207 invert_move = true;
16208 rcode = reverse_condition_maybe_unordered (rcode);
16209 if (rcode == UNKNOWN)
16210 return 0;
16211 break;
16212
16213 case GE:
16214 case LE:
16215 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
16216 {
16217 /* Invert condition to avoid compound test. */
16218 invert_move = true;
16219 rcode = reverse_condition (rcode);
16220 }
16221 break;
16222
16223 case GTU:
16224 case GEU:
16225 case LTU:
16226 case LEU:
16227
16228 /* Invert condition to avoid compound test if necessary. */
16229 if (rcode == GEU || rcode == LEU)
16230 {
16231 invert_move = true;
16232 rcode = reverse_condition (rcode);
16233 }
16234 break;
16235
16236 default:
16237 break;
16238 }
16239
16240 /* Get the vector mask for the given relational operations. */
16241 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
16242
16243 if (!mask)
16244 return 0;
16245
16246 if (mask_mode != dest_mode)
16247 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
16248
16249 if (invert_move)
16250 std::swap (op_true, op_false);
16251
16252 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
16253 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
16254 && (GET_CODE (op_true) == CONST_VECTOR
16255 || GET_CODE (op_false) == CONST_VECTOR))
16256 {
16257 rtx constant_0 = CONST0_RTX (dest_mode);
16258 rtx constant_m1 = CONSTM1_RTX (dest_mode);
16259
16260 if (op_true == constant_m1 && op_false == constant_0)
16261 {
16262 emit_move_insn (dest, mask);
16263 return 1;
16264 }
16265
16266 else if (op_true == constant_0 && op_false == constant_m1)
16267 {
16268 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
16269 return 1;
16270 }
16271
16272 /* If we can't use the vector comparison directly, perhaps we can use
16273 the mask for the true or false fields, instead of loading up a
16274 constant. */
16275 if (op_true == constant_m1)
16276 op_true = mask;
16277
16278 if (op_false == constant_0)
16279 op_false = mask;
16280 }
16281
16282 if (!REG_P (op_true) && !SUBREG_P (op_true))
16283 op_true = force_reg (dest_mode, op_true);
16284
16285 if (!REG_P (op_false) && !SUBREG_P (op_false))
16286 op_false = force_reg (dest_mode, op_false);
16287
16288 rtx tmp = gen_rtx_IOR (dest_mode,
16289 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
16290 op_false),
16291 gen_rtx_AND (dest_mode, mask, op_true));
16292 emit_insn (gen_rtx_SET (dest, tmp));
16293 return 1;
16294 }
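
/* The generic sequence at the end of the function above is the classic
   bit-select idiom, dest = (mask & op_true) | (~mask & op_false),
   which matches the AltiVec vsel and VSX xxsel instructions.  */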
16295
16296 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
16297 maximum or minimum with "C" semantics.
16298
Unless you use -ffast-math, you can't use these instructions for forms
that implicitly reverse the condition, because the comparison might
generate a NaN or signed zero.
16302
I.e. the following can always be replaced:
16304 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16305 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16306 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
16307 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
16308
16309 The following can be replaced only if -ffast-math is used:
16310 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16311 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16312 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
16313 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
16314
Move TRUE_COND to DEST if OP applied to the operands of the last
comparison is nonzero/true, FALSE_COND if it is zero/false.

Return false if we can't generate the appropriate minimum or maximum,
and true if we did emit it.  */
16320
16321 static bool
16322 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16323 {
16324 enum rtx_code code = GET_CODE (op);
16325 rtx op0 = XEXP (op, 0);
16326 rtx op1 = XEXP (op, 1);
16327 machine_mode compare_mode = GET_MODE (op0);
16328 machine_mode result_mode = GET_MODE (dest);
16329
16330 if (result_mode != compare_mode)
16331 return false;
16332
16333 /* See the comments of this function, it simply expects GE/GT/LE/LT in
16334 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
16335 we need to do the reversions first to make the following checks
16336 support fewer cases, like:
16337
16338 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16339 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16340 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16341 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16342
16343 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16344 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16345 have to check for fast-math or the like. */
16346 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
16347 {
16348 code = reverse_condition_maybe_unordered (code);
16349 std::swap (true_cond, false_cond);
16350 }
16351
16352 bool max_p;
16353 if (code == GE || code == GT)
16354 max_p = true;
16355 else if (code == LE || code == LT)
16356 max_p = false;
16357 else
16358 return false;
16359
16360 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
16361 ;
16362
16363 /* Only when NaNs and signed-zeros are not in effect, smax could be
16364 used for `op0 < op1 ? op1 : op0`, and smin could be used for
16365 `op0 > op1 ? op1 : op0`. */
16366 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
16367 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
16368 max_p = !max_p;
16369
16370 else
16371 return false;
16372
16373 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
16374 return true;
16375 }
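
/* For example, on a power9 target "r = (a > b) ? a : b" in DFmode
   becomes a single "xsmaxcdp r,a,b" with no compare-and-branch
   sequence.  */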
16376
16377 /* Possibly emit a floating point conditional move by generating a compare that
16378 sets a mask instruction and a XXSEL select instruction.
16379
Move TRUE_COND to DEST if OP applied to the operands of the last
comparison is nonzero/true, FALSE_COND if it is zero/false.
16382
16383 Return false if the operation cannot be generated, and true if we could
16384 generate the instruction. */
16385
16386 static bool
16387 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16388 {
16389 enum rtx_code code = GET_CODE (op);
16390 rtx op0 = XEXP (op, 0);
16391 rtx op1 = XEXP (op, 1);
16392 machine_mode compare_mode = GET_MODE (op0);
16393 machine_mode result_mode = GET_MODE (dest);
16394 rtx compare_rtx;
16395 rtx cmove_rtx;
16396 rtx clobber_rtx;
16397
16398 if (!can_create_pseudo_p ())
return false;
16400
16401 /* We allow the comparison to be either SFmode/DFmode and the true/false
16402 condition to be either SFmode/DFmode. I.e. we allow:
16403
16404 float a, b;
16405 double c, d, r;
16406
16407 r = (a == b) ? c : d;
16408
16409 and:
16410
16411 double a, b;
16412 float c, d, r;
16413
16414 r = (a == b) ? c : d;
16415
16416 but we don't allow intermixing the IEEE 128-bit floating point types with
16417 the 32/64-bit scalar types. */
16418
16419 if (!(compare_mode == result_mode
16420 || (compare_mode == SFmode && result_mode == DFmode)
16421 || (compare_mode == DFmode && result_mode == SFmode)))
16422 return false;
16423
16424 switch (code)
16425 {
16426 case EQ:
16427 case GE:
16428 case GT:
16429 break;
16430
16431 case NE:
16432 case LT:
16433 case LE:
16434 code = swap_condition (code);
16435 std::swap (op0, op1);
16436 break;
16437
16438 default:
16439 return false;
16440 }
16441
16442 /* Generate: [(parallel [(set (dest)
16443 (if_then_else (op (cmp1) (cmp2))
16444 (true)
16445 (false)))
16446 (clobber (scratch))])]. */
16447
16448 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
16449 cmove_rtx = gen_rtx_SET (dest,
16450 gen_rtx_IF_THEN_ELSE (result_mode,
16451 compare_rtx,
16452 true_cond,
16453 false_cond));
16454
16455 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
16456 emit_insn (gen_rtx_PARALLEL (VOIDmode,
16457 gen_rtvec (2, cmove_rtx, clobber_rtx)));
16458
16459 return true;
16460 }
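
/* For example, for DFmode this expands to a VSX compare that produces
   an all-ones/all-zeros mask (such as xscmpgtdp) followed by an xxsel
   that picks TRUE_COND or FALSE_COND under that mask.  */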
16461
/* Helper function to return true if the target has a compare-and-set-mask
instruction that can be used with XXSEL to implement a conditional move.
It is also assumed that such a target also supports the "C" minimum and
maximum instructions.  */
16466
16467 static bool
16468 have_compare_and_set_mask (machine_mode mode)
16469 {
16470 switch (mode)
16471 {
16472 case E_SFmode:
16473 case E_DFmode:
16474 return TARGET_P9_MINMAX;
16475
16476 case E_KFmode:
16477 case E_TFmode:
16478 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16479
16480 default:
16481 break;
16482 }
16483
16484 return false;
16485 }
16486
/* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
operands of the last comparison is nonzero/true, FALSE_COND if it
is zero/false.  Return false if the hardware has no such operation.  */
16490
16491 bool
16492 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16493 {
16494 enum rtx_code code = GET_CODE (op);
16495 rtx op0 = XEXP (op, 0);
16496 rtx op1 = XEXP (op, 1);
16497 machine_mode compare_mode = GET_MODE (op0);
16498 machine_mode result_mode = GET_MODE (dest);
16499 rtx temp;
16500 bool is_against_zero;
16501
16502 /* These modes should always match. */
16503 if (GET_MODE (op1) != compare_mode
16504 /* In the isel case however, we can use a compare immediate, so
16505 op1 may be a small constant. */
16506 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16507 return false;
16508 if (GET_MODE (true_cond) != result_mode)
16509 return false;
16510 if (GET_MODE (false_cond) != result_mode)
16511 return false;
16512
16513 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16514 instructions. */
16515 if (have_compare_and_set_mask (compare_mode)
16516 && have_compare_and_set_mask (result_mode))
16517 {
16518 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16519 return true;
16520
16521 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16522 return true;
16523 }
16524
16525 /* Don't allow using floating point comparisons for integer results for
16526 now. */
16527 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16528 return false;
16529
16530 /* First, work out if the hardware can do this at all, or
16531 if it's too slow.... */
16532 if (!FLOAT_MODE_P (compare_mode))
16533 {
16534 if (TARGET_ISEL)
16535 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16536 return false;
16537 }
16538
16539 is_against_zero = op1 == CONST0_RTX (compare_mode);
16540
16541 /* A floating-point subtract might overflow, underflow, or produce
16542 an inexact result, thus changing the floating-point flags, so it
16543 can't be generated if we care about that. It's safe if one side
16544 of the construct is zero, since then no subtract will be
16545 generated. */
16546 if (SCALAR_FLOAT_MODE_P (compare_mode)
16547 && flag_trapping_math && ! is_against_zero)
16548 return false;
16549
16550 /* Eliminate half of the comparisons by switching operands, this
16551 makes the remaining code simpler. */
16552 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16553 || code == LTGT || code == LT || code == UNLE)
16554 {
16555 code = reverse_condition_maybe_unordered (code);
16556 temp = true_cond;
16557 true_cond = false_cond;
16558 false_cond = temp;
16559 }
16560
16561 /* UNEQ and LTGT take four instructions for a comparison with zero,
16562 it'll probably be faster to use a branch here too. */
16563 if (code == UNEQ && HONOR_NANS (compare_mode))
16564 return false;
16565
16566 /* We're going to try to implement comparisons by performing
16567 a subtract, then comparing against zero. Unfortunately,
16568 Inf - Inf is NaN which is not zero, and so if we don't
16569 know that the operand is finite and the comparison
16570 would treat EQ different to UNORDERED, we can't do it. */
16571 if (HONOR_INFINITIES (compare_mode)
16572 && code != GT && code != UNGE
16573 && (!CONST_DOUBLE_P (op1)
16574 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16575 /* Constructs of the form (a OP b ? a : b) are safe. */
16576 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16577 || (! rtx_equal_p (op0, true_cond)
16578 && ! rtx_equal_p (op1, true_cond))))
16579 return false;
16580
16581 /* At this point we know we can use fsel. */
16582
16583 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16584 is no fsel instruction. */
16585 if (compare_mode != SFmode && compare_mode != DFmode)
16586 return false;
16587
16588 /* Reduce the comparison to a comparison against zero. */
16589 if (! is_against_zero)
16590 {
16591 temp = gen_reg_rtx (compare_mode);
16592 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16593 op0 = temp;
16594 op1 = CONST0_RTX (compare_mode);
16595 }
16596
16597 /* If we don't care about NaNs we can reduce some of the comparisons
16598 down to faster ones. */
16599 if (! HONOR_NANS (compare_mode))
16600 switch (code)
16601 {
16602 case GT:
16603 code = LE;
16604 temp = true_cond;
16605 true_cond = false_cond;
16606 false_cond = temp;
16607 break;
16608 case UNGE:
16609 code = GE;
16610 break;
16611 case UNEQ:
16612 code = EQ;
16613 break;
16614 default:
16615 break;
16616 }
16617
16618 /* Now, reduce everything down to a GE. */
16619 switch (code)
16620 {
16621 case GE:
16622 break;
16623
16624 case LE:
16625 temp = gen_reg_rtx (compare_mode);
16626 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16627 op0 = temp;
16628 break;
16629
16630 case ORDERED:
16631 temp = gen_reg_rtx (compare_mode);
16632 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16633 op0 = temp;
16634 break;
16635
16636 case EQ:
16637 temp = gen_reg_rtx (compare_mode);
16638 emit_insn (gen_rtx_SET (temp,
16639 gen_rtx_NEG (compare_mode,
16640 gen_rtx_ABS (compare_mode, op0))));
16641 op0 = temp;
16642 break;
16643
16644 case UNGE:
16645 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16646 temp = gen_reg_rtx (result_mode);
16647 emit_insn (gen_rtx_SET (temp,
16648 gen_rtx_IF_THEN_ELSE (result_mode,
16649 gen_rtx_GE (VOIDmode,
16650 op0, op1),
16651 true_cond, false_cond)));
16652 false_cond = true_cond;
16653 true_cond = temp;
16654
16655 temp = gen_reg_rtx (compare_mode);
16656 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16657 op0 = temp;
16658 break;
16659
16660 case GT:
16661 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16662 temp = gen_reg_rtx (result_mode);
16663 emit_insn (gen_rtx_SET (temp,
16664 gen_rtx_IF_THEN_ELSE (result_mode,
16665 gen_rtx_GE (VOIDmode,
16666 op0, op1),
16667 true_cond, false_cond)));
16668 true_cond = false_cond;
16669 false_cond = temp;
16670
16671 temp = gen_reg_rtx (compare_mode);
16672 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16673 op0 = temp;
16674 break;
16675
16676 default:
16677 gcc_unreachable ();
16678 }
16679
16680 emit_insn (gen_rtx_SET (dest,
16681 gen_rtx_IF_THEN_ELSE (result_mode,
16682 gen_rtx_GE (VOIDmode,
16683 op0, op1),
16684 true_cond, false_cond)));
16685 return true;
16686 }
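
/* For example, with -ffast-math "d = (a >= b) ? x : y" in DFmode is
   reduced to the test "a - b >= 0.0" and emitted as one fsub plus one
   fsel, with no branch.  */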
16687
16688 /* Same as above, but for ints (isel). */
16689
16690 bool
16691 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16692 {
16693 rtx condition_rtx, cr;
16694 machine_mode mode = GET_MODE (dest);
16695 enum rtx_code cond_code;
16696 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16697 bool signedp;
16698
16699 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16700 return false;
16701
/* PR104335: We now need to expect CC-mode "comparisons" coming from
ifcvt.  The following code expects proper comparisons, so it is
better to bail out here.  */
16705 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16706 return false;
16707
16708 /* We still have to do the compare, because isel doesn't do a
16709 compare, it just looks at the CRx bits set by a previous compare
16710 instruction. */
16711 condition_rtx = rs6000_generate_compare (op, mode);
16712 cond_code = GET_CODE (condition_rtx);
16713 cr = XEXP (condition_rtx, 0);
16714 signedp = GET_MODE (cr) == CCmode;
16715
16716 isel_func = (mode == SImode
16717 ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si)
16718 : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di));
16719
16720 switch (cond_code)
16721 {
16722 case LT: case GT: case LTU: case GTU: case EQ:
16723 /* isel handles these directly. */
16724 break;
16725
16726 default:
16727 /* We need to swap the sense of the comparison. */
16728 {
16729 std::swap (false_cond, true_cond);
16730 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16731 }
16732 break;
16733 }
16734
16735 false_cond = force_reg (mode, false_cond);
16736 if (true_cond != const0_rtx)
16737 true_cond = force_reg (mode, true_cond);
16738
16739 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16740
16741 return true;
16742 }
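
/* For example, "r = (a < b) ? x : y" for SImode becomes a cmpw that
   sets a CR field followed by a single isel reading the LT bit of
   that field.  */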
16743
16744 void
16745 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16746 {
16747 machine_mode mode = GET_MODE (op0);
16748 enum rtx_code c;
16749 rtx target;
16750
16751 /* VSX/altivec have direct min/max insns. */
16752 if ((code == SMAX || code == SMIN)
16753 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16754 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16755 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16756 {
16757 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16758 return;
16759 }
16760
16761 if (code == SMAX || code == SMIN)
16762 c = GE;
16763 else
16764 c = GEU;
16765
16766 if (code == SMAX || code == UMAX)
16767 target = emit_conditional_move (dest, { c, op0, op1, mode },
16768 op0, op1, mode, 0);
16769 else
16770 target = emit_conditional_move (dest, { c, op0, op1, mode },
16771 op1, op0, mode, 0);
16772 gcc_assert (target);
16773 if (target != dest)
16774 emit_move_insn (dest, target);
16775 }
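
/* For example, SMAX on V4SF maps directly to vmaxfp (xvmaxsp under
   VSX), while modes without a native min/max fall back to a
   conditional move keyed off a GE/GEU comparison.  */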
16776
16777 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16778 COND is true. Mark the jump as unlikely to be taken. */
16779
16780 static void
16781 emit_unlikely_jump (rtx cond, rtx label)
16782 {
16783 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16784 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16785 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16786 }
16787
16788 /* A subroutine of the atomic operation splitters. Emit a load-locked
instruction in MODE.  For QI/HImode, possibly use a pattern that includes
16790 the zero_extend operation. */
16791
16792 static void
16793 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16794 {
16795 rtx (*fn) (rtx, rtx) = NULL;
16796
16797 switch (mode)
16798 {
16799 case E_QImode:
16800 fn = gen_load_lockedqi;
16801 break;
16802 case E_HImode:
16803 fn = gen_load_lockedhi;
16804 break;
16805 case E_SImode:
16806 if (GET_MODE (mem) == QImode)
16807 fn = gen_load_lockedqi_si;
16808 else if (GET_MODE (mem) == HImode)
16809 fn = gen_load_lockedhi_si;
16810 else
16811 fn = gen_load_lockedsi;
16812 break;
16813 case E_DImode:
16814 fn = gen_load_lockeddi;
16815 break;
16816 case E_TImode:
16817 fn = gen_load_lockedti;
16818 break;
16819 default:
16820 gcc_unreachable ();
16821 }
16822 emit_insn (fn (reg, mem));
16823 }
16824
16825 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16826 instruction in MODE. */
16827
16828 static void
16829 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16830 {
16831 rtx (*fn) (rtx, rtx, rtx) = NULL;
16832
16833 switch (mode)
16834 {
16835 case E_QImode:
16836 fn = gen_store_conditionalqi;
16837 break;
16838 case E_HImode:
16839 fn = gen_store_conditionalhi;
16840 break;
16841 case E_SImode:
16842 fn = gen_store_conditionalsi;
16843 break;
16844 case E_DImode:
16845 fn = gen_store_conditionaldi;
16846 break;
16847 case E_TImode:
16848 fn = gen_store_conditionalti;
16849 break;
16850 default:
16851 gcc_unreachable ();
16852 }
16853
16854 /* Emit sync before stwcx. to address PPC405 Erratum. */
16855 if (PPC405_ERRATUM77)
16856 emit_insn (gen_hwsync ());
16857
16858 emit_insn (fn (res, mem, val));
16859 }
16860
16861 /* Expand barriers before and after a load_locked/store_cond sequence. */
16862
16863 static rtx
16864 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16865 {
16866 rtx addr = XEXP (mem, 0);
16867
16868 if (!legitimate_indirect_address_p (addr, reload_completed)
16869 && !legitimate_indexed_address_p (addr, reload_completed))
16870 {
16871 addr = force_reg (Pmode, addr);
16872 mem = replace_equiv_address_nv (mem, addr);
16873 }
16874
16875 switch (model)
16876 {
16877 case MEMMODEL_RELAXED:
16878 case MEMMODEL_CONSUME:
16879 case MEMMODEL_ACQUIRE:
16880 break;
16881 case MEMMODEL_RELEASE:
16882 case MEMMODEL_ACQ_REL:
16883 emit_insn (gen_lwsync ());
16884 break;
16885 case MEMMODEL_SEQ_CST:
16886 emit_insn (gen_hwsync ());
16887 break;
16888 default:
16889 gcc_unreachable ();
16890 }
16891 return mem;
16892 }
16893
16894 static void
16895 rs6000_post_atomic_barrier (enum memmodel model)
16896 {
16897 switch (model)
16898 {
16899 case MEMMODEL_RELAXED:
16900 case MEMMODEL_CONSUME:
16901 case MEMMODEL_RELEASE:
16902 break;
16903 case MEMMODEL_ACQUIRE:
16904 case MEMMODEL_ACQ_REL:
16905 case MEMMODEL_SEQ_CST:
16906 emit_insn (gen_isync ());
16907 break;
16908 default:
16909 gcc_unreachable ();
16910 }
16911 }
16912
16913 /* A subroutine of the various atomic expanders. For sub-word operations,
16914 we must adjust things to operate on SImode. Given the original MEM,
16915 return a new aligned memory. Also build and return the quantities by
16916 which to shift and mask. */
16917
16918 static rtx
16919 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16920 {
16921 rtx addr, align, shift, mask, mem;
16922 HOST_WIDE_INT shift_mask;
16923 machine_mode mode = GET_MODE (orig_mem);
16924
16925 /* For smaller modes, we have to implement this via SImode. */
16926 shift_mask = (mode == QImode ? 0x18 : 0x10);
16927
16928 addr = XEXP (orig_mem, 0);
16929 addr = force_reg (GET_MODE (addr), addr);
16930
16931 /* Aligned memory containing subword. Generate a new memory. We
16932 do not want any of the existing MEM_ATTR data, as we're now
16933 accessing memory outside the original object. */
16934 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16935 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16936 mem = gen_rtx_MEM (SImode, align);
16937 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16938 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16939 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16940
16941 /* Shift amount for subword relative to aligned word. */
16942 shift = gen_reg_rtx (SImode);
16943 addr = gen_lowpart (SImode, addr);
16944 rtx tmp = gen_reg_rtx (SImode);
16945 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16946 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16947 if (BYTES_BIG_ENDIAN)
16948 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16949 shift, 1, OPTAB_LIB_WIDEN);
16950 *pshift = shift;
16951
16952 /* Mask for insertion. */
16953 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16954 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16955 *pmask = mask;
16956
16957 return mem;
16958 }
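
/* For a QImode location at address A the code above computes, roughly:

	align = A & -4		(the containing SImode word)
	shift = (A & 3) * 8	(bit offset, XORed with 24 when big-endian)
	mask  = 0xff << shift

   so callers can operate on the byte inside an aligned lwarx word.  */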
16959
16960 /* A subroutine of the various atomic expanders. For sub-word operands,
combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
16962
16963 static rtx
16964 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16965 {
16966 rtx x;
16967
16968 x = gen_reg_rtx (SImode);
16969 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16970 gen_rtx_NOT (SImode, mask),
16971 oldval)));
16972
16973 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16974
16975 return x;
16976 }
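
/* I.e. the value returned above is (OLDVAL & ~MASK) | NEWVAL, where
   NEWVAL is assumed to have already been shifted into position and
   masked.  */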
16977
16978 /* A subroutine of the various atomic expanders. For sub-word operands,
16979 extract WIDE to NARROW via SHIFT. */
16980
16981 static void
16982 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16983 {
16984 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16985 wide, 1, OPTAB_LIB_WIDEN);
16986 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16987 }
16988
16989 /* Expand an atomic compare and swap operation. */
16990
16991 void
16992 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16993 {
16994 rtx boolval, retval, mem, oldval, newval, cond;
16995 rtx label1, label2, x, mask, shift;
16996 machine_mode mode, orig_mode;
16997 enum memmodel mod_s, mod_f;
16998 bool is_weak;
16999
17000 boolval = operands[0];
17001 retval = operands[1];
17002 mem = operands[2];
17003 oldval = operands[3];
17004 newval = operands[4];
17005 is_weak = (INTVAL (operands[5]) != 0);
17006 mod_s = memmodel_base (INTVAL (operands[6]));
17007 mod_f = memmodel_base (INTVAL (operands[7]));
17008 orig_mode = mode = GET_MODE (mem);
17009
17010 mask = shift = NULL_RTX;
17011 if (mode == QImode || mode == HImode)
17012 {
/* Before power8, we didn't have access to lbarx/lharx, so we generate a
lwarx plus shift/mask operations.  With power8, we need to do the
17015 comparison in SImode, but the store is still done in QI/HImode. */
17016 oldval = convert_modes (SImode, mode, oldval, 1);
17017
17018 if (!TARGET_SYNC_HI_QI)
17019 {
17020 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17021
/* Shift and mask OLDVAL into position within the word.  */
17023 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
17024 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17025
17026 /* Shift and mask NEWVAL into position within the word. */
17027 newval = convert_modes (SImode, mode, newval, 1);
17028 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
17029 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17030 }
17031
17032 /* Prepare to adjust the return value. */
17033 retval = gen_reg_rtx (SImode);
17034 mode = SImode;
17035 }
17036 else if (reg_overlap_mentioned_p (retval, oldval))
17037 oldval = copy_to_reg (oldval);
17038
17039 if (mode != TImode && !reg_or_short_operand (oldval, mode))
17040 oldval = copy_to_mode_reg (mode, oldval);
17041
17042 if (reg_overlap_mentioned_p (retval, newval))
17043 newval = copy_to_reg (newval);
17044
17045 mem = rs6000_pre_atomic_barrier (mem, mod_s);
17046
17047 label1 = NULL_RTX;
17048 if (!is_weak)
17049 {
17050 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17051 emit_label (XEXP (label1, 0));
17052 }
17053 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17054
17055 emit_load_locked (mode, retval, mem);
17056
17057 x = retval;
17058 if (mask)
17059 x = expand_simple_binop (SImode, AND, retval, mask,
17060 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17061
17062 cond = gen_reg_rtx (CCmode);
17063 /* If we have TImode, synthesize a comparison. */
17064 if (mode != TImode)
17065 x = gen_rtx_COMPARE (CCmode, x, oldval);
17066 else
17067 {
17068 rtx xor1_result = gen_reg_rtx (DImode);
17069 rtx xor2_result = gen_reg_rtx (DImode);
17070 rtx or_result = gen_reg_rtx (DImode);
17071 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
17072 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
17073 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
17074 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
17075
17076 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
17077 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
17078 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
17079 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
17080 }
17081
17082 emit_insn (gen_rtx_SET (cond, x));
17083
17084 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17085 emit_unlikely_jump (x, label2);
17086
17087 x = newval;
17088 if (mask)
17089 x = rs6000_mask_atomic_subword (retval, newval, mask);
17090
17091 emit_store_conditional (orig_mode, cond, mem, x);
17092
17093 if (!is_weak)
17094 {
17095 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17096 emit_unlikely_jump (x, label1);
17097 }
17098
17099 if (!is_mm_relaxed (mod_f))
17100 emit_label (XEXP (label2, 0));
17101
17102 rs6000_post_atomic_barrier (mod_s);
17103
17104 if (is_mm_relaxed (mod_f))
17105 emit_label (XEXP (label2, 0));
17106
17107 if (shift)
17108 rs6000_finish_atomic_subword (operands[1], retval, shift);
17109 else if (mode != GET_MODE (operands[1]))
17110 convert_move (operands[1], retval, 1);
17111
17112 /* In all cases, CR0 contains EQ on success, and NE on failure. */
17113 x = gen_rtx_EQ (SImode, cond, const0_rtx);
17114 emit_insn (gen_rtx_SET (boolval, x));
17115 }
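
/* For a word-sized strong compare-and-swap the expansion above is
   roughly:

	.L1:	lwarx   ret,0,mem
		cmpw    0,ret,oldval
		bne-    0,.L2
		stwcx.  newval,0,mem
		bne-    0,.L1
	.L2:

   bracketed by the barriers the memory model requires, with CR0
   holding EQ on success.  */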
17116
17117 /* Expand an atomic exchange operation. */
17118
17119 void
17120 rs6000_expand_atomic_exchange (rtx operands[])
17121 {
17122 rtx retval, mem, val, cond;
17123 machine_mode mode;
17124 enum memmodel model;
17125 rtx label, x, mask, shift;
17126
17127 retval = operands[0];
17128 mem = operands[1];
17129 val = operands[2];
17130 model = memmodel_base (INTVAL (operands[3]));
17131 mode = GET_MODE (mem);
17132
17133 mask = shift = NULL_RTX;
17134 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
17135 {
17136 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17137
/* Shift and mask VAL into position within the word.  */
17139 val = convert_modes (SImode, mode, val, 1);
17140 val = expand_simple_binop (SImode, ASHIFT, val, shift,
17141 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17142
17143 /* Prepare to adjust the return value. */
17144 retval = gen_reg_rtx (SImode);
17145 mode = SImode;
17146 }
17147
17148 mem = rs6000_pre_atomic_barrier (mem, model);
17149
17150 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17151 emit_label (XEXP (label, 0));
17152
17153 emit_load_locked (mode, retval, mem);
17154
17155 x = val;
17156 if (mask)
17157 x = rs6000_mask_atomic_subword (retval, val, mask);
17158
17159 cond = gen_reg_rtx (CCmode);
17160 emit_store_conditional (mode, cond, mem, x);
17161
17162 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17163 emit_unlikely_jump (x, label);
17164
17165 rs6000_post_atomic_barrier (model);
17166
17167 if (shift)
17168 rs6000_finish_atomic_subword (operands[0], retval, shift);
17169 }
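
/* E.g. a word-sized exchange becomes the minimal loop:

	.L1:	lwarx   ret,0,mem
		stwcx.  val,0,mem
		bne-    0,.L1

   plus the barriers implied by the memory model.  */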
17170
17171 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
17172 to perform. MEM is the memory on which to operate. VAL is the second
17173 operand of the binary operator. BEFORE and AFTER are optional locations to
17174 return the value of MEM either before of after the operation. MODEL_RTX
17175 is a CONST_INT containing the memory model to use. */
17176
17177 void
17178 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
17179 rtx orig_before, rtx orig_after, rtx model_rtx)
17180 {
17181 enum memmodel model = memmodel_base (INTVAL (model_rtx));
17182 machine_mode mode = GET_MODE (mem);
17183 machine_mode store_mode = mode;
17184 rtx label, x, cond, mask, shift;
17185 rtx before = orig_before, after = orig_after;
17186
17187 mask = shift = NULL_RTX;
17188 /* On power8, we want to use SImode for the operation. On previous systems,
17189 use the operation in a subword and shift/mask to get the proper byte or
17190 halfword. */
17191 if (mode == QImode || mode == HImode)
17192 {
17193 if (TARGET_SYNC_HI_QI)
17194 {
17195 val = convert_modes (SImode, mode, val, 1);
17196
17197 /* Prepare to adjust the return value. */
17198 before = gen_reg_rtx (SImode);
17199 if (after)
17200 after = gen_reg_rtx (SImode);
17201 mode = SImode;
17202 }
17203 else
17204 {
17205 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17206
/* Shift and mask VAL into position within the word.  */
17208 val = convert_modes (SImode, mode, val, 1);
17209 val = expand_simple_binop (SImode, ASHIFT, val, shift,
17210 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17211
17212 switch (code)
17213 {
17214 case IOR:
17215 case XOR:
17216 /* We've already zero-extended VAL. That is sufficient to
17217 make certain that it does not affect other bits. */
17218 mask = NULL;
17219 break;
17220
17221 case AND:
17222 /* If we make certain that all of the other bits in VAL are
17223 set, that will be sufficient to not affect other bits. */
17224 x = gen_rtx_NOT (SImode, mask);
17225 x = gen_rtx_IOR (SImode, x, val);
17226 emit_insn (gen_rtx_SET (val, x));
17227 mask = NULL;
17228 break;
17229
17230 case NOT:
17231 case PLUS:
17232 case MINUS:
17233 /* These will all affect bits outside the field and need
17234 adjustment via MASK within the loop. */
17235 break;
17236
17237 default:
17238 gcc_unreachable ();
17239 }
17240
17241 /* Prepare to adjust the return value. */
17242 before = gen_reg_rtx (SImode);
17243 if (after)
17244 after = gen_reg_rtx (SImode);
17245 store_mode = mode = SImode;
17246 }
17247 }
17248
17249 mem = rs6000_pre_atomic_barrier (mem, model);
17250
17251 label = gen_label_rtx ();
17252 emit_label (label);
17253 label = gen_rtx_LABEL_REF (VOIDmode, label);
17254
17255 if (before == NULL_RTX)
17256 before = gen_reg_rtx (mode);
17257
17258 emit_load_locked (mode, before, mem);
17259
17260 if (code == NOT)
17261 {
17262 x = expand_simple_binop (mode, AND, before, val,
17263 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17264 after = expand_simple_unop (mode, NOT, x, after, 1);
17265 }
17266 else
17267 {
17268 after = expand_simple_binop (mode, code, before, val,
17269 after, 1, OPTAB_LIB_WIDEN);
17270 }
17271
17272 x = after;
17273 if (mask)
17274 {
17275 x = expand_simple_binop (SImode, AND, after, mask,
17276 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17277 x = rs6000_mask_atomic_subword (before, x, mask);
17278 }
17279 else if (store_mode != mode)
17280 x = convert_modes (store_mode, mode, x, 1);
17281
17282 cond = gen_reg_rtx (CCmode);
17283 emit_store_conditional (store_mode, cond, mem, x);
17284
17285 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17286 emit_unlikely_jump (x, label);
17287
17288 rs6000_post_atomic_barrier (model);
17289
17290 if (shift)
17291 {
/* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
then do the calculations in an SImode register.  */
17294 if (orig_before)
17295 rs6000_finish_atomic_subword (orig_before, before, shift);
17296 if (orig_after)
17297 rs6000_finish_atomic_subword (orig_after, after, shift);
17298 }
17299 else if (store_mode != mode)
17300 {
/* QImode/HImode on machines with lbarx/lharx where we do the native
operation and then do the calculations in an SImode register.  */
17303 if (orig_before)
17304 convert_move (orig_before, before, 1);
17305 if (orig_after)
17306 convert_move (orig_after, after, 1);
17307 }
17308 else if (orig_after && after != orig_after)
17309 emit_move_insn (orig_after, after);
17310 }
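
/* For example, a word-sized atomic fetch-and-add expands to roughly:

	.L1:	lwarx   before,0,mem
		add     after,before,val
		stwcx.  after,0,mem
		bne-    0,.L1

   with BEFORE and AFTER then holding the old and new values.  */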
17311
17312 static GTY(()) alias_set_type TOC_alias_set = -1;
17313
17314 alias_set_type
17315 get_TOC_alias_set (void)
17316 {
17317 if (TOC_alias_set == -1)
17318 TOC_alias_set = new_alias_set ();
17319 return TOC_alias_set;
17320 }
17321
17322 /* The mode the ABI uses for a word. This is not the same as word_mode
17323 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17324
17325 static scalar_int_mode
17326 rs6000_abi_word_mode (void)
17327 {
17328 return TARGET_32BIT ? SImode : DImode;
17329 }
17330
17331 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17332 static char *
17333 rs6000_offload_options (void)
17334 {
17335 if (TARGET_64BIT)
17336 return xstrdup ("-foffload-abi=lp64");
17337 else
17338 return xstrdup ("-foffload-abi=ilp32");
17339 }
17340
17341 \f
17342 /* A quick summary of the various types of 'constant-pool tables'
17343 under PowerPC:
17344
17345 Target Flags Name One table per
17346 AIX (none) AIX TOC object file
17347 AIX -mfull-toc AIX TOC object file
17348 AIX -mminimal-toc AIX minimal TOC translation unit
17349 SVR4/EABI (none) SVR4 SDATA object file
17350 SVR4/EABI -fpic SVR4 pic object file
17351 SVR4/EABI -fPIC SVR4 PIC translation unit
17352 SVR4/EABI -mrelocatable EABI TOC function
17353 SVR4/EABI -maix AIX TOC object file
17354 SVR4/EABI -maix -mminimal-toc
17355 AIX minimal TOC translation unit
17356
17357 Name Reg. Set by entries contains:
17358 made by addrs? fp? sum?
17359
17360 AIX TOC 2 crt0 as Y option option
17361 AIX minimal TOC 30 prolog gcc Y Y option
17362 SVR4 SDATA 13 crt0 gcc N Y N
17363 SVR4 pic 30 prolog ld Y not yet N
17364 SVR4 PIC 30 prolog gcc Y option option
17365 EABI TOC 30 prolog gcc Y option option
17366
17367 */
17368
17369 /* Hash functions for the hash table. */
17370
17371 static unsigned
17372 rs6000_hash_constant (rtx k)
17373 {
17374 enum rtx_code code = GET_CODE (k);
17375 machine_mode mode = GET_MODE (k);
17376 unsigned result = (code << 3) ^ mode;
17377 const char *format;
17378 int flen, fidx;
17379
17380 format = GET_RTX_FORMAT (code);
17381 flen = strlen (format);
17382 fidx = 0;
17383
17384 switch (code)
17385 {
17386 case LABEL_REF:
17387 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
17388
17389 case CONST_WIDE_INT:
17390 {
17391 int i;
17392 flen = CONST_WIDE_INT_NUNITS (k);
17393 for (i = 0; i < flen; i++)
17394 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
17395 return result;
17396 }
17397
17398 case CONST_DOUBLE:
17399 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
17400
17401 case CODE_LABEL:
17402 fidx = 3;
17403 break;
17404
17405 default:
17406 break;
17407 }
17408
17409 for (; fidx < flen; fidx++)
17410 switch (format[fidx])
17411 {
17412 case 's':
17413 {
17414 unsigned i, len;
17415 const char *str = XSTR (k, fidx);
17416 len = strlen (str);
17417 result = result * 613 + len;
17418 for (i = 0; i < len; i++)
17419 result = result * 613 + (unsigned) str[i];
17420 break;
17421 }
17422 case 'u':
17423 case 'e':
17424 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
17425 break;
17426 case 'i':
17427 case 'n':
17428 result = result * 613 + (unsigned) XINT (k, fidx);
17429 break;
17430 case 'w':
17431 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
17432 result = result * 613 + (unsigned) XWINT (k, fidx);
17433 else
17434 {
17435 size_t i;
17436 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
17437 result = result * 613 + (unsigned) (XWINT (k, fidx)
17438 >> CHAR_BIT * i);
17439 }
17440 break;
17441 case '0':
17442 break;
17443 default:
17444 gcc_unreachable ();
17445 }
17446
17447 return result;
17448 }
17449
17450 hashval_t
17451 toc_hasher::hash (toc_hash_struct *thc)
17452 {
17453 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
17454 }
17455
17456 /* Compare H1 and H2 for equivalence. */
17457
17458 bool
17459 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
17460 {
17461 rtx r1 = h1->key;
17462 rtx r2 = h2->key;
17463
17464 if (h1->key_mode != h2->key_mode)
17465 return 0;
17466
17467 return rtx_equal_p (r1, r2);
17468 }
17469
17470 /* These are the names given by the C++ front-end to vtables, and
17471 vtable-like objects. Ideally, this logic should not be here;
17472 instead, there should be some programmatic way of inquiring as
17473 to whether or not an object is a vtable. */
17474
17475 #define VTABLE_NAME_P(NAME) \
17476 (startswith (name, "_vt.") \
17477 || startswith (name, "_ZTV") \
17478 || startswith (name, "_ZTT") \
17479 || startswith (name, "_ZTI") \
17480 || startswith (name, "_ZTC"))
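
/* _ZTV, _ZTT, _ZTI and _ZTC are the Itanium C++ ABI manglings for
   vtables, VTTs, typeinfo objects and construction vtables
   respectively; "_vt." is the older g++ scheme.  */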
17481
17482 #ifdef NO_DOLLAR_IN_LABEL
17483 /* Return a GGC-allocated character string translating dollar signs in
17484 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17485
17486 const char *
17487 rs6000_xcoff_strip_dollar (const char *name)
17488 {
17489 char *strip, *p;
17490 const char *q;
17491 size_t len;
17492
17493 q = (const char *) strchr (name, '$');
17494
17495 if (q == 0 || q == name)
17496 return name;
17497
17498 len = strlen (name);
17499 strip = XALLOCAVEC (char, len + 1);
17500 strcpy (strip, name);
17501 p = strip + (q - name);
17502 while (p)
17503 {
17504 *p = '_';
17505 p = strchr (p + 1, '$');
17506 }
17507
17508 return ggc_alloc_string (strip, len);
17509 }
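
/* E.g. "foo$bar$baz" becomes "foo_bar_baz"; a name whose first
   character is '$' is returned unchanged.  */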
17510 #endif
17511
17512 void
17513 rs6000_output_symbol_ref (FILE *file, rtx x)
17514 {
17515 const char *name = XSTR (x, 0);
17516
17517 /* Currently C++ toc references to vtables can be emitted before it
17518 is decided whether the vtable is public or private. If this is
17519 the case, then the linker will eventually complain that there is
17520 a reference to an unknown section. Thus, for vtables only,
17521 we emit the TOC reference to reference the identifier and not the
17522 symbol. */
17523 if (VTABLE_NAME_P (name))
17524 {
17525 RS6000_OUTPUT_BASENAME (file, name);
17526 }
17527 else
17528 assemble_name (file, name);
17529 }
17530
17531 /* Output a TOC entry. We derive the entry name from what is being
17532 written. */
17533
17534 void
17535 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17536 {
17537 char buf[256];
17538 const char *name = buf;
17539 rtx base = x;
17540 HOST_WIDE_INT offset = 0;
17541
17542 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17543
17544 /* When the linker won't eliminate them, don't output duplicate
17545 TOC entries (this happens on AIX if there is any kind of TOC,
17546 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17547 CODE_LABELs. */
17548 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17549 {
17550 struct toc_hash_struct *h;
17551
17552 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17553 time because GGC is not initialized at that point. */
17554 if (toc_hash_table == NULL)
17555 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17556
17557 h = ggc_alloc<toc_hash_struct> ();
17558 h->key = x;
17559 h->key_mode = mode;
17560 h->labelno = labelno;
17561
17562 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17563 if (*found == NULL)
17564 *found = h;
17565 else /* This is indeed a duplicate.
17566 Set this label equal to that label. */
17567 {
17568 fputs ("\t.set ", file);
17569 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17570 fprintf (file, "%d,", labelno);
17571 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17572 fprintf (file, "%d\n", ((*found)->labelno));
17573
17574 #ifdef HAVE_AS_TLS
17575 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17576 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17577 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17578 {
17579 fputs ("\t.set ", file);
17580 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17581 fprintf (file, "%d,", labelno);
17582 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17583 fprintf (file, "%d\n", ((*found)->labelno));
17584 }
17585 #endif
17586 return;
17587 }
17588 }
17589
17590 /* If we're going to put a double constant in the TOC, make sure it's
17591 aligned properly when strict alignment is on. */
17592 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17593 && STRICT_ALIGNMENT
17594 && GET_MODE_BITSIZE (mode) >= 64
&& ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
ASM_OUTPUT_ALIGN (file, 3);
17598
17599 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17600
17601 /* Handle FP constants specially. Note that if we have a minimal
17602 TOC, things we put here aren't actually in the TOC, so we can allow
17603 FP constants. */
17604 if (CONST_DOUBLE_P (x)
17605 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17606 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17607 {
17608 long k[4];
17609
17610 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17611 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17612 else
17613 real_to_target (k, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
17614
17615 if (TARGET_64BIT)
17616 {
17617 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17618 fputs (DOUBLE_INT_ASM_OP, file);
17619 else
17620 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17621 k[0] & 0xffffffff, k[1] & 0xffffffff,
17622 k[2] & 0xffffffff, k[3] & 0xffffffff);
17623 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17624 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17625 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17626 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17627 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17628 return;
17629 }
17630 else
17631 {
17632 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17633 fputs ("\t.long ", file);
17634 else
17635 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17636 k[0] & 0xffffffff, k[1] & 0xffffffff,
17637 k[2] & 0xffffffff, k[3] & 0xffffffff);
17638 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17639 k[0] & 0xffffffff, k[1] & 0xffffffff,
17640 k[2] & 0xffffffff, k[3] & 0xffffffff);
17641 return;
17642 }
17643 }
17644 else if (CONST_DOUBLE_P (x)
17645 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17646 {
17647 long k[2];
17648
17649 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17650 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17651 else
17652 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17653
17654 if (TARGET_64BIT)
17655 {
17656 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17657 fputs (DOUBLE_INT_ASM_OP, file);
17658 else
17659 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17660 k[0] & 0xffffffff, k[1] & 0xffffffff);
17661 fprintf (file, "0x%lx%08lx\n",
17662 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17663 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17664 return;
17665 }
17666 else
17667 {
17668 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17669 fputs ("\t.long ", file);
17670 else
17671 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17672 k[0] & 0xffffffff, k[1] & 0xffffffff);
17673 fprintf (file, "0x%lx,0x%lx\n",
17674 k[0] & 0xffffffff, k[1] & 0xffffffff);
17675 return;
17676 }
17677 }
17678 else if (CONST_DOUBLE_P (x)
17679 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17680 {
17681 long l;
17682
17683 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17684 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17685 else
17686 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17687
17688 if (TARGET_64BIT)
17689 {
17690 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17691 fputs (DOUBLE_INT_ASM_OP, file);
17692 else
17693 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17694 if (WORDS_BIG_ENDIAN)
17695 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17696 else
17697 fprintf (file, "0x%lx\n", l & 0xffffffff);
17698 return;
17699 }
17700 else
17701 {
17702 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17703 fputs ("\t.long ", file);
17704 else
17705 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17706 fprintf (file, "0x%lx\n", l & 0xffffffff);
17707 return;
17708 }
17709 }
17710 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17711 {
17712 unsigned HOST_WIDE_INT low;
17713 HOST_WIDE_INT high;
17714
17715 low = INTVAL (x) & 0xffffffff;
17716 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17717
17718 /* TOC entries are always Pmode-sized, so on big-endian targets
17719 smaller integer constants in the TOC need to be padded.
17720 (This is still a win over putting the constants in
17721 a separate constant pool, because then we'd have
17722 to have both a TOC entry _and_ the actual constant.)
17723
17724 For a 32-bit target, CONST_INT values are loaded and shifted
17725 entirely within `low' and can be stored in one TOC entry. */
17726
17727 /* It would be easy to support modes wider than the pointer here, but that case isn't handled now.  */
17728 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17729
17730 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17731 {
17732 low |= high << 32;
17733 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17734 high = (HOST_WIDE_INT) low >> 32;
17735 low &= 0xffffffff;
17736 }
17737
17738 if (TARGET_64BIT)
17739 {
17740 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17741 fputs (DOUBLE_INT_ASM_OP, file);
17742 else
17743 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17744 (long) high & 0xffffffff, (long) low & 0xffffffff);
17745 fprintf (file, "0x%lx%08lx\n",
17746 (long) high & 0xffffffff, (long) low & 0xffffffff);
17747 return;
17748 }
17749 else
17750 {
17751 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17752 {
17753 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17754 fputs ("\t.long ", file);
17755 else
17756 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17757 (long) high & 0xffffffff, (long) low & 0xffffffff);
17758 fprintf (file, "0x%lx,0x%lx\n",
17759 (long) high & 0xffffffff, (long) low & 0xffffffff);
17760 }
17761 else
17762 {
17763 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17764 fputs ("\t.long ", file);
17765 else
17766 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17767 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17768 }
17769 return;
17770 }
17771 }
17772
17773 if (GET_CODE (x) == CONST)
17774 {
17775 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17776 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17777
17778 base = XEXP (XEXP (x, 0), 0);
17779 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17780 }
17781
17782 switch (GET_CODE (base))
17783 {
17784 case SYMBOL_REF:
17785 name = XSTR (base, 0);
17786 break;
17787
17788 case LABEL_REF:
17789 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17790 CODE_LABEL_NUMBER (XEXP (base, 0)));
17791 break;
17792
17793 case CODE_LABEL:
17794 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17795 break;
17796
17797 default:
17798 gcc_unreachable ();
17799 }
17800
17801 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17802 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17803 else
17804 {
17805 fputs ("\t.tc ", file);
17806 RS6000_OUTPUT_BASENAME (file, name);
17807
17808 if (offset < 0)
17809 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17810 else if (offset)
17811 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17812
17813 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17814 after other TOC symbols, reducing overflow of small TOC access
17815 to [TC] symbols. */
17816 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17817 ? "[TE]," : "[TC],", file);
17818 }
17819
17820 /* Currently, C++ TOC references to vtables can be emitted before it
17821 is decided whether the vtable is public or private.  If this is
17822 the case, then the linker will eventually complain that there is
17823 a TOC reference to an unknown section.  Thus, for vtables only, we
17824 emit the TOC reference against the symbol rather than the
17825 section.  */
17826 if (VTABLE_NAME_P (name))
17827 {
17828 RS6000_OUTPUT_BASENAME (file, name);
17829 if (offset < 0)
17830 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17831 else if (offset > 0)
17832 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17833 }
17834 else
17835 output_addr_const (file, x);
17836
17837 #if HAVE_AS_TLS
17838 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17839 {
17840 switch (SYMBOL_REF_TLS_MODEL (base))
17841 {
17842 case 0:
17843 break;
17844 case TLS_MODEL_LOCAL_EXEC:
17845 fputs ("@le", file);
17846 break;
17847 case TLS_MODEL_INITIAL_EXEC:
17848 fputs ("@ie", file);
17849 break;
17850 /* Use global-dynamic for local-dynamic. */
17851 case TLS_MODEL_GLOBAL_DYNAMIC:
17852 case TLS_MODEL_LOCAL_DYNAMIC:
17853 putc ('\n', file);
17854 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17855 fputs ("\t.tc .", file);
17856 RS6000_OUTPUT_BASENAME (file, name);
17857 fputs ("[TC],", file);
17858 output_addr_const (file, x);
17859 fputs ("@m", file);
17860 break;
17861 default:
17862 gcc_unreachable ();
17863 }
17864 }
17865 #endif
17866
17867 putc ('\n', file);
17868 }
17869 \f
17870 /* Output an assembler pseudo-op to write an ASCII string of N characters
17871 starting at P to FILE.
17872
17873 On the RS/6000, we have to do this using the .byte operation and
17874 write out special characters outside the quoted string.
17875 Also, the assembler is broken; very long strings are truncated,
17876 so we must artificially break them up early. */
17877
17878 void
17879 output_ascii (FILE *file, const char *p, int n)
17880 {
17881 char c;
17882 int i, count_string;
17883 const char *for_string = "\t.byte \"";
17884 const char *for_decimal = "\t.byte ";
17885 const char *to_close = NULL;
17886
17887 count_string = 0;
17888 for (i = 0; i < n; i++)
17889 {
17890 c = *p++;
17891 if (c >= ' ' && c < 0177)
17892 {
17893 if (for_string)
17894 fputs (for_string, file);
17895 putc (c, file);
17896
17897 /* Write two quotes to get one. */
17898 if (c == '"')
17899 {
17900 putc (c, file);
17901 ++count_string;
17902 }
17903
17904 for_string = NULL;
17905 for_decimal = "\"\n\t.byte ";
17906 to_close = "\"\n";
17907 ++count_string;
17908
17909 if (count_string >= 512)
17910 {
17911 fputs (to_close, file);
17912
17913 for_string = "\t.byte \"";
17914 for_decimal = "\t.byte ";
17915 to_close = NULL;
17916 count_string = 0;
17917 }
17918 }
17919 else
17920 {
17921 if (for_decimal)
17922 fputs (for_decimal, file);
17923 fprintf (file, "%d", c);
17924
17925 for_string = "\n\t.byte \"";
17926 for_decimal = ", ";
17927 to_close = "\n";
17928 count_string = 0;
17929 }
17930 }
17931
17932 /* Now close the string if we have written one. Then end the line. */
17933 if (to_close)
17934 fputs (to_close, file);
17935 }
17936 \f
17937 /* Generate a unique section name for FILENAME for a section type
17938 represented by SECTION_DESC. Output goes into BUF.
17939
17940 SECTION_DESC can be any string, as long as it is different for each
17941 possible section type.
17942
17943 We name the section in the same manner as xlc. The name begins with an
17944 underscore followed by the filename (after stripping any leading directory
17945 names) with the last period replaced by the string SECTION_DESC. If
17946 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17947 the name. */
17948
17949 void
17950 rs6000_gen_section_name (char **buf, const char *filename,
17951 const char *section_desc)
17952 {
17953 const char *q, *after_last_slash, *last_period = 0;
17954 char *p;
17955 int len;
17956
17957 after_last_slash = filename;
17958 for (q = filename; *q; q++)
17959 {
17960 if (*q == '/')
17961 after_last_slash = q + 1;
17962 else if (*q == '.')
17963 last_period = q;
17964 }
17965
17966 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17967 *buf = (char *) xmalloc (len);
17968
17969 p = *buf;
17970 *p++ = '_';
17971
17972 for (q = after_last_slash; *q; q++)
17973 {
17974 if (q == last_period)
17975 {
17976 strcpy (p, section_desc);
17977 p += strlen (section_desc);
17978 break;
17979 }
17980
17981 else if (ISALNUM (*q))
17982 *p++ = *q;
17983 }
17984
17985 if (last_period == 0)
17986 strcpy (p, section_desc);
17987 else
17988 *p = '\0';
17989 }
17990 \f
17991 /* Emit profile function. */
17992
17993 void
17994 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17995 {
17996 /* Non-standard profiling for kernels, which just saves LR then calls
17997 _mcount without worrying about arg saves. The idea is to change
17998 the function prologue as little as possible as it isn't easy to
17999 account for arg save/restore code added just for _mcount. */
18000 if (TARGET_PROFILE_KERNEL)
18001 return;
18002
18003 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18004 {
18005 #ifndef NO_PROFILE_COUNTERS
18006 # define NO_PROFILE_COUNTERS 0
18007 #endif
18008 if (NO_PROFILE_COUNTERS)
18009 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
18010 LCT_NORMAL, VOIDmode);
18011 else
18012 {
18013 char buf[30];
18014 const char *label_name;
18015 rtx fun;
18016
18017 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
18018 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
18019 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
18020
18021 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
18022 LCT_NORMAL, VOIDmode, fun, Pmode);
18023 }
18024 }
18025 else if (DEFAULT_ABI == ABI_DARWIN)
18026 {
18027 const char *mcount_name = RS6000_MCOUNT;
18028 int caller_addr_regno = LR_REGNO;
18029
18030 /* Be conservative and always set this, at least for now. */
18031 crtl->uses_pic_offset_table = 1;
18032
18033 #if TARGET_MACHO
18034 /* For PIC code, set up a stub and collect the caller's address
18035 from r0, which is where the prologue puts it. */
18036 if (MACHOPIC_INDIRECT
18037 && crtl->uses_pic_offset_table)
18038 caller_addr_regno = 0;
18039 #endif
18040 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
18041 LCT_NORMAL, VOIDmode,
18042 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
18043 }
18044 }
18045
18046 /* Write function profiler code. */
18047
18048 void
18049 output_function_profiler (FILE *file, int labelno)
18050 {
18051 char buf[100];
18052
18053 switch (DEFAULT_ABI)
18054 {
18055 default:
18056 gcc_unreachable ();
18057
18058 case ABI_V4:
18059 if (!TARGET_32BIT)
18060 {
18061 warning (0, "no profiling of 64-bit code for this ABI");
18062 return;
18063 }
18064 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
18065 fprintf (file, "\tmflr %s\n", reg_names[0]);
18066 if (NO_PROFILE_COUNTERS)
18067 {
18068 asm_fprintf (file, "\tstw %s,4(%s)\n",
18069 reg_names[0], reg_names[1]);
18070 }
18071 else if (TARGET_SECURE_PLT && flag_pic)
18072 {
18073 if (TARGET_LINK_STACK)
18074 {
18075 char name[32];
18076 get_ppc476_thunk_name (name);
18077 asm_fprintf (file, "\tbl %s\n", name);
18078 }
18079 else
18080 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
18081 asm_fprintf (file, "\tstw %s,4(%s)\n",
18082 reg_names[0], reg_names[1]);
18083 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
18084 asm_fprintf (file, "\taddis %s,%s,",
18085 reg_names[12], reg_names[12]);
18086 assemble_name (file, buf);
18087 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
18088 assemble_name (file, buf);
18089 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
18090 }
18091 else if (flag_pic == 1)
18092 {
18093 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
18094 asm_fprintf (file, "\tstw %s,4(%s)\n",
18095 reg_names[0], reg_names[1]);
18096 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
18097 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
18098 assemble_name (file, buf);
18099 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
18100 }
18101 else if (flag_pic > 1)
18102 {
18103 asm_fprintf (file, "\tstw %s,4(%s)\n",
18104 reg_names[0], reg_names[1]);
18105 /* Now, we need to get the address of the label. */
18106 if (TARGET_LINK_STACK)
18107 {
18108 char name[32];
18109 get_ppc476_thunk_name (name);
18110 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
18111 assemble_name (file, buf);
18112 fputs ("-.\n1:", file);
18113 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
18114 asm_fprintf (file, "\taddi %s,%s,4\n",
18115 reg_names[11], reg_names[11]);
18116 }
18117 else
18118 {
18119 fputs ("\tbcl 20,31,1f\n\t.long ", file);
18120 assemble_name (file, buf);
18121 fputs ("-.\n1:", file);
18122 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
18123 }
18124 asm_fprintf (file, "\tlwz %s,0(%s)\n",
18125 reg_names[0], reg_names[11]);
18126 asm_fprintf (file, "\tadd %s,%s,%s\n",
18127 reg_names[0], reg_names[0], reg_names[11]);
18128 }
18129 else
18130 {
18131 asm_fprintf (file, "\tlis %s,", reg_names[12]);
18132 assemble_name (file, buf);
18133 fputs ("@ha\n", file);
18134 asm_fprintf (file, "\tstw %s,4(%s)\n",
18135 reg_names[0], reg_names[1]);
18136 asm_fprintf (file, "\tla %s,", reg_names[0]);
18137 assemble_name (file, buf);
18138 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
18139 }
18140
18141 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
18142 fprintf (file, "\tbl %s%s\n",
18143 RS6000_MCOUNT, flag_pic ? "@plt" : "");
18144 break;
18145
18146 case ABI_AIX:
18147 case ABI_ELFv2:
18148 case ABI_DARWIN:
18149 /* Don't do anything, done in output_profile_hook (). */
18150 break;
18151 }
18152 }
18153
18154 \f
18155
18156 /* The last insn issued by the scheduler.  */
18157
18158 static rtx_insn *last_scheduled_insn;
18159
18160 /* The following variable helps to balance the issuing of load and
18161 store instructions.  */
18162
18163 static int load_store_pendulum;
18164
18165 /* The following variable helps pair divide insns during scheduling. */
18166 static int divide_cnt;
18167 /* The following variable helps pair and alternate vector and
18168 vector-load insns during scheduling.  */
18169 static int vec_pairing;
18170
18171
18172 /* Power4 load update and store update instructions are cracked into a
18173 load or store and an integer insn which are executed in the same cycle.
18174 Branches have their own dispatch slot which does not count against the
18175 GCC issue rate, but it changes the program flow so there are no other
18176 instructions to issue in this cycle. */
18177
18178 static int
18179 rs6000_variable_issue_1 (rtx_insn *insn, int more)
18180 {
18181 last_scheduled_insn = insn;
18182 if (GET_CODE (PATTERN (insn)) == USE
18183 || GET_CODE (PATTERN (insn)) == CLOBBER)
18184 {
18185 cached_can_issue_more = more;
18186 return cached_can_issue_more;
18187 }
18188
18189 if (insn_terminates_group_p (insn, current_group))
18190 {
18191 cached_can_issue_more = 0;
18192 return cached_can_issue_more;
18193 }
18194
18195 /* If the insn has no reservation but we still reach here, issue it normally.  */
18196 if (recog_memoized (insn) < 0)
18197 return more;
18198
18199 if (rs6000_sched_groups)
18200 {
18201 if (is_microcoded_insn (insn))
18202 cached_can_issue_more = 0;
18203 else if (is_cracked_insn (insn))
18204 cached_can_issue_more = more > 2 ? more - 2 : 0;
18205 else
18206 cached_can_issue_more = more - 1;
18207
18208 return cached_can_issue_more;
18209 }
18210
18211 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
18212 return 0;
18213
18214 cached_can_issue_more = more - 1;
18215 return cached_can_issue_more;
18216 }
18217
18218 static int
18219 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
18220 {
18221 int r = rs6000_variable_issue_1 (insn, more);
18222 if (verbose)
18223 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
18224 return r;
18225 }
18226
18227 /* Adjust the cost of a scheduling dependency.  Return the new cost of
18228 a dependency of kind DEP_TYPE of INSN on DEP_INSN.  COST is the current cost.  */
18229
18230 static int
18231 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
18232 unsigned int)
18233 {
18234 enum attr_type attr_type;
18235
18236 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
18237 return cost;
18238
18239 switch (dep_type)
18240 {
18241 case REG_DEP_TRUE:
18242 {
18243 /* Data dependency; DEP_INSN writes a register that INSN reads
18244 some cycles later. */
18245
18246 /* Separate a load from a narrower, dependent store. */
18247 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
18248 || rs6000_tune == PROCESSOR_POWER10
18249 || rs6000_tune == PROCESSOR_POWER11)
18250 && GET_CODE (PATTERN (insn)) == SET
18251 && GET_CODE (PATTERN (dep_insn)) == SET
18252 && MEM_P (XEXP (PATTERN (insn), 1))
18253 && MEM_P (XEXP (PATTERN (dep_insn), 0))
18254 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
18255 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
18256 return cost + 14;
18257
18258 attr_type = get_attr_type (insn);
18259
18260 switch (attr_type)
18261 {
18262 case TYPE_JMPREG:
18263 /* Tell the first scheduling pass about the latency between
18264 a mtctr and bctr (and mtlr and br/blr). The first
18265 scheduling pass will not know about this latency since
18266 the mtctr instruction, which has the latency associated
18267 to it, will be generated by reload. */
18268 return 4;
18269 case TYPE_BRANCH:
18270 /* Leave some extra cycles between a compare and its
18271 dependent branch, to inhibit expensive mispredicts. */
18272 if ((rs6000_tune == PROCESSOR_PPC603
18273 || rs6000_tune == PROCESSOR_PPC604
18274 || rs6000_tune == PROCESSOR_PPC604e
18275 || rs6000_tune == PROCESSOR_PPC620
18276 || rs6000_tune == PROCESSOR_PPC630
18277 || rs6000_tune == PROCESSOR_PPC750
18278 || rs6000_tune == PROCESSOR_PPC7400
18279 || rs6000_tune == PROCESSOR_PPC7450
18280 || rs6000_tune == PROCESSOR_PPCE5500
18281 || rs6000_tune == PROCESSOR_PPCE6500
18282 || rs6000_tune == PROCESSOR_POWER4
18283 || rs6000_tune == PROCESSOR_POWER5
18284 || rs6000_tune == PROCESSOR_POWER7
18285 || rs6000_tune == PROCESSOR_POWER8
18286 || rs6000_tune == PROCESSOR_POWER9
18287 || rs6000_tune == PROCESSOR_POWER10
18288 || rs6000_tune == PROCESSOR_POWER11
18289 || rs6000_tune == PROCESSOR_CELL)
18290 && recog_memoized (dep_insn)
18291 && (INSN_CODE (dep_insn) >= 0))
18292
18293 switch (get_attr_type (dep_insn))
18294 {
18295 case TYPE_CMP:
18296 case TYPE_FPCOMPARE:
18297 case TYPE_CR_LOGICAL:
18298 return cost + 2;
18299 case TYPE_EXTS:
18300 case TYPE_MUL:
18301 if (get_attr_dot (dep_insn) == DOT_YES)
18302 return cost + 2;
18303 else
18304 break;
18305 case TYPE_SHIFT:
18306 if (get_attr_dot (dep_insn) == DOT_YES
18307 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
18308 return cost + 2;
18309 else
18310 break;
18311 default:
18312 break;
18313 }
18314 break;
18315
18316 case TYPE_STORE:
18317 case TYPE_FPSTORE:
18318 if ((rs6000_tune == PROCESSOR_POWER6)
18319 && recog_memoized (dep_insn)
18320 && (INSN_CODE (dep_insn) >= 0))
18321 {
18323 if (GET_CODE (PATTERN (insn)) != SET)
18324 /* If this happens, we have to extend this to schedule
18325 optimally. Return default for now. */
18326 return cost;
18327
18328 /* Adjust the cost for the case where the value written
18329 by a fixed point operation is used as the address
18330 gen value on a store. */
18331 switch (get_attr_type (dep_insn))
18332 {
18333 case TYPE_LOAD:
18334 case TYPE_CNTLZ:
18335 {
18336 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18337 return get_attr_sign_extend (dep_insn)
18338 == SIGN_EXTEND_YES ? 6 : 4;
18339 break;
18340 }
18341 case TYPE_SHIFT:
18342 {
18343 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18344 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18345 6 : 3;
18346 break;
18347 }
18348 case TYPE_INTEGER:
18349 case TYPE_ADD:
18350 case TYPE_LOGICAL:
18351 case TYPE_EXTS:
18352 case TYPE_INSERT:
18353 {
18354 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18355 return 3;
18356 break;
18357 }
18358 case TYPE_STORE:
18359 case TYPE_FPLOAD:
18360 case TYPE_FPSTORE:
18361 {
18362 if (get_attr_update (dep_insn) == UPDATE_YES
18363 && ! rs6000_store_data_bypass_p (dep_insn, insn))
18364 return 3;
18365 break;
18366 }
18367 case TYPE_MUL:
18368 {
18369 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18370 return 17;
18371 break;
18372 }
18373 case TYPE_DIV:
18374 {
18375 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18376 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18377 break;
18378 }
18379 default:
18380 break;
18381 }
18382 }
18383 break;
18384
18385 case TYPE_LOAD:
18386 if ((rs6000_tune == PROCESSOR_POWER6)
18387 && recog_memoized (dep_insn)
18388 && (INSN_CODE (dep_insn) >= 0))
18389 {
18391 /* Adjust the cost for the case where the value written
18392 by a fixed point instruction is used within the address
18393 gen portion of a subsequent load(u)(x) */
18394 switch (get_attr_type (dep_insn))
18395 {
18396 case TYPE_LOAD:
18397 case TYPE_CNTLZ:
18398 {
18399 if (set_to_load_agen (dep_insn, insn))
18400 return get_attr_sign_extend (dep_insn)
18401 == SIGN_EXTEND_YES ? 6 : 4;
18402 break;
18403 }
18404 case TYPE_SHIFT:
18405 {
18406 if (set_to_load_agen (dep_insn, insn))
18407 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18408 6 : 3;
18409 break;
18410 }
18411 case TYPE_INTEGER:
18412 case TYPE_ADD:
18413 case TYPE_LOGICAL:
18414 case TYPE_EXTS:
18415 case TYPE_INSERT:
18416 {
18417 if (set_to_load_agen (dep_insn, insn))
18418 return 3;
18419 break;
18420 }
18421 case TYPE_STORE:
18422 case TYPE_FPLOAD:
18423 case TYPE_FPSTORE:
18424 {
18425 if (get_attr_update (dep_insn) == UPDATE_YES
18426 && set_to_load_agen (dep_insn, insn))
18427 return 3;
18428 break;
18429 }
18430 case TYPE_MUL:
18431 {
18432 if (set_to_load_agen (dep_insn, insn))
18433 return 17;
18434 break;
18435 }
18436 case TYPE_DIV:
18437 {
18438 if (set_to_load_agen (dep_insn, insn))
18439 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18440 break;
18441 }
18442 default:
18443 break;
18444 }
18445 }
18446 break;
18447
18448 default:
18449 break;
18450 }
18451
18452 /* Fall out to return default cost. */
18453 }
18454 break;
18455
18456 case REG_DEP_OUTPUT:
18457 /* Output dependency; DEP_INSN writes a register that INSN writes some
18458 cycles later. */
18459 if ((rs6000_tune == PROCESSOR_POWER6)
18460 && recog_memoized (dep_insn)
18461 && (INSN_CODE (dep_insn) >= 0))
18462 {
18463 attr_type = get_attr_type (insn);
18464
18465 switch (attr_type)
18466 {
18467 case TYPE_FP:
18468 case TYPE_FPSIMPLE:
18469 if (get_attr_type (dep_insn) == TYPE_FP
18470 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
18471 return 1;
18472 break;
18473 default:
18474 break;
18475 }
18476 }
18477 /* Fall through, no cost for output dependency. */
18478 /* FALLTHRU */
18479
18480 case REG_DEP_ANTI:
18481 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18482 cycles later. */
18483 return 0;
18484
18485 default:
18486 gcc_unreachable ();
18487 }
18488
18489 return cost;
18490 }
18491
18492 /* Debug version of rs6000_adjust_cost. */
18493
18494 static int
18495 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18496 int cost, unsigned int dw)
18497 {
18498 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18499
18500 if (ret != cost)
18501 {
18502 const char *dep;
18503
18504 switch (dep_type)
18505 {
18506 default: dep = "unknown dependency"; break;
18507 case REG_DEP_TRUE: dep = "data dependency"; break;
18508 case REG_DEP_OUTPUT: dep = "output dependency"; break;
18509 case REG_DEP_ANTI: dep = "anti dependency"; break;
18510 }
18511
18512 fprintf (stderr,
18513 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18514 "%s, insn:\n", ret, cost, dep);
18515
18516 debug_rtx (insn);
18517 }
18518
18519 return ret;
18520 }
18521
18522 /* Return true if INSN is microcoded on the current processor,
18523 false otherwise.  */
18524
18525 static bool
18526 is_microcoded_insn (rtx_insn *insn)
18527 {
18528 if (!insn || !NONDEBUG_INSN_P (insn)
18529 || GET_CODE (PATTERN (insn)) == USE
18530 || GET_CODE (PATTERN (insn)) == CLOBBER)
18531 return false;
18532
18533 if (rs6000_tune == PROCESSOR_CELL)
18534 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18535
18536 if (rs6000_sched_groups
18537 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18538 {
18539 enum attr_type type = get_attr_type (insn);
18540 if ((type == TYPE_LOAD
18541 && get_attr_update (insn) == UPDATE_YES
18542 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18543 || ((type == TYPE_LOAD || type == TYPE_STORE)
18544 && get_attr_update (insn) == UPDATE_YES
18545 && get_attr_indexed (insn) == INDEXED_YES)
18546 || type == TYPE_MFCR)
18547 return true;
18548 }
18549
18550 return false;
18551 }
18552
18553 /* Return true if INSN is cracked into 2 instructions by the
18554 processor (and therefore occupies 2 issue slots).  */
18555
18556 static bool
18557 is_cracked_insn (rtx_insn *insn)
18558 {
18559 if (!insn || !NONDEBUG_INSN_P (insn)
18560 || GET_CODE (PATTERN (insn)) == USE
18561 || GET_CODE (PATTERN (insn)) == CLOBBER)
18562 return false;
18563
18564 if (rs6000_sched_groups
18565 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18566 {
18567 enum attr_type type = get_attr_type (insn);
18568 if ((type == TYPE_LOAD
18569 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18570 && get_attr_update (insn) == UPDATE_NO)
18571 || (type == TYPE_LOAD
18572 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18573 && get_attr_update (insn) == UPDATE_YES
18574 && get_attr_indexed (insn) == INDEXED_NO)
18575 || (type == TYPE_STORE
18576 && get_attr_update (insn) == UPDATE_YES
18577 && get_attr_indexed (insn) == INDEXED_NO)
18578 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18579 && get_attr_update (insn) == UPDATE_YES)
18580 || (type == TYPE_CR_LOGICAL
18581 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18582 || (type == TYPE_EXTS
18583 && get_attr_dot (insn) == DOT_YES)
18584 || (type == TYPE_SHIFT
18585 && get_attr_dot (insn) == DOT_YES
18586 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18587 || (type == TYPE_MUL
18588 && get_attr_dot (insn) == DOT_YES)
18589 || type == TYPE_DIV
18590 || (type == TYPE_INSERT
18591 && get_attr_size (insn) == SIZE_32))
18592 return true;
18593 }
18594
18595 return false;
18596 }
18597
18598 /* Return true if INSN can be issued only from the branch slot.  */
18600
18601 static bool
18602 is_branch_slot_insn (rtx_insn *insn)
18603 {
18604 if (!insn || !NONDEBUG_INSN_P (insn)
18605 || GET_CODE (PATTERN (insn)) == USE
18606 || GET_CODE (PATTERN (insn)) == CLOBBER)
18607 return false;
18608
18609 if (rs6000_sched_groups)
18610 {
18611 enum attr_type type = get_attr_type (insn);
18612 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18613 return true;
18614 return false;
18615 }
18616
18617 return false;
18618 }
18619
18620 /* Return true if OUT_INSN sets a value that is used in the address
18621 generation computation of IN_INSN.  */
18622 static bool
18623 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18624 {
18625 rtx out_set, in_set;
18626
18627 /* For performance reasons, only handle the simple case where
18628 both insns are a single_set.  */
18629 out_set = single_set (out_insn);
18630 if (out_set)
18631 {
18632 in_set = single_set (in_insn);
18633 if (in_set)
18634 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18635 }
18636
18637 return false;
18638 }
18639
18640 /* Try to determine the base/offset/size parts of the given MEM.
18641 Return true if successful, false if any of the values could not
18642 be determined.
18643 
18644 This function only looks for REG or REG+CONST address forms;
18645 a REG+REG address form returns false.  */
18646
18647 static bool
18648 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18649 HOST_WIDE_INT *size)
18650 {
18651 rtx addr_rtx;
18652 if (MEM_SIZE_KNOWN_P (mem))
18653 *size = MEM_SIZE (mem);
18654 else
18655 return false;
18656
18657 addr_rtx = (XEXP (mem, 0));
18658 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18659 addr_rtx = XEXP (addr_rtx, 1);
18660
18661 *offset = 0;
18662 while (GET_CODE (addr_rtx) == PLUS
18663 && CONST_INT_P (XEXP (addr_rtx, 1)))
18664 {
18665 *offset += INTVAL (XEXP (addr_rtx, 1));
18666 addr_rtx = XEXP (addr_rtx, 0);
18667 }
18668 if (!REG_P (addr_rtx))
18669 return false;
18670
18671 *base = addr_rtx;
18672 return true;
18673 }
18674
18675 /* If the target storage locations of arguments MEM1 and MEM2 are
18676 adjacent, then return the argument that has the lower address.
18677 Otherwise, return NULL_RTX. */
18678
18679 static rtx
18680 adjacent_mem_locations (rtx mem1, rtx mem2)
18681 {
18682 rtx reg1, reg2;
18683 HOST_WIDE_INT off1, size1, off2, size2;
18684
18685 if (MEM_P (mem1)
18686 && MEM_P (mem2)
18687 && get_memref_parts (mem1, &reg1, &off1, &size1)
18688 && get_memref_parts (mem2, &reg2, &off2, &size2)
18689 && REGNO (reg1) == REGNO (reg2))
18690 {
18691 if (off1 + size1 == off2)
18692 return mem1;
18693 else if (off2 + size2 == off1)
18694 return mem2;
18695 }
18696
18697 return NULL_RTX;
18698 }
18699
18700 /* This function returns true if it can be determined that the two MEM
18701 locations overlap by at least 1 byte based on base reg/offset/size. */
18702
18703 static bool
18704 mem_locations_overlap (rtx mem1, rtx mem2)
18705 {
18706 rtx reg1, reg2;
18707 HOST_WIDE_INT off1, size1, off2, size2;
18708
18709 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18710 && get_memref_parts (mem2, &reg2, &off2, &size2))
18711 return ((REGNO (reg1) == REGNO (reg2))
18712 && (((off1 <= off2) && (off1 + size1 > off2))
18713 || ((off2 <= off1) && (off2 + size2 > off1))));
18714
18715 return false;
18716 }
18717
18718 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
18719 Increase the priority to execute INSN earlier, reduce the
18720 priority to execute it later.  */
18723
18724 static int
18725 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18726 {
18727 rtx load_mem, str_mem;
18728 /* On machines (like the 750) which have asymmetric integer units,
18729 where one integer unit can do multiply and divides and the other
18730 can't, reduce the priority of multiply/divide so it is scheduled
18731 before other integer operations. */
18732
18733 #if 0
18734 if (! INSN_P (insn))
18735 return priority;
18736
18737 if (GET_CODE (PATTERN (insn)) == USE)
18738 return priority;
18739
18740 switch (rs6000_tune) {
18741 case PROCESSOR_PPC750:
18742 switch (get_attr_type (insn))
18743 {
18744 default:
18745 break;
18746
18747 case TYPE_MUL:
18748 case TYPE_DIV:
18749 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18750 priority, priority);
18751 if (priority >= 0 && priority < 0x01000000)
18752 priority >>= 3;
18753 break;
18754 }
18755 }
18756 #endif
18757
18758 if (insn_must_be_first_in_group (insn)
18759 && reload_completed
18760 && current_sched_info->sched_max_insns_priority
18761 && rs6000_sched_restricted_insns_priority)
18762 {
18764 /* Prioritize insns that can be dispatched only in the first
18765 dispatch slot. */
18766 if (rs6000_sched_restricted_insns_priority == 1)
18767 /* Attach highest priority to insn. This means that in
18768 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18769 precede 'priority' (critical path) considerations. */
18770 return current_sched_info->sched_max_insns_priority;
18771 else if (rs6000_sched_restricted_insns_priority == 2)
18772 /* Increase priority of insn by a minimal amount. This means that in
18773 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18774 considerations precede dispatch-slot restriction considerations. */
18775 return (priority + 1);
18776 }
18777
18778 if (rs6000_tune == PROCESSOR_POWER6
18779 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18780 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18781 /* Attach highest priority to insn if the scheduler has just issued two
18782 stores and this instruction is a load, or two loads and this instruction
18783 is a store. Power6 wants loads and stores scheduled alternately
18784 when possible */
18785 return current_sched_info->sched_max_insns_priority;
18786
18787 return priority;
18788 }
18789
18790 /* Return true if the instruction is non-pipelined on the Cell.  */
18791 static bool
18792 is_nonpipeline_insn (rtx_insn *insn)
18793 {
18794 enum attr_type type;
18795 if (!insn || !NONDEBUG_INSN_P (insn)
18796 || GET_CODE (PATTERN (insn)) == USE
18797 || GET_CODE (PATTERN (insn)) == CLOBBER)
18798 return false;
18799
18800 type = get_attr_type (insn);
18801 if (type == TYPE_MUL
18802 || type == TYPE_DIV
18803 || type == TYPE_SDIV
18804 || type == TYPE_DDIV
18805 || type == TYPE_SSQRT
18806 || type == TYPE_DSQRT
18807 || type == TYPE_MFCR
18808 || type == TYPE_MFCRF
18809 || type == TYPE_MFJMPR)
18810 {
18811 return true;
18812 }
18813 return false;
18814 }
18815
18816
18817 /* Return how many instructions the machine can issue per cycle. */
18818
18819 static int
18820 rs6000_issue_rate (void)
18821 {
18822 /* Unless scheduling for register pressure, use issue rate of 1 for
18823 first scheduling pass to decrease degradation. */
18824 if (!reload_completed && !flag_sched_pressure)
18825 return 1;
18826
18827 switch (rs6000_tune) {
18828 case PROCESSOR_RS64A:
18829 case PROCESSOR_PPC601: /* ? */
18830 case PROCESSOR_PPC7450:
18831 return 3;
18832 case PROCESSOR_PPC440:
18833 case PROCESSOR_PPC603:
18834 case PROCESSOR_PPC750:
18835 case PROCESSOR_PPC7400:
18836 case PROCESSOR_PPC8540:
18837 case PROCESSOR_PPC8548:
18838 case PROCESSOR_CELL:
18839 case PROCESSOR_PPCE300C2:
18840 case PROCESSOR_PPCE300C3:
18841 case PROCESSOR_PPCE500MC:
18842 case PROCESSOR_PPCE500MC64:
18843 case PROCESSOR_PPCE5500:
18844 case PROCESSOR_PPCE6500:
18845 case PROCESSOR_TITAN:
18846 return 2;
18847 case PROCESSOR_PPC476:
18848 case PROCESSOR_PPC604:
18849 case PROCESSOR_PPC604e:
18850 case PROCESSOR_PPC620:
18851 case PROCESSOR_PPC630:
18852 return 4;
18853 case PROCESSOR_POWER4:
18854 case PROCESSOR_POWER5:
18855 case PROCESSOR_POWER6:
18856 case PROCESSOR_POWER7:
18857 return 5;
18858 case PROCESSOR_POWER8:
18859 return 7;
18860 case PROCESSOR_POWER9:
18861 return 6;
18862 case PROCESSOR_POWER10:
18863 case PROCESSOR_POWER11:
18864 return 8;
18865 default:
18866 return 1;
18867 }
18868 }
18869
18870 /* Return how many instructions to look ahead for better insn
18871 scheduling. */
18872
18873 static int
18874 rs6000_use_sched_lookahead (void)
18875 {
18876 switch (rs6000_tune)
18877 {
18878 case PROCESSOR_PPC8540:
18879 case PROCESSOR_PPC8548:
18880 return 4;
18881
18882 case PROCESSOR_CELL:
18883 return (reload_completed ? 8 : 0);
18884
18885 default:
18886 return 0;
18887 }
18888 }
18889
18890 /* We are choosing an insn from the ready queue.  Return zero if INSN
18891 can be chosen.  */
18892 static int
18893 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18894 {
18895 if (ready_index == 0)
18896 return 0;
18897
18898 if (rs6000_tune != PROCESSOR_CELL)
18899 return 0;
18900
18901 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18902
18903 if (!reload_completed
18904 || is_nonpipeline_insn (insn)
18905 || is_microcoded_insn (insn))
18906 return 1;
18907
18908 return 0;
18909 }
18910
18911 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18912 and return true. */
18913
18914 static bool
18915 find_mem_ref (rtx pat, rtx *mem_ref)
18916 {
18917 const char * fmt;
18918 int i, j;
18919
18920 /* stack_tie does not produce any real memory traffic. */
18921 if (tie_operand (pat, VOIDmode))
18922 return false;
18923
18924 if (MEM_P (pat))
18925 {
18926 *mem_ref = pat;
18927 return true;
18928 }
18929
18930 /* Recursively process the pattern. */
18931 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18932
18933 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18934 {
18935 if (fmt[i] == 'e')
18936 {
18937 if (find_mem_ref (XEXP (pat, i), mem_ref))
18938 return true;
18939 }
18940 else if (fmt[i] == 'E')
18941 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18942 {
18943 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18944 return true;
18945 }
18946 }
18947
18948 return false;
18949 }
18950
18951 /* Determine if PAT is a PATTERN of a load insn. */
18952
18953 static bool
18954 is_load_insn1 (rtx pat, rtx *load_mem)
18955 {
18956 if (!pat)
18957 return false;
18958
18959 if (GET_CODE (pat) == SET)
18960 {
18961 if (REG_P (SET_DEST (pat)))
18962 return find_mem_ref (SET_SRC (pat), load_mem);
18963 else
18964 return false;
18965 }
18966
18967 if (GET_CODE (pat) == PARALLEL)
18968 {
18969 int i;
18970
18971 for (i = 0; i < XVECLEN (pat, 0); i++)
18972 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18973 return true;
18974 }
18975
18976 return false;
18977 }
18978
18979 /* Determine if INSN loads from memory. */
18980
18981 static bool
18982 is_load_insn (rtx insn, rtx *load_mem)
18983 {
18984 if (!insn || !INSN_P (insn))
18985 return false;
18986
18987 if (CALL_P (insn))
18988 return false;
18989
18990 return is_load_insn1 (PATTERN (insn), load_mem);
18991 }
18992
18993 /* Determine if PAT is a PATTERN of a store insn. */
18994
18995 static bool
18996 is_store_insn1 (rtx pat, rtx *str_mem)
18997 {
18998 if (!pat)
18999 return false;
19000
19001 if (GET_CODE (pat) == SET)
19002 {
19003 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
19004 return find_mem_ref (SET_DEST (pat), str_mem);
19005 else
19006 return false;
19007 }
19008
19009 if (GET_CODE (pat) == PARALLEL)
19010 {
19011 int i;
19012
19013 for (i = 0; i < XVECLEN (pat, 0); i++)
19014 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
19015 return true;
19016 }
19017
19018 return false;
19019 }
19020
19021 /* Determine if INSN stores to memory. */
19022
19023 static bool
19024 is_store_insn (rtx insn, rtx *str_mem)
19025 {
19026 if (!insn || !INSN_P (insn))
19027 return false;
19028
19029 return is_store_insn1 (PATTERN (insn), str_mem);
19030 }
19031
19032 /* Return whether TYPE is a Power9 pairable vector instruction type. */
19033
19034 static bool
19035 is_power9_pairable_vec_type (enum attr_type type)
19036 {
19037 switch (type)
19038 {
19039 case TYPE_VECSIMPLE:
19040 case TYPE_VECCOMPLEX:
19041 case TYPE_VECDIV:
19042 case TYPE_VECCMP:
19043 case TYPE_VECPERM:
19044 case TYPE_VECFLOAT:
19045 case TYPE_VECFDIV:
19046 case TYPE_VECDOUBLE:
19047 return true;
19048 default:
19049 break;
19050 }
19051 return false;
19052 }
19053
19054 /* Return whether the dependence between INSN and NEXT is considered
19055 costly by the given target.  */
19056
19057 static bool
19058 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
19059 {
19060 rtx insn;
19061 rtx next;
19062 rtx load_mem, str_mem;
19063
19064 /* If the flag is not enabled - no dependence is considered costly;
19065 allow all dependent insns in the same group.
19066 This is the most aggressive option. */
19067 if (rs6000_sched_costly_dep == no_dep_costly)
19068 return false;
19069
19070 /* If the flag is set to 1 - a dependence is always considered costly;
19071 do not allow dependent instructions in the same group.
19072 This is the most conservative option. */
19073 if (rs6000_sched_costly_dep == all_deps_costly)
19074 return true;
19075
19076 insn = DEP_PRO (dep);
19077 next = DEP_CON (dep);
19078
19079 if (rs6000_sched_costly_dep == store_to_load_dep_costly
19080 && is_load_insn (next, &load_mem)
19081 && is_store_insn (insn, &str_mem))
19082 /* Prevent load after store in the same group. */
19083 return true;
19084
19085 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
19086 && is_load_insn (next, &load_mem)
19087 && is_store_insn (insn, &str_mem)
19088 && DEP_TYPE (dep) == REG_DEP_TRUE
19089 && mem_locations_overlap(str_mem, load_mem))
19090 /* Prevent load after store in the same group if it is a true
19091 dependence. */
19092 return true;
19093
19094 /* The flag is set to X; dependences with latency >= X are considered costly,
19095 and will not be scheduled in the same group. */
19096 if (rs6000_sched_costly_dep <= max_dep_latency
19097 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
19098 return true;
19099
19100 return false;
19101 }
19102
19103 /* Return the next insn after INSN that is found before TAIL is reached,
19104 skipping any "non-active" insns - insns that will not actually occupy
19105 an issue slot. Return NULL_RTX if such an insn is not found. */
19106
19107 static rtx_insn *
19108 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
19109 {
19110 if (insn == NULL_RTX || insn == tail)
19111 return NULL;
19112
19113 while (1)
19114 {
19115 insn = NEXT_INSN (insn);
19116 if (insn == NULL_RTX || insn == tail)
19117 return NULL;
19118
19119 if (CALL_P (insn)
19120 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
19121 || (NONJUMP_INSN_P (insn)
19122 && GET_CODE (PATTERN (insn)) != USE
19123 && GET_CODE (PATTERN (insn)) != CLOBBER
19124 && INSN_CODE (insn) != CODE_FOR_stack_tie))
19125 break;
19126 }
19127 return insn;
19128 }
19129
19130 /* Move instruction at POS to the end of the READY list. */
19131
19132 static void
19133 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
19134 {
19135 rtx_insn *tmp;
19136 int i;
19137
19138 tmp = ready[pos];
19139 for (i = pos; i < lastpos; i++)
19140 ready[i] = ready[i + 1];
19141 ready[lastpos] = tmp;
19142 }
19143
19144 /* Do Power6-specific sched_reorder2 reordering of the ready list.  */
19145
19146 static int
19147 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
19148 {
19149 /* For Power6, we need to handle some special cases to try to keep the
19150 store queue from overflowing and triggering expensive flushes.
19151
19152 This code monitors how load and store instructions are being issued
19153 and skews the ready list one way or the other to increase the likelihood
19154 that a desired instruction is issued at the proper time.
19155
19156 A couple of things are done. First, we maintain a "load_store_pendulum"
19157 to track the current state of load/store issue.
19158
19159 - If the pendulum is at zero, then no loads or stores have been
19160 issued in the current cycle so we do nothing.
19161
19162 - If the pendulum is 1, then a single load has been issued in this
19163 cycle and we attempt to locate another load in the ready list to
19164 issue with it.
19165
19166 - If the pendulum is -2, then two stores have already been
19167 issued in this cycle, so we increase the priority of the first load
19168 in the ready list to increase its likelihood of being chosen first
19169 in the next cycle.
19170
19171 - If the pendulum is -1, then a single store has been issued in this
19172 cycle and we attempt to locate another store in the ready list to
19173 issue with it, preferring a store to an adjacent memory location to
19174 facilitate store pairing in the store queue.
19175
19176 - If the pendulum is 2, then two loads have already been
19177 issued in this cycle, so we increase the priority of the first store
19178 in the ready list to increase its likelihood of being chosen first
19179 in the next cycle.
19180
19181 - If the pendulum < -2 or > 2, then do nothing.
19182
19183 Note: This code covers the most common scenarios.  There exist
19184 non-load/store instructions that make use of the LSU and which
19185 would need to be accounted for to strictly model the behavior
19186 of the machine.  Those instructions are currently left unaccounted
19187 for, to help minimize the compile-time overhead of this code.
19188 */
19189 int pos;
19190 rtx load_mem, str_mem;
19191
19192 if (is_store_insn (last_scheduled_insn, &str_mem))
19193 /* Issuing a store, swing the load_store_pendulum to the left */
19194 load_store_pendulum--;
19195 else if (is_load_insn (last_scheduled_insn, &load_mem))
19196 /* Issuing a load, swing the load_store_pendulum to the right */
19197 load_store_pendulum++;
19198 else
19199 return cached_can_issue_more;
19200
19201 /* If the pendulum is balanced, or there is only one instruction on
19202 the ready list, then all is well, so return. */
19203 if ((load_store_pendulum == 0) || (lastpos <= 0))
19204 return cached_can_issue_more;
19205
19206 if (load_store_pendulum == 1)
19207 {
19208 /* A load has been issued in this cycle. Scan the ready list
19209 for another load to issue with it */
19210 pos = lastpos;
19211
19212 while (pos >= 0)
19213 {
19214 if (is_load_insn (ready[pos], &load_mem))
19215 {
19216 /* Found a load. Move it to the head of the ready list,
19217 and adjust its priority so that it is more likely to
19218 stay there.  */
19219 move_to_end_of_ready (ready, pos, lastpos);
19220
19221 if (!sel_sched_p ()
19222 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19223 INSN_PRIORITY (ready[lastpos])++;
19224 break;
19225 }
19226 pos--;
19227 }
19228 }
19229 else if (load_store_pendulum == -2)
19230 {
19231 /* Two stores have been issued in this cycle. Increase the
19232 priority of the first load in the ready list to favor it for
19233 issuing in the next cycle. */
19234 pos = lastpos;
19235
19236 while (pos >= 0)
19237 {
19238 if (is_load_insn (ready[pos], &load_mem)
19239 && !sel_sched_p ()
19240 && INSN_PRIORITY_KNOWN (ready[pos]))
19241 {
19242 INSN_PRIORITY (ready[pos])++;
19243
19244 /* Adjust the pendulum to account for the fact that a load
19245 was found and increased in priority. This is to prevent
19246 increasing the priority of multiple loads */
19247 load_store_pendulum--;
19248
19249 break;
19250 }
19251 pos--;
19252 }
19253 }
19254 else if (load_store_pendulum == -1)
19255 {
19256 /* A store has been issued in this cycle. Scan the ready list for
19257 another store to issue with it, preferring a store to an adjacent
19258 memory location */
19259 int first_store_pos = -1;
19260
19261 pos = lastpos;
19262
19263 while (pos >= 0)
19264 {
19265 if (is_store_insn (ready[pos], &str_mem))
19266 {
19267 rtx str_mem2;
19268 /* Maintain the index of the first store found on the
19269 list */
19270 if (first_store_pos == -1)
19271 first_store_pos = pos;
19272
19273 if (is_store_insn (last_scheduled_insn, &str_mem2)
19274 && adjacent_mem_locations (str_mem, str_mem2))
19275 {
19276 /* Found an adjacent store. Move it to the head of the
19277 ready list, and adjust its priority so that it is
19278 more likely to stay there */
19279 move_to_end_of_ready (ready, pos, lastpos);
19280
19281 if (!sel_sched_p ()
19282 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19283 INSN_PRIORITY (ready[lastpos])++;
19284
19285 first_store_pos = -1;
19286
19287 break;
19288 }
19289 }
19290 pos--;
19291 }
19292
19293 if (first_store_pos >= 0)
19294 {
19295 /* An adjacent store wasn't found, but a non-adjacent store was,
19296 so move the non-adjacent store to the front of the ready
19297 list, and adjust its priority so that it is more likely to
19298 stay there. */
19299 move_to_end_of_ready (ready, first_store_pos, lastpos);
19300 if (!sel_sched_p ()
19301 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19302 INSN_PRIORITY (ready[lastpos])++;
19303 }
19304 }
19305 else if (load_store_pendulum == 2)
19306 {
19307 /* Two loads have been issued in this cycle. Increase the priority
19308 of the first store in the ready list to favor it for issuing in
19309 the next cycle. */
19310 pos = lastpos;
19311
19312 while (pos >= 0)
19313 {
19314 if (is_store_insn (ready[pos], &str_mem)
19315 && !sel_sched_p ()
19316 && INSN_PRIORITY_KNOWN (ready[pos]))
19317 {
19318 INSN_PRIORITY (ready[pos])++;
19319
19320 /* Adjust the pendulum to account for the fact that a store
19321 was found and increased in priority. This is to prevent
19322 increasing the priority of multiple stores */
19323 load_store_pendulum++;
19324
19325 break;
19326 }
19327 pos--;
19328 }
19329 }
19330
19331 return cached_can_issue_more;
19332 }
19333
19334 /* Do Power9-specific sched_reorder2 reordering of the ready list.  */
19335
19336 static int
19337 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
19338 {
19339 int pos;
19340 enum attr_type type, type2;
19341
19342 type = get_attr_type (last_scheduled_insn);
19343
19344 /* Try to issue fixed point divides back-to-back in pairs so they will be
19345 routed to separate execution units and execute in parallel. */
19346 if (type == TYPE_DIV && divide_cnt == 0)
19347 {
19348 /* First divide has been scheduled. */
19349 divide_cnt = 1;
19350
19351 /* Scan the ready list looking for another divide, if found move it
19352 to the end of the list so it is chosen next. */
19353 pos = lastpos;
19354 while (pos >= 0)
19355 {
19356 if (recog_memoized (ready[pos]) >= 0
19357 && get_attr_type (ready[pos]) == TYPE_DIV)
19358 {
19359 move_to_end_of_ready (ready, pos, lastpos);
19360 break;
19361 }
19362 pos--;
19363 }
19364 }
19365 else
19366 {
19367 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19368 divide_cnt = 0;
19369
19370 /* The best dispatch throughput for vector and vector load insns can be
19371 achieved by interleaving a vector and vector load such that they'll
19372 dispatch to the same superslice. If this pairing cannot be achieved
19373 then it is best to pair vector insns together and vector load insns
19374 together.
19375
19376 To aid in this pairing, vec_pairing maintains the current state with
19377 the following values:
19378
19379 0 : Initial state, no vecload/vector pairing has been started.
19380
19381 1 : A vecload or vector insn has been issued and a candidate for
19382 pairing has been found and moved to the end of the ready
19383 list. */
19384 if (type == TYPE_VECLOAD)
19385 {
19386 /* Issued a vecload. */
19387 if (vec_pairing == 0)
19388 {
19389 int vecload_pos = -1;
19390 /* We issued a single vecload, look for a vector insn to pair it
19391 with. If one isn't found, try to pair another vecload. */
19392 pos = lastpos;
19393 while (pos >= 0)
19394 {
19395 if (recog_memoized (ready[pos]) >= 0)
19396 {
19397 type2 = get_attr_type (ready[pos]);
19398 if (is_power9_pairable_vec_type (type2))
19399 {
19400 /* Found a vector insn to pair with, move it to the
19401 end of the ready list so it is scheduled next. */
19402 move_to_end_of_ready (ready, pos, lastpos);
19403 vec_pairing = 1;
19404 return cached_can_issue_more;
19405 }
19406 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
19407 /* Remember position of first vecload seen. */
19408 vecload_pos = pos;
19409 }
19410 pos--;
19411 }
19412 if (vecload_pos >= 0)
19413 {
19414 /* Didn't find a vector to pair with but did find a vecload,
19415 move it to the end of the ready list. */
19416 move_to_end_of_ready (ready, vecload_pos, lastpos);
19417 vec_pairing = 1;
19418 return cached_can_issue_more;
19419 }
19420 }
19421 }
19422 else if (is_power9_pairable_vec_type (type))
19423 {
19424 /* Issued a vector operation. */
19425 if (vec_pairing == 0)
19426 {
19427 int vec_pos = -1;
19428 /* We issued a single vector insn, look for a vecload to pair it
19429 with. If one isn't found, try to pair another vector. */
19430 pos = lastpos;
19431 while (pos >= 0)
19432 {
19433 if (recog_memoized (ready[pos]) >= 0)
19434 {
19435 type2 = get_attr_type (ready[pos]);
19436 if (type2 == TYPE_VECLOAD)
19437 {
19438 /* Found a vecload insn to pair with, move it to the
19439 end of the ready list so it is scheduled next. */
19440 move_to_end_of_ready (ready, pos, lastpos);
19441 vec_pairing = 1;
19442 return cached_can_issue_more;
19443 }
19444 else if (is_power9_pairable_vec_type (type2)
19445 && vec_pos == -1)
19446 /* Remember position of first vector insn seen. */
19447 vec_pos = pos;
19448 }
19449 pos--;
19450 }
19451 if (vec_pos >= 0)
19452 {
19453 /* Didn't find a vecload to pair with but did find a vector
19454 insn, move it to the end of the ready list. */
19455 move_to_end_of_ready (ready, vec_pos, lastpos);
19456 vec_pairing = 1;
19457 return cached_can_issue_more;
19458 }
19459 }
19460 }
19461
19462 /* We've either finished a vec/vecload pair, couldn't find an insn to
19463 continue the current pair, or the last insn had nothing to do
19464 with pairing.  In any case, reset the state.  */
19465 vec_pairing = 0;
19466 }
19467
19468 return cached_can_issue_more;
19469 }
19470
19471 /* Determine if INSN is a store to memory that can be fused with a similar
19472 adjacent store. */
19473
19474 static bool
19475 is_fusable_store (rtx_insn *insn, rtx *str_mem)
19476 {
19477 /* Insn must be a non-prefixed base+disp form store. */
19478 if (is_store_insn (insn, str_mem)
19479 && get_attr_prefixed (insn) == PREFIXED_NO
19480 && get_attr_update (insn) == UPDATE_NO
19481 && get_attr_indexed (insn) == INDEXED_NO)
19482 {
19483 /* Further restrictions by mode and size. */
19484 if (!MEM_SIZE_KNOWN_P (*str_mem))
19485 return false;
19486
19487 machine_mode mode = GET_MODE (*str_mem);
19488 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
19489
19490 if (INTEGRAL_MODE_P (mode))
19491 /* Must be word or dword size. */
19492 return (size == 4 || size == 8);
19493 else if (FLOAT_MODE_P (mode))
19494 /* Must be dword size. */
19495 return (size == 8);
19496 }
19497
19498 return false;
19499 }
19500
19501 /* Do Power10/Power11-specific reordering of the ready list.  */
19502
19503 static int
19504 power10_sched_reorder (rtx_insn **ready, int lastpos)
19505 {
19506 rtx mem1;
19507
19508 /* Do store fusion during sched2 only. */
19509 if (!reload_completed)
19510 return cached_can_issue_more;
19511
19512 /* If the prior insn finished off a store fusion pair then simply
19513 reset the counter and return, nothing more to do. */
19514 if (load_store_pendulum != 0)
19515 {
19516 load_store_pendulum = 0;
19517 return cached_can_issue_more;
19518 }
19519
19520 /* Try to pair certain store insns to adjacent memory locations
19521 so that the hardware will fuse them to a single operation. */
19522 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19523 {
19525 /* A fusable store was just scheduled. Scan the ready list for another
19526 store that it can fuse with. */
19527 int pos = lastpos;
19528 while (pos >= 0)
19529 {
19530 rtx mem2;
19531 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19532 must be ascending only. */
19533 if (is_fusable_store (ready[pos], &mem2)
19534 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19535 && adjacent_mem_locations (mem1, mem2))
19536 || (FLOAT_MODE_P (GET_MODE (mem1))
19537 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19538 {
19539 /* Found a fusable store. Move it to the end of the ready list
19540 so it is scheduled next. */
19541 move_to_end_of_ready (ready, pos, lastpos);
19542
19543 load_store_pendulum = -1;
19544 break;
19545 }
19546 pos--;
19547 }
19548 }
19549
19550 return cached_can_issue_more;
19551 }
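/* Editor's note: load_store_pendulum, elsewhere the POWER6 load/store
balance counter, is reused above as a simple flag: -1 records that a
fusion candidate was moved to the head of the ready list, and the
check at function entry resets it on the following call. */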
19552
19553 /* We are about to begin issuing insns for this clock cycle. */
19554
19555 static int
19556 rs6000_sched_reorder (FILE *dump, int sched_verbose,
19557 rtx_insn **ready,
19558 int *pn_ready,
19559 int clock_var ATTRIBUTE_UNUSED)
19560 {
19561 int n_ready = *pn_ready;
19562
19563 if (sched_verbose)
19564 fprintf (dump, "// rs6000_sched_reorder :\n");
19565
19566 /* Reorder the ready list, if the second to last ready insn
19567 is a nonpipeline insn. */
19568 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19569 {
19570 if (is_nonpipeline_insn (ready[n_ready - 1])
19571 && (recog_memoized (ready[n_ready - 2]) > 0))
19572 /* Simply swap first two insns. */
19573 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19574 }
19575
19576 if (rs6000_tune == PROCESSOR_POWER6)
19577 load_store_pendulum = 0;
19578
19579 /* Do Power10/Power11 dependent reordering. */
19580 if (last_scheduled_insn
19581 && (rs6000_tune == PROCESSOR_POWER10
19582 || rs6000_tune == PROCESSOR_POWER11))
19583 power10_sched_reorder (ready, n_ready - 1);
19584
19585 return rs6000_issue_rate ();
19586 }
19587
19588 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19589
19590 static int
19591 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19592 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19593 {
19594 if (sched_verbose)
19595 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19596
19597 /* Do Power6 dependent reordering if necessary. */
19598 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19599 return power6_sched_reorder2 (ready, *pn_ready - 1);
19600
19601 /* Do Power9 dependent reordering if necessary. */
19602 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19603 && recog_memoized (last_scheduled_insn) >= 0)
19604 return power9_sched_reorder2 (ready, *pn_ready - 1);
19605
19606 /* Do Power10/Power11 dependent reordering. */
19607 if (last_scheduled_insn
19608 && (rs6000_tune == PROCESSOR_POWER10
19609 || rs6000_tune == PROCESSOR_POWER11))
19610 return power10_sched_reorder (ready, *pn_ready - 1);
19611
19612 return cached_can_issue_more;
19613 }
19614
19615 /* Return whether the presence of INSN causes a dispatch group termination
19616 of group WHICH_GROUP.
19617
19618 If WHICH_GROUP == current_group, this function will return true if INSN
19619 causes the termination of the current group (i.e, the dispatch group to
19620 which INSN belongs). This means that INSN will be the last insn in the
19621 group it belongs to.
19622
19623 If WHICH_GROUP == previous_group, this function will return true if INSN
19624 causes the termination of the previous group (i.e, the dispatch group that
19625 precedes the group to which INSN belongs). This means that INSN will be
19626 the first insn in the group it belongs to. */
19627
19628 static bool
19629 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19630 {
19631 bool first, last;
19632
19633 if (! insn)
19634 return false;
19635
19636 first = insn_must_be_first_in_group (insn);
19637 last = insn_must_be_last_in_group (insn);
19638
19639 if (first && last)
19640 return true;
19641
19642 if (which_group == current_group)
19643 return last;
19644 else if (which_group == previous_group)
19645 return first;
19646
19647 return false;
19648 }
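/* Editor's note: an insn can terminate both groups at once -- on
POWER6, for example, an isync appears in both the first-in-group and
last-in-group lists below, so it ends the previous group and forms a
group of its own. */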
19649
19650
19651 static bool
19652 insn_must_be_first_in_group (rtx_insn *insn)
19653 {
19654 enum attr_type type;
19655
19656 if (!insn
19657 || NOTE_P (insn)
19658 || DEBUG_INSN_P (insn)
19659 || GET_CODE (PATTERN (insn)) == USE
19660 || GET_CODE (PATTERN (insn)) == CLOBBER)
19661 return false;
19662
19663 switch (rs6000_tune)
19664 {
19665 case PROCESSOR_POWER5:
19666 if (is_cracked_insn (insn))
19667 return true;
19668 /* FALLTHRU */
19669 case PROCESSOR_POWER4:
19670 if (is_microcoded_insn (insn))
19671 return true;
19672
19673 if (!rs6000_sched_groups)
19674 return false;
19675
19676 type = get_attr_type (insn);
19677
19678 switch (type)
19679 {
19680 case TYPE_MFCR:
19681 case TYPE_MFCRF:
19682 case TYPE_MTCR:
19683 case TYPE_CR_LOGICAL:
19684 case TYPE_MTJMPR:
19685 case TYPE_MFJMPR:
19686 case TYPE_DIV:
19687 case TYPE_LOAD_L:
19688 case TYPE_STORE_C:
19689 case TYPE_ISYNC:
19690 case TYPE_SYNC:
19691 return true;
19692 default:
19693 break;
19694 }
19695 break;
19696 case PROCESSOR_POWER6:
19697 type = get_attr_type (insn);
19698
19699 switch (type)
19700 {
19701 case TYPE_EXTS:
19702 case TYPE_CNTLZ:
19703 case TYPE_TRAP:
19704 case TYPE_MUL:
19705 case TYPE_INSERT:
19706 case TYPE_FPCOMPARE:
19707 case TYPE_MFCR:
19708 case TYPE_MTCR:
19709 case TYPE_MFJMPR:
19710 case TYPE_MTJMPR:
19711 case TYPE_ISYNC:
19712 case TYPE_SYNC:
19713 case TYPE_LOAD_L:
19714 case TYPE_STORE_C:
19715 return true;
19716 case TYPE_SHIFT:
19717 if (get_attr_dot (insn) == DOT_NO
19718 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19719 return true;
19720 else
19721 break;
19722 case TYPE_DIV:
19723 if (get_attr_size (insn) == SIZE_32)
19724 return true;
19725 else
19726 break;
19727 case TYPE_LOAD:
19728 case TYPE_STORE:
19729 case TYPE_FPLOAD:
19730 case TYPE_FPSTORE:
19731 if (get_attr_update (insn) == UPDATE_YES)
19732 return true;
19733 else
19734 break;
19735 default:
19736 break;
19737 }
19738 break;
19739 case PROCESSOR_POWER7:
19740 type = get_attr_type (insn);
19741
19742 switch (type)
19743 {
19744 case TYPE_CR_LOGICAL:
19745 case TYPE_MFCR:
19746 case TYPE_MFCRF:
19747 case TYPE_MTCR:
19748 case TYPE_DIV:
19749 case TYPE_ISYNC:
19750 case TYPE_LOAD_L:
19751 case TYPE_STORE_C:
19752 case TYPE_MFJMPR:
19753 case TYPE_MTJMPR:
19754 return true;
19755 case TYPE_MUL:
19756 case TYPE_SHIFT:
19757 case TYPE_EXTS:
19758 if (get_attr_dot (insn) == DOT_YES)
19759 return true;
19760 else
19761 break;
19762 case TYPE_LOAD:
19763 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19764 || get_attr_update (insn) == UPDATE_YES)
19765 return true;
19766 else
19767 break;
19768 case TYPE_STORE:
19769 case TYPE_FPLOAD:
19770 case TYPE_FPSTORE:
19771 if (get_attr_update (insn) == UPDATE_YES)
19772 return true;
19773 else
19774 break;
19775 default:
19776 break;
19777 }
19778 break;
19779 case PROCESSOR_POWER8:
19780 type = get_attr_type (insn);
19781
19782 switch (type)
19783 {
19784 case TYPE_CR_LOGICAL:
19785 case TYPE_MFCR:
19786 case TYPE_MFCRF:
19787 case TYPE_MTCR:
19788 case TYPE_SYNC:
19789 case TYPE_ISYNC:
19790 case TYPE_LOAD_L:
19791 case TYPE_STORE_C:
19792 case TYPE_VECSTORE:
19793 case TYPE_MFJMPR:
19794 case TYPE_MTJMPR:
19795 return true;
19796 case TYPE_SHIFT:
19797 case TYPE_EXTS:
19798 case TYPE_MUL:
19799 if (get_attr_dot (insn) == DOT_YES)
19800 return true;
19801 else
19802 break;
19803 case TYPE_LOAD:
19804 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19805 || get_attr_update (insn) == UPDATE_YES)
19806 return true;
19807 else
19808 break;
19809 case TYPE_STORE:
19810 if (get_attr_update (insn) == UPDATE_YES
19811 && get_attr_indexed (insn) == INDEXED_YES)
19812 return true;
19813 else
19814 break;
19815 default:
19816 break;
19817 }
19818 break;
19819 default:
19820 break;
19821 }
19822
19823 return false;
19824 }
19825
19826 static bool
19827 insn_must_be_last_in_group (rtx_insn *insn)
19828 {
19829 enum attr_type type;
19830
19831 if (!insn
19832 || NOTE_P (insn)
19833 || DEBUG_INSN_P (insn)
19834 || GET_CODE (PATTERN (insn)) == USE
19835 || GET_CODE (PATTERN (insn)) == CLOBBER)
19836 return false;
19837
19838 switch (rs6000_tune)
    {
19839 case PROCESSOR_POWER4:
19840 case PROCESSOR_POWER5:
19841 if (is_microcoded_insn (insn))
19842 return true;
19843
19844 if (is_branch_slot_insn (insn))
19845 return true;
19846
19847 break;
19848 case PROCESSOR_POWER6:
19849 type = get_attr_type (insn);
19850
19851 switch (type)
19852 {
19853 case TYPE_EXTS:
19854 case TYPE_CNTLZ:
19855 case TYPE_TRAP:
19856 case TYPE_MUL:
19857 case TYPE_FPCOMPARE:
19858 case TYPE_MFCR:
19859 case TYPE_MTCR:
19860 case TYPE_MFJMPR:
19861 case TYPE_MTJMPR:
19862 case TYPE_ISYNC:
19863 case TYPE_SYNC:
19864 case TYPE_LOAD_L:
19865 case TYPE_STORE_C:
19866 return true;
19867 case TYPE_SHIFT:
19868 if (get_attr_dot (insn) == DOT_NO
19869 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19870 return true;
19871 else
19872 break;
19873 case TYPE_DIV:
19874 if (get_attr_size (insn) == SIZE_32)
19875 return true;
19876 else
19877 break;
19878 default:
19879 break;
19880 }
19881 break;
19882 case PROCESSOR_POWER7:
19883 type = get_attr_type (insn);
19884
19885 switch (type)
19886 {
19887 case TYPE_ISYNC:
19888 case TYPE_SYNC:
19889 case TYPE_LOAD_L:
19890 case TYPE_STORE_C:
19891 return true;
19892 case TYPE_LOAD:
19893 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19894 && get_attr_update (insn) == UPDATE_YES)
19895 return true;
19896 else
19897 break;
19898 case TYPE_STORE:
19899 if (get_attr_update (insn) == UPDATE_YES
19900 && get_attr_indexed (insn) == INDEXED_YES)
19901 return true;
19902 else
19903 break;
19904 default:
19905 break;
19906 }
19907 break;
19908 case PROCESSOR_POWER8:
19909 type = get_attr_type (insn);
19910
19911 switch (type)
19912 {
19913 case TYPE_MFCR:
19914 case TYPE_MTCR:
19915 case TYPE_ISYNC:
19916 case TYPE_SYNC:
19917 case TYPE_LOAD_L:
19918 case TYPE_STORE_C:
19919 return true;
19920 case TYPE_LOAD:
19921 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19922 && get_attr_update (insn) == UPDATE_YES)
19923 return true;
19924 else
19925 break;
19926 case TYPE_STORE:
19927 if (get_attr_update (insn) == UPDATE_YES
19928 && get_attr_indexed (insn) == INDEXED_YES)
19929 return true;
19930 else
19931 break;
19932 default:
19933 break;
19934 }
19935 break;
19936 default:
19937 break;
19938 }
19939
19940 return false;
19941 }
19942
19943 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19944 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19945
19946 static bool
19947 is_costly_group (rtx *group_insns, rtx next_insn)
19948 {
19949 int i;
19950 int issue_rate = rs6000_issue_rate ();
19951
19952 for (i = 0; i < issue_rate; i++)
19953 {
19954 sd_iterator_def sd_it;
19955 dep_t dep;
19956 rtx insn = group_insns[i];
19957
19958 if (!insn)
19959 continue;
19960
19961 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19962 {
19963 rtx next = DEP_CON (dep);
19964
19965 if (next == next_insn
19966 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19967 return true;
19968 }
19969 }
19970
19971 return false;
19972 }
19973
19974 /* Utility of the function redefine_groups.
19975 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19976 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19977 to keep it "far" (in a separate group) from GROUP_INSNS, following
19978 one of the following schemes, depending on the value of the flag
19979 -minsert-sched-nops = X:
19980 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19981 in order to force NEXT_INSN into a separate group.
19982 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19983 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19984 insertion (whether a group has just ended, how many vacant issue slots
19985 remain in the last group, and how many dispatch groups were seen so far). */
19986
19987 static int
19988 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19989 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19990 int *group_count)
19991 {
19992 rtx nop;
19993 bool force;
19994 int issue_rate = rs6000_issue_rate ();
19995 bool end = *group_end;
19996 int i;
19997
19998 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19999 return can_issue_more;
20000
20001 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
20002 return can_issue_more;
20003
20004 force = is_costly_group (group_insns, next_insn);
20005 if (!force)
20006 return can_issue_more;
20007
20008 if (sched_verbose > 6)
20009 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
20010 *group_count ,can_issue_more);
20011
20012 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
20013 {
20014 if (*group_end)
20015 can_issue_more = 0;
20016
20017 /* Since only a branch can be issued in the last issue_slot, it is
20018 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
20019 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
20020 in this case the last nop will start a new group and the branch
20021 will be forced to the new group. */
20022 if (can_issue_more && !is_branch_slot_insn (next_insn))
20023 can_issue_more--;
20024
20025 /* Do we have a special group ending nop? */
20026 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
20027 || rs6000_tune == PROCESSOR_POWER8)
20028 {
20029 nop = gen_group_ending_nop ();
20030 emit_insn_before (nop, next_insn);
20031 can_issue_more = 0;
20032 }
20033 else
20034 while (can_issue_more > 0)
20035 {
20036 nop = gen_nop ();
20037 emit_insn_before (nop, next_insn);
20038 can_issue_more--;
20039 }
20040
20041 *group_end = true;
20042 return 0;
20043 }
20044
20045 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
20046 {
20047 int n_nops = rs6000_sched_insert_nops;
20048
20049 /* Nops can't be issued from the branch slot, so the effective
20050 issue_rate for nops is 'issue_rate - 1'. */
20051 if (can_issue_more == 0)
20052 can_issue_more = issue_rate;
20053 can_issue_more--;
20054 if (can_issue_more == 0)
20055 {
20056 can_issue_more = issue_rate - 1;
20057 (*group_count)++;
20058 end = true;
20059 for (i = 0; i < issue_rate; i++)
20060 {
20061 group_insns[i] = 0;
20062 }
20063 }
20064
20065 while (n_nops > 0)
20066 {
20067 nop = gen_nop ();
20068 emit_insn_before (nop, next_insn);
20069 if (can_issue_more == issue_rate - 1) /* new group begins */
20070 end = false;
20071 can_issue_more--;
20072 if (can_issue_more == 0)
20073 {
20074 can_issue_more = issue_rate - 1;
20075 (*group_count)++;
20076 end = true;
20077 for (i = 0; i < issue_rate; i++)
20078 {
20079 group_insns[i] = 0;
20080 }
20081 }
20082 n_nops--;
20083 }
20084
20085 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
20086 can_issue_more++;
20087
20088 /* Is next_insn going to start a new group? */
20089 *group_end
20090 = (end
20091 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
20092 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
20093 || (can_issue_more < issue_rate &&
20094 insn_terminates_group_p (next_insn, previous_group)));
20095 if (*group_end && end)
20096 (*group_count)--;
20097
20098 if (sched_verbose > 6)
20099 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
20100 *group_count, can_issue_more);
20101 return can_issue_more;
20102 }
20103
20104 return can_issue_more;
20105 }
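/* Worked example (editor's note, assuming issue_rate == 5): if three
slots of the current group are filled and NEXT_INSN is not a branch,
the sched_finish_regroup_exact scheme emits one ordinary nop (only
the branch slot is then left) or a single special group-ending nop on
POWER6/7/8, so NEXT_INSN is forced into a new group; with
-minsert-sched-nops=2 exactly two nops are emitted regardless of how
full the group is. */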
20106
20107 /* This function tries to synch the dispatch groups that the compiler "sees"
20108 with the dispatch groups that the processor dispatcher is expected to
20109 form in practice. It tries to achieve this synchronization by forcing the
20110 estimated processor grouping on the compiler (as opposed to the function
20111 'pad_groups' which tries to force the scheduler's grouping on the processor).
20112
20113 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
20114 examines the (estimated) dispatch groups that will be formed by the processor
20115 dispatcher. It marks these group boundaries to reflect the estimated
20116 processor grouping, overriding the grouping that the scheduler had marked.
20117 Depending on the value of the flag '-minsert-sched-nops' this function can
20118 force certain insns into separate groups or force a certain distance between
20119 them by inserting nops, for example, if there exists a "costly dependence"
20120 between the insns.
20121
20122 The function estimates the group boundaries that the processor will form as
20123 follows: It keeps track of how many vacant issue slots are available after
20124 each insn. A subsequent insn will start a new group if one of the following
20125 4 cases applies:
20126 - no more vacant issue slots remain in the current dispatch group.
20127 - only the last issue slot, which is the branch slot, is vacant, but the next
20128 insn is not a branch.
20129 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
20130 which means that a cracked insn (which occupies two issue slots) can't be
20131 issued in this group.
20132 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
20133 start a new group. */
20134
20135 static int
20136 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
20137 rtx_insn *tail)
20138 {
20139 rtx_insn *insn, *next_insn;
20140 int issue_rate;
20141 int can_issue_more;
20142 int slot, i;
20143 bool group_end;
20144 int group_count = 0;
20145 rtx *group_insns;
20146
20147 /* Initialize. */
20148 issue_rate = rs6000_issue_rate ();
20149 group_insns = XALLOCAVEC (rtx, issue_rate);
20150 for (i = 0; i < issue_rate; i++)
20151 {
20152 group_insns[i] = 0;
20153 }
20154 can_issue_more = issue_rate;
20155 slot = 0;
20156 insn = get_next_active_insn (prev_head_insn, tail);
20157 group_end = false;
20158
20159 while (insn != NULL_RTX)
20160 {
20161 slot = (issue_rate - can_issue_more);
20162 group_insns[slot] = insn;
20163 can_issue_more =
20164 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20165 if (insn_terminates_group_p (insn, current_group))
20166 can_issue_more = 0;
20167
20168 next_insn = get_next_active_insn (insn, tail);
20169 if (next_insn == NULL_RTX)
20170 return group_count + 1;
20171
20172 /* Is next_insn going to start a new group? */
20173 group_end
20174 = (can_issue_more == 0
20175 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
20176 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
20177 || (can_issue_more < issue_rate &&
20178 insn_terminates_group_p (next_insn, previous_group)));
20179
20180 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
20181 next_insn, &group_end, can_issue_more,
20182 &group_count);
20183
20184 if (group_end)
20185 {
20186 group_count++;
20187 can_issue_more = 0;
20188 for (i = 0; i < issue_rate; i++)
20189 {
20190 group_insns[i] = 0;
20191 }
20192 }
20193
20194 if (GET_MODE (next_insn) == TImode && can_issue_more)
20195 PUT_MODE (next_insn, VOIDmode);
20196 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
20197 PUT_MODE (next_insn, TImode);
20198
20199 insn = next_insn;
20200 if (can_issue_more == 0)
20201 can_issue_more = issue_rate;
20202 } /* while */
20203
20204 return group_count;
20205 }
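/* Editor's note: the TImode/VOIDmode marking above is how the group
boundaries are recorded -- an insn carrying TImode starts a new
dispatch group, which is exactly what pad_groups below tests for. */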
20206
20207 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
20208 dispatch group boundaries that the scheduler had marked. Pad with nops
20209 any dispatch groups which have vacant issue slots, in order to force the
20210 scheduler's grouping on the processor dispatcher. The function
20211 returns the number of dispatch groups found. */
20212
20213 static int
20214 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
20215 rtx_insn *tail)
20216 {
20217 rtx_insn *insn, *next_insn;
20218 rtx nop;
20219 int issue_rate;
20220 int can_issue_more;
20221 int group_end;
20222 int group_count = 0;
20223
20224 /* Initialize issue_rate. */
20225 issue_rate = rs6000_issue_rate ();
20226 can_issue_more = issue_rate;
20227
20228 insn = get_next_active_insn (prev_head_insn, tail);
20229 next_insn = get_next_active_insn (insn, tail);
20230
20231 while (insn != NULL_RTX)
20232 {
20233 can_issue_more =
20234 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20235
20236 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
20237
20238 if (next_insn == NULL_RTX)
20239 break;
20240
20241 if (group_end)
20242 {
20243 /* If the scheduler had marked group termination at this location
20244 (between insn and next_insn), and neither insn nor next_insn will
20245 force group termination, pad the group with nops to force group
20246 termination. */
20247 if (can_issue_more
20248 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
20249 && !insn_terminates_group_p (insn, current_group)
20250 && !insn_terminates_group_p (next_insn, previous_group))
20251 {
20252 if (!is_branch_slot_insn (next_insn))
20253 can_issue_more--;
20254
20255 while (can_issue_more)
20256 {
20257 nop = gen_nop ();
20258 emit_insn_before (nop, next_insn);
20259 can_issue_more--;
20260 }
20261 }
20262
20263 can_issue_more = issue_rate;
20264 group_count++;
20265 }
20266
20267 insn = next_insn;
20268 next_insn = get_next_active_insn (insn, tail);
20269 }
20270
20271 return group_count;
20272 }
20273
20274 /* We're beginning a new block. Initialize data structures as necessary. */
20275
20276 static void
20277 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
20278 int sched_verbose ATTRIBUTE_UNUSED,
20279 int max_ready ATTRIBUTE_UNUSED)
20280 {
20281 last_scheduled_insn = NULL;
20282 load_store_pendulum = 0;
20283 divide_cnt = 0;
20284 vec_pairing = 0;
20285 }
20286
20287 /* The following function is called at the end of scheduling a BB.
20288 After reload, it inserts nops where needed for insn group bundling. */
20289
20290 static void
20291 rs6000_sched_finish (FILE *dump, int sched_verbose)
20292 {
20293 int n_groups;
20294
20295 if (sched_verbose)
20296 fprintf (dump, "=== Finishing schedule.\n");
20297
20298 if (reload_completed && rs6000_sched_groups)
20299 {
20300 /* Do not run sched_finish hook when selective scheduling enabled. */
20301 if (sel_sched_p ())
20302 return;
20303
20304 if (rs6000_sched_insert_nops == sched_finish_none)
20305 return;
20306
20307 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
20308 n_groups = pad_groups (dump, sched_verbose,
20309 current_sched_info->prev_head,
20310 current_sched_info->next_tail);
20311 else
20312 n_groups = redefine_groups (dump, sched_verbose,
20313 current_sched_info->prev_head,
20314 current_sched_info->next_tail);
20315
20316 if (sched_verbose >= 6)
20317 {
20318 fprintf (dump, "ngroups = %d\n", n_groups);
20319 print_rtl (dump, current_sched_info->prev_head);
20320 fprintf (dump, "Done finish_sched\n");
20321 }
20322 }
20323 }
20324
20325 struct rs6000_sched_context
20326 {
20327 short cached_can_issue_more;
20328 rtx_insn *last_scheduled_insn;
20329 int load_store_pendulum;
20330 int divide_cnt;
20331 int vec_pairing;
20332 };
20333
20334 typedef struct rs6000_sched_context rs6000_sched_context_def;
20335 typedef rs6000_sched_context_def *rs6000_sched_context_t;
20336
20337 /* Allocate store for new scheduling context. */
20338 static void *
20339 rs6000_alloc_sched_context (void)
20340 {
20341 return xmalloc (sizeof (rs6000_sched_context_def));
20342 }
20343
20344 /* Initialize _SC with clean data if CLEAN_P is true,
20345 and from the global context otherwise. */
20346 static void
20347 rs6000_init_sched_context (void *_sc, bool clean_p)
20348 {
20349 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20350
20351 if (clean_p)
20352 {
20353 sc->cached_can_issue_more = 0;
20354 sc->last_scheduled_insn = NULL;
20355 sc->load_store_pendulum = 0;
20356 sc->divide_cnt = 0;
20357 sc->vec_pairing = 0;
20358 }
20359 else
20360 {
20361 sc->cached_can_issue_more = cached_can_issue_more;
20362 sc->last_scheduled_insn = last_scheduled_insn;
20363 sc->load_store_pendulum = load_store_pendulum;
20364 sc->divide_cnt = divide_cnt;
20365 sc->vec_pairing = vec_pairing;
20366 }
20367 }
20368
20369 /* Sets the global scheduling context to the one pointed to by _SC. */
20370 static void
20371 rs6000_set_sched_context (void *_sc)
20372 {
20373 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20374
20375 gcc_assert (sc != NULL);
20376
20377 cached_can_issue_more = sc->cached_can_issue_more;
20378 last_scheduled_insn = sc->last_scheduled_insn;
20379 load_store_pendulum = sc->load_store_pendulum;
20380 divide_cnt = sc->divide_cnt;
20381 vec_pairing = sc->vec_pairing;
20382 }
20383
20384 /* Free _SC. */
20385 static void
20386 rs6000_free_sched_context (void *_sc)
20387 {
20388 gcc_assert (_sc != NULL);
20389
20390 free (_sc);
20391 }
20392
20393 static bool
20394 rs6000_sched_can_speculate_insn (rtx_insn *insn)
20395 {
20396 switch (get_attr_type (insn))
20397 {
20398 case TYPE_DIV:
20399 case TYPE_SDIV:
20400 case TYPE_DDIV:
20401 case TYPE_VECDIV:
20402 case TYPE_SSQRT:
20403 case TYPE_DSQRT:
20404 return false;
20405
20406 default:
20407 return true;
20408 }
20409 }
20410 \f
20411 /* Length in units of the trampoline for entering a nested function. */
20412
20413 int
20414 rs6000_trampoline_size (void)
20415 {
20416 int ret = 0;
20417
20418 switch (DEFAULT_ABI)
20419 {
20420 default:
20421 gcc_unreachable ();
20422
20423 case ABI_AIX:
20424 ret = (TARGET_32BIT) ? 12 : 24;
20425 break;
20426
20427 case ABI_ELFv2:
20428 gcc_assert (!TARGET_32BIT);
20429 ret = 32;
20430 break;
20431
20432 case ABI_DARWIN:
20433 case ABI_V4:
20434 ret = (TARGET_32BIT) ? 40 : 48;
20435 break;
20436 }
20437
20438 return ret;
20439 }
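/* Worked example (editor's note): for 64-bit AIX the 24 bytes cover
the 3-word function descriptor built by rs6000_trampoline_init
below -- function address, TOC pointer and static chain, at 8 bytes
each. */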
20440
20441 /* Emit RTL insns to initialize the variable parts of a trampoline.
20442 FNADDR is an RTX for the address of the function's pure code.
20443 CXT is an RTX for the static chain value for the function. */
20444
20445 static void
20446 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
20447 {
20448 int regsize = (TARGET_32BIT) ? 4 : 8;
20449 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
20450 rtx ctx_reg = force_reg (Pmode, cxt);
20451 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
20452
20453 switch (DEFAULT_ABI)
20454 {
20455 default:
20456 gcc_unreachable ();
20457
20458 /* Under AIX, just build the 3-word function descriptor. */
20459 case ABI_AIX:
20460 {
20461 rtx fnmem, fn_reg, toc_reg;
20462
20463 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
20464 error ("you cannot take the address of a nested function if you use "
20465 "the %qs option", "-mno-pointers-to-nested-functions");
20466
20467 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
20468 fn_reg = gen_reg_rtx (Pmode);
20469 toc_reg = gen_reg_rtx (Pmode);
20470
20471 /* Macro to shorten the code expansions below. */
20472 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20473
20474 m_tramp = replace_equiv_address (m_tramp, addr);
20475
20476 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
20477 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
20478 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
20479 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
20480 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
20481
20482 # undef MEM_PLUS
20483 }
20484 break;
20485
20486 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20487 case ABI_ELFv2:
20488 case ABI_DARWIN:
20489 case ABI_V4:
20490 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
20491 LCT_NORMAL, VOIDmode,
20492 addr, Pmode,
20493 GEN_INT (rs6000_trampoline_size ()), SImode,
20494 fnaddr, Pmode,
20495 ctx_reg, Pmode);
20496 break;
20497 }
20498 }
20499
20500 \f
20501 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20502 identifier as an argument, so the front end shouldn't look it up. */
20503
20504 static bool
20505 rs6000_attribute_takes_identifier_p (const_tree attr_id)
20506 {
20507 return is_attribute_p ("altivec", attr_id);
20508 }
20509
20510 /* Handle the "altivec" attribute. The attribute may have
20511 arguments as follows:
20512
20513 __attribute__((altivec(vector__)))
20514 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20515 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20516
20517 and may appear more than once (e.g., 'vector bool char') in a
20518 given declaration. */
20519
20520 static tree
20521 rs6000_handle_altivec_attribute (tree *node,
20522 tree name ATTRIBUTE_UNUSED,
20523 tree args,
20524 int flags ATTRIBUTE_UNUSED,
20525 bool *no_add_attrs)
20526 {
20527 tree type = *node, result = NULL_TREE;
20528 machine_mode mode;
20529 int unsigned_p;
20530 char altivec_type
20531 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20532 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20533 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20534 : '?');
20535
20536 while (POINTER_TYPE_P (type)
20537 || TREE_CODE (type) == FUNCTION_TYPE
20538 || TREE_CODE (type) == METHOD_TYPE
20539 || TREE_CODE (type) == ARRAY_TYPE)
20540 type = TREE_TYPE (type);
20541
20542 mode = TYPE_MODE (type);
20543
20544 /* Check for invalid AltiVec type qualifiers. */
20545 if (type == long_double_type_node)
20546 error ("use of %<long double%> in AltiVec types is invalid");
20547 else if (type == boolean_type_node)
20548 error ("use of boolean types in AltiVec types is invalid");
20549 else if (TREE_CODE (type) == COMPLEX_TYPE)
20550 error ("use of %<complex%> in AltiVec types is invalid");
20551 else if (DECIMAL_FLOAT_MODE_P (mode))
20552 error ("use of decimal floating-point types in AltiVec types is invalid");
20553 else if (!TARGET_VSX)
20554 {
20555 if (type == long_unsigned_type_node || type == long_integer_type_node)
20556 {
20557 if (TARGET_64BIT)
20558 error ("use of %<long%> in AltiVec types is invalid for "
20559 "64-bit code without %qs", "-mvsx");
20560 else if (rs6000_warn_altivec_long)
20561 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20562 "use %<int%>");
20563 }
20564 else if (type == long_long_unsigned_type_node
20565 || type == long_long_integer_type_node)
20566 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20567 "-mvsx");
20568 else if (type == double_type_node)
20569 error ("use of %<double%> in AltiVec types is invalid without %qs",
20570 "-mvsx");
20571 }
20572
20573 switch (altivec_type)
20574 {
20575 case 'v':
20576 unsigned_p = TYPE_UNSIGNED (type);
20577 switch (mode)
20578 {
20579 case E_TImode:
20580 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20581 break;
20582 case E_DImode:
20583 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20584 break;
20585 case E_SImode:
20586 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20587 break;
20588 case E_HImode:
20589 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20590 break;
20591 case E_QImode:
20592 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20593 break;
20594 case E_SFmode: result = V4SF_type_node; break;
20595 case E_DFmode: result = V2DF_type_node; break;
20596 /* If the user says 'vector int bool', we may be handed the 'bool'
20597 attribute _before_ the 'vector' attribute, and so select the
20598 proper type in the 'b' case below. */
20599 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20600 case E_V2DImode: case E_V2DFmode:
20601 result = type;
20602 default: break;
20603 }
20604 break;
20605 case 'b':
20606 switch (mode)
20607 {
20608 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20609 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20610 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20611 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20612 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
20613 default: break;
20614 }
20615 break;
20616 case 'p':
20617 switch (mode)
20618 {
20619 case E_V8HImode: result = pixel_V8HI_type_node;
20620 default: break;
20621 }
20622 default: break;
20623 }
20624
20625 /* Propagate qualifiers attached to the element type
20626 onto the vector type. */
20627 if (result && result != type && TYPE_QUALS (type))
20628 result = build_qualified_type (result, TYPE_QUALS (type));
20629
20630 *no_add_attrs = true; /* No need to hang on to the attribute. */
20631
20632 if (result)
20633 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20634
20635 return NULL_TREE;
20636 }
20637
20638 /* AltiVec defines five built-in scalar types that serve as vector
20639 elements; we must teach the compiler how to mangle them. The 128-bit
20640 floating point mangling is target-specific as well. MMA defines
20641 two built-in types to be used as opaque vector types. */
20642
20643 static const char *
20644 rs6000_mangle_type (const_tree type)
20645 {
20646 type = TYPE_MAIN_VARIANT (type);
20647
20648 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20649 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20650 && TREE_CODE (type) != OPAQUE_TYPE)
20651 return NULL;
20652
20653 if (type == bool_char_type_node) return "U6__boolc";
20654 if (type == bool_short_type_node) return "U6__bools";
20655 if (type == pixel_type_node) return "u7__pixel";
20656 if (type == bool_int_type_node) return "U6__booli";
20657 if (type == bool_long_long_type_node) return "U6__boolx";
20658
20659 if (type == float128_type_node || type == float64x_type_node)
20660 return NULL;
20661
20662 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20663 return "g";
20664 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20665 return "u9__ieee128";
20666
20667 if (type == vector_pair_type_node)
20668 return "u13__vector_pair";
20669 if (type == vector_quad_type_node)
20670 return "u13__vector_quad";
20671
20672 /* For all other types, use the default mangling. */
20673 return NULL;
20674 }
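/* Examples (editor's note, derived from the cases above): the AltiVec
element type "__bool int" mangles as "U6__booli" and "__pixel" as
"u7__pixel", while IBM double-double 128-bit floats mangle as "g"
and IEEE 128-bit floats as "u9__ieee128". */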
20675
20676 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20677 struct attribute_spec.handler. */
20678
20679 static tree
20680 rs6000_handle_longcall_attribute (tree *node, tree name,
20681 tree args ATTRIBUTE_UNUSED,
20682 int flags ATTRIBUTE_UNUSED,
20683 bool *no_add_attrs)
20684 {
20685 if (TREE_CODE (*node) != FUNCTION_TYPE
20686 && TREE_CODE (*node) != FIELD_DECL
20687 && TREE_CODE (*node) != TYPE_DECL)
20688 {
20689 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20690 name);
20691 *no_add_attrs = true;
20692 }
20693
20694 return NULL_TREE;
20695 }
20696
20697 /* Set longcall attributes on all functions declared when
20698 rs6000_default_long_calls is true. */
20699 static void
20700 rs6000_set_default_type_attributes (tree type)
20701 {
20702 if (rs6000_default_long_calls
20703 && FUNC_OR_METHOD_TYPE_P (type))
20704 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20705 NULL_TREE,
20706 TYPE_ATTRIBUTES (type));
20707
20708 #if TARGET_MACHO
20709 darwin_set_default_type_attributes (type);
20710 #endif
20711 }
20712
20713 /* Return a reference suitable for calling a function with the
20714 longcall attribute. */
20715
20716 static rtx
20717 rs6000_longcall_ref (rtx call_ref, rtx arg)
20718 {
20719 /* System V adds '.' to the internal name, so skip any leading dots. */
20720 const char *call_name = XSTR (call_ref, 0);
20721 if (*call_name == '.')
20722 {
20723 while (*call_name == '.')
20724 call_name++;
20725
20726 tree node = get_identifier (call_name);
20727 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20728 }
20729
20730 if (TARGET_PLTSEQ)
20731 {
20732 rtx base = const0_rtx;
20733 int regno = 12;
20734 if (rs6000_pcrel_p ())
20735 {
20736 rtx reg = gen_rtx_REG (Pmode, regno);
20737 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20738 gen_rtvec (3, base, call_ref, arg),
20739 UNSPECV_PLT_PCREL);
20740 emit_insn (gen_rtx_SET (reg, u));
20741 return reg;
20742 }
20743
20744 if (DEFAULT_ABI == ABI_ELFv2)
20745 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20746 else
20747 {
20748 if (flag_pic)
20749 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20750 regno = 11;
20751 }
20752 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20753 may be used by a function global entry point. For SysV4, r11
20754 is used by __glink_PLTresolve lazy resolver entry. */
20755 rtx reg = gen_rtx_REG (Pmode, regno);
20756 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20757 UNSPEC_PLT16_HA);
20758 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20759 gen_rtvec (3, reg, call_ref, arg),
20760 UNSPECV_PLT16_LO);
20761 emit_insn (gen_rtx_SET (reg, hi));
20762 emit_insn (gen_rtx_SET (reg, lo));
20763 return reg;
20764 }
20765
20766 return force_reg (Pmode, call_ref);
20767 }
20768 \f
20769 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20770 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20771 #endif
20772
20773 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20774 struct attribute_spec.handler. */
20775 static tree
20776 rs6000_handle_struct_attribute (tree *node, tree name,
20777 tree args ATTRIBUTE_UNUSED,
20778 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20779 {
20780 tree *type = NULL;
20781 if (DECL_P (*node))
20782 {
20783 if (TREE_CODE (*node) == TYPE_DECL)
20784 type = &TREE_TYPE (*node);
20785 }
20786 else
20787 type = node;
20788
20789 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20790 || TREE_CODE (*type) == UNION_TYPE)))
20791 {
20792 warning (OPT_Wattributes, "%qE attribute ignored", name);
20793 *no_add_attrs = true;
20794 }
20795
20796 else if ((is_attribute_p ("ms_struct", name)
20797 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20798 || (is_attribute_p ("gcc_struct", name)
20799 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))
20800 {
20801 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20802 name);
20803 *no_add_attrs = true;
20804 }
20805
20806 return NULL_TREE;
20807 }
20808
20809 static bool
20810 rs6000_ms_bitfield_layout_p (const_tree record_type)
20811 {
20812 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
20813 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20814 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20815 }
20816 \f
20817 #ifdef USING_ELFOS_H
20818
20819 /* A get_unnamed_section callback, used for switching to toc_section. */
20820
20821 static void
20822 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20823 {
20824 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20825 && TARGET_MINIMAL_TOC)
20826 {
20827 if (!toc_initialized)
20828 {
20829 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20830 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20831 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20832 fprintf (asm_out_file, "\t.tc ");
20833 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20834 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20835 fprintf (asm_out_file, "\n");
20836
20837 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20838 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20839 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20840 fprintf (asm_out_file, " = .+32768\n");
20841 toc_initialized = 1;
20842 }
20843 else
20844 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20845 }
20846 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20847 {
20848 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20849 if (!toc_initialized)
20850 {
20851 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20852 toc_initialized = 1;
20853 }
20854 }
20855 else
20856 {
20857 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20858 if (!toc_initialized)
20859 {
20860 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20861 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20862 fprintf (asm_out_file, " = .+32768\n");
20863 toc_initialized = 1;
20864 }
20865 }
20866 }
20867
20868 /* Implement TARGET_ASM_INIT_SECTIONS. */
20869
20870 static void
20871 rs6000_elf_asm_init_sections (void)
20872 {
20873 toc_section
20874 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20875
20876 sdata2_section
20877 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20878 SDATA2_SECTION_ASM_OP);
20879 }
20880
20881 /* Implement TARGET_SELECT_RTX_SECTION. */
20882
20883 static section *
20884 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20885 unsigned HOST_WIDE_INT align)
20886 {
20887 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20888 return toc_section;
20889 else
20890 return default_elf_select_rtx_section (mode, x, align);
20891 }
20892 \f
20893 /* For a SYMBOL_REF, set generic flags and then perform some
20894 target-specific processing.
20895
20896 When the AIX ABI is requested on a non-AIX system, replace the
20897 function name with the real name (with a leading .) rather than the
20898 function descriptor name. This saves a lot of overriding code to
20899 read the prefixes. */
20900
20901 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20902 static void
20903 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20904 {
20905 default_encode_section_info (decl, rtl, first);
20906
20907 if (first
20908 && TREE_CODE (decl) == FUNCTION_DECL
20909 && !TARGET_AIX
20910 && DEFAULT_ABI == ABI_AIX)
20911 {
20912 rtx sym_ref = XEXP (rtl, 0);
20913 size_t len = strlen (XSTR (sym_ref, 0));
20914 char *str = XALLOCAVEC (char, len + 2);
20915 str[0] = '.';
20916 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20917 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20918 }
20919 }
20920
20921 static inline bool
20922 compare_section_name (const char *section, const char *templ)
20923 {
20924 int len;
20925
20926 len = strlen (templ);
20927 return (strncmp (section, templ, len) == 0
20928 && (section[len] == 0 || section[len] == '.'));
20929 }
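/* Editor's note: the trailing check means ".sdata" matches both
".sdata" itself and subsections such as ".sdata.foo", but not a
name like ".sdatafoo". */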
20930
20931 bool
20932 rs6000_elf_in_small_data_p (const_tree decl)
20933 {
20934 if (rs6000_sdata == SDATA_NONE)
20935 return false;
20936
20937 /* We want to merge strings, so we never consider them small data. */
20938 if (TREE_CODE (decl) == STRING_CST)
20939 return false;
20940
20941 /* Functions are never in the small data area. */
20942 if (TREE_CODE (decl) == FUNCTION_DECL)
20943 return false;
20944
20945 if (VAR_P (decl) && DECL_SECTION_NAME (decl))
20946 {
20947 const char *section = DECL_SECTION_NAME (decl);
20948 if (compare_section_name (section, ".sdata")
20949 || compare_section_name (section, ".sdata2")
20950 || compare_section_name (section, ".gnu.linkonce.s")
20951 || compare_section_name (section, ".sbss")
20952 || compare_section_name (section, ".sbss2")
20953 || compare_section_name (section, ".gnu.linkonce.sb")
20954 || strcmp (section, ".PPC.EMB.sdata0") == 0
20955 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20956 return true;
20957 }
20958 else
20959 {
20960 /* If we are told not to put readonly data in sdata, then don't. */
20961 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20962 && !rs6000_readonly_in_sdata)
20963 return false;
20964
20965 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20966
20967 if (size > 0
20968 && size <= g_switch_value
20969 /* If it's not public, and we're not going to reference it there,
20970 there's no need to put it in the small data section. */
20971 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20972 return true;
20973 }
20974
20975 return false;
20976 }
20977
20978 #endif /* USING_ELFOS_H */
20979 \f
20980 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20981
20982 static bool
20983 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20984 {
20985 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20986 }
20987
20988 /* Do not place thread-local symbols refs in the object blocks. */
20989
20990 static bool
20991 rs6000_use_blocks_for_decl_p (const_tree decl)
20992 {
20993 return !DECL_THREAD_LOCAL_P (decl);
20994 }
20995 \f
20996 /* Return a REG that occurs in ADDR with coefficient 1.
20997 ADDR can be effectively incremented by incrementing REG.
20998
20999 r0 is special and we must not select it as an address
21000 register by this routine since our caller will try to
21001 increment the returned register via an "la" instruction. */
21002
21003 rtx
21004 find_addr_reg (rtx addr)
21005 {
21006 while (GET_CODE (addr) == PLUS)
21007 {
21008 if (REG_P (XEXP (addr, 0))
21009 && REGNO (XEXP (addr, 0)) != 0)
21010 addr = XEXP (addr, 0);
21011 else if (REG_P (XEXP (addr, 1))
21012 && REGNO (XEXP (addr, 1)) != 0)
21013 addr = XEXP (addr, 1);
21014 else if (CONSTANT_P (XEXP (addr, 0)))
21015 addr = XEXP (addr, 1);
21016 else if (CONSTANT_P (XEXP (addr, 1)))
21017 addr = XEXP (addr, 0);
21018 else
21019 gcc_unreachable ();
21020 }
21021 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
21022 return addr;
21023 }
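/* Example (editor's note): for ADDR (plus (reg 9) (const_int 8)) this
returns (reg 9). r0 must be rejected because the "la" the caller
emits would read r0 as the constant zero. */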
21024
21025 void
21026 rs6000_fatal_bad_address (rtx op)
21027 {
21028 fatal_insn ("bad address", op);
21029 }
21030
21031 #if TARGET_MACHO
21032
21033 vec<branch_island, va_gc> *branch_islands;
21034
21035 /* Remember to generate a branch island for far calls to the given
21036 function. */
21037
21038 static void
21039 add_compiler_branch_island (tree label_name, tree function_name,
21040 int line_number)
21041 {
21042 branch_island bi = {function_name, label_name, line_number};
21043 vec_safe_push (branch_islands, bi);
21044 }
21045
21046 /* NO_PREVIOUS_DEF checks in the list of branch islands whether the
21047 function name is already there or not. */
21048
21049 static int
21050 no_previous_def (tree function_name)
21051 {
21052 branch_island *bi;
21053 unsigned ix;
21054
21055 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
21056 if (function_name == bi->function_name)
21057 return 0;
21058 return 1;
21059 }
21060
21061 /* GET_PREV_LABEL gets the label name from the previous definition of
21062 the function. */
21063
21064 static tree
21065 get_prev_label (tree function_name)
21066 {
21067 branch_island *bi;
21068 unsigned ix;
21069
21070 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
21071 if (function_name == bi->function_name)
21072 return bi->label_name;
21073 return NULL_TREE;
21074 }
21075
21076 /* Generate external symbol indirection stubs (PIC and non-PIC). */
21077
21078 void
21079 machopic_output_stub (FILE *file, const char *symb, const char *stub)
21080 {
21081 unsigned int length;
21082 char *symbol_name, *lazy_ptr_name;
21083 char *local_label_0;
21084 static unsigned label = 0;
21085
21086 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21087 symb = (*targetm.strip_name_encoding) (symb);
21088
21089 length = strlen (symb);
21090 symbol_name = XALLOCAVEC (char, length + 32);
21091 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21092
21093 lazy_ptr_name = XALLOCAVEC (char, length + 32);
21094 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
21095
21096 if (MACHOPIC_PURE)
21097 {
21098 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
21099 fprintf (file, "\t.align 5\n");
21100
21101 fprintf (file, "%s:\n", stub);
21102 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21103
21104 label++;
21105 local_label_0 = XALLOCAVEC (char, 16);
21106 sprintf (local_label_0, "L%u$spb", label);
21107
21108 fprintf (file, "\tmflr r0\n");
21109 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
21110 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
21111 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
21112 lazy_ptr_name, local_label_0);
21113 fprintf (file, "\tmtlr r0\n");
21114 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
21115 (TARGET_64BIT ? "ldu" : "lwzu"),
21116 lazy_ptr_name, local_label_0);
21117 fprintf (file, "\tmtctr r12\n");
21118 fprintf (file, "\tbctr\n");
21119 }
21120 else /* mdynamic-no-pic or mkernel. */
21121 {
21122 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
21123 fprintf (file, "\t.align 4\n");
21124
21125 fprintf (file, "%s:\n", stub);
21126 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21127
21128 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
21129 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
21130 (TARGET_64BIT ? "ldu" : "lwzu"),
21131 lazy_ptr_name);
21132 fprintf (file, "\tmtctr r12\n");
21133 fprintf (file, "\tbctr\n");
21134 }
21135
21136 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
21137 fprintf (file, "%s:\n", lazy_ptr_name);
21138 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21139 fprintf (file, "%sdyld_stub_binding_helper\n",
21140 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
21141 }
21142
21143 /* Legitimize PIC addresses. If the address is already
21144 position-independent, we return ORIG. Newly generated
21145 position-independent addresses go into a reg. This is REG if
21146 nonzero, otherwise we allocate register(s) as necessary. */
21147
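/* Editor's note: true iff X fits in a signed 16-bit immediate,
i.e. -0x8000 <= X < 0x8000, computed with a single unsigned
comparison. */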
21148 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
21149
21150 rtx
21151 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
21152 rtx reg)
21153 {
21154 rtx base, offset;
21155
21156 if (reg == NULL && !reload_completed)
21157 reg = gen_reg_rtx (Pmode);
21158
21159 if (GET_CODE (orig) == CONST)
21160 {
21161 rtx reg_temp;
21162
21163 if (GET_CODE (XEXP (orig, 0)) == PLUS
21164 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
21165 return orig;
21166
21167 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
21168
21169 /* Use a different reg for the intermediate value, as
21170 it will be marked UNCHANGING. */
21171 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
21172 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
21173 Pmode, reg_temp);
21174 offset =
21175 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
21176 Pmode, reg);
21177
21178 if (CONST_INT_P (offset))
21179 {
21180 if (SMALL_INT (offset))
21181 return plus_constant (Pmode, base, INTVAL (offset));
21182 else if (!reload_completed)
21183 offset = force_reg (Pmode, offset);
21184 else
21185 {
21186 rtx mem = force_const_mem (Pmode, orig);
21187 return machopic_legitimize_pic_address (mem, Pmode, reg);
21188 }
21189 }
21190 return gen_rtx_PLUS (Pmode, base, offset);
21191 }
21192
21193 /* Fall back on generic machopic code. */
21194 return machopic_legitimize_pic_address (orig, mode, reg);
21195 }
21196
21197 /* Output a .machine directive for the Darwin assembler, and call
21198 the generic start_file routine. */
21199
21200 static void
21201 rs6000_darwin_file_start (void)
21202 {
21203 static const struct
21204 {
21205 const char *arg;
21206 const char *name;
21207 HOST_WIDE_INT if_set;
21208 } mapping[] = {
21209 { "ppc64", "ppc64", MASK_64BIT },
21210 { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF \
21211 | MASK_POWERPC64 },
21212 { "power4", "ppc970", 0 },
21213 { "G5", "ppc970", 0 },
21214 { "7450", "ppc7450", 0 },
21215 { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
21216 { "G4", "ppc7400", 0 },
21217 { "750", "ppc750", 0 },
21218 { "740", "ppc750", 0 },
21219 { "G3", "ppc750", 0 },
21220 { "604e", "ppc604e", 0 },
21221 { "604", "ppc604", 0 },
21222 { "603e", "ppc603", 0 },
21223 { "603", "ppc603", 0 },
21224 { "601", "ppc601", 0 },
21225 { NULL, "ppc", 0 } };
21226 const char *cpu_id = "";
21227 size_t i;
21228
21229 rs6000_file_start ();
21230 darwin_file_start ();
21231
21232 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
21233
21234 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
21235 cpu_id = rs6000_default_cpu;
21236
21237 if (OPTION_SET_P (rs6000_cpu_index))
21238 cpu_id = processor_target_table[rs6000_cpu_index].name;
21239
21240 /* Look through the mapping array. Pick the first name that either
21241 matches the argument, has a bit set in IF_SET that is also set
21242 in the target flags, or has a NULL name. */
21243
21244 i = 0;
21245 while (mapping[i].arg != NULL
21246 && strcmp (mapping[i].arg, cpu_id) != 0
21247 && (mapping[i].if_set & rs6000_isa_flags) == 0)
21248 i++;
21249
21250 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
21251 }
21252
21253 #endif /* TARGET_MACHO */
21254
21255 #if TARGET_ELF
21256 static int
21257 rs6000_elf_reloc_rw_mask (void)
21258 {
21259 if (flag_pic)
21260 return 3;
21261 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21262 return 2;
21263 else
21264 return 0;
21265 }
21266
21267 /* Record an element in the table of global constructors. SYMBOL is
21268 a SYMBOL_REF of the function to be called; PRIORITY is a number
21269 between 0 and MAX_INIT_PRIORITY.
21270
21271 This differs from default_named_section_asm_out_constructor in
21272 that we have special handling for -mrelocatable. */
21273
21274 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
21275 static void
21276 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
21277 {
21278 const char *section = ".ctors";
21279 char buf[18];
21280
21281 if (priority != DEFAULT_INIT_PRIORITY)
21282 {
21283 sprintf (buf, ".ctors.%.5u",
21284 /* Invert the numbering so the linker puts us in the proper
21285 order; constructors are run from right to left, and the
21286 linker sorts in increasing order. */
21287 MAX_INIT_PRIORITY - priority);
21288 section = buf;
21289 }
21290
21291 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21292 assemble_align (POINTER_SIZE);
21293
21294 if (DEFAULT_ABI == ABI_V4
21295 && (TARGET_RELOCATABLE || flag_pic > 1))
21296 {
21297 fputs ("\t.long (", asm_out_file);
21298 output_addr_const (asm_out_file, symbol);
21299 fputs (")@fixup\n", asm_out_file);
21300 }
21301 else
21302 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21303 }
21304
21305 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
21306 static void
21307 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
21308 {
21309 const char *section = ".dtors";
21310 char buf[18];
21311
21312 if (priority != DEFAULT_INIT_PRIORITY)
21313 {
21314 sprintf (buf, ".dtors.%.5u",
21315 /* Invert the numbering so the linker puts us in the proper
21316 order; constructors are run from right to left, and the
21317 linker sorts in increasing order. */
21318 MAX_INIT_PRIORITY - priority);
21319 section = buf;
21320 }
21321
21322 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21323 assemble_align (POINTER_SIZE);
21324
21325 if (DEFAULT_ABI == ABI_V4
21326 && (TARGET_RELOCATABLE || flag_pic > 1))
21327 {
21328 fputs ("\t.long (", asm_out_file);
21329 output_addr_const (asm_out_file, symbol);
21330 fputs (")@fixup\n", asm_out_file);
21331 }
21332 else
21333 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21334 }
21335
21336 void
21337 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
21338 {
21339 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
21340 {
21341 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
21342 ASM_OUTPUT_LABEL (file, name);
21343 fputs (DOUBLE_INT_ASM_OP, file);
21344 rs6000_output_function_entry (file, name);
21345 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
21346 if (DOT_SYMBOLS)
21347 {
21348 fputs ("\t.size\t", file);
21349 assemble_name (file, name);
21350 fputs (",24\n\t.type\t.", file);
21351 assemble_name (file, name);
21352 fputs (",@function\n", file);
21353 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
21354 {
21355 fputs ("\t.globl\t.", file);
21356 assemble_name (file, name);
21357 putc ('\n', file);
21358 }
21359 }
21360 else
21361 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21362 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21363 rs6000_output_function_entry (file, name);
21364 fputs (":\n", file);
21365 assemble_function_label_final ();
21366 return;
21367 }
21368
21369 int uses_toc;
21370 if (DEFAULT_ABI == ABI_V4
21371 && (TARGET_RELOCATABLE || flag_pic > 1)
21372 && !TARGET_SECURE_PLT
21373 && (!constant_pool_empty_p () || crtl->profile)
21374 && (uses_toc = uses_TOC ()))
21375 {
21376 char buf[256];
21377
21378 if (uses_toc == 2)
21379 switch_to_other_text_partition ();
21380 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21381
21382 fprintf (file, "\t.long ");
21383 assemble_name (file, toc_label_name);
21384 need_toc_init = 1;
21385 putc ('-', file);
21386 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21387 assemble_name (file, buf);
21388 putc ('\n', file);
21389 if (uses_toc == 2)
21390 switch_to_other_text_partition ();
21391 }
21392
21393 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21394 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21395
21396 if (TARGET_CMODEL == CMODEL_LARGE
21397 && rs6000_global_entry_point_prologue_needed_p ())
21398 {
21399 char buf[256];
21400
21401 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21402
21403 fprintf (file, "\t.quad .TOC.-");
21404 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21405 assemble_name (file, buf);
21406 putc ('\n', file);
21407 }
21408
21409 if (DEFAULT_ABI == ABI_AIX)
21410 {
21411 const char *desc_name, *orig_name;
21412
21413 orig_name = (*targetm.strip_name_encoding) (name);
21414 desc_name = orig_name;
21415 while (*desc_name == '.')
21416 desc_name++;
21417
21418 if (TREE_PUBLIC (decl))
21419 fprintf (file, "\t.globl %s\n", desc_name);
21420
21421 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
21422 fprintf (file, "%s:\n", desc_name);
21423 fprintf (file, "\t.long %s\n", orig_name);
21424 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
21425 fputs ("\t.long 0\n", file);
21426 fprintf (file, "\t.previous\n");
21427 }
21428 ASM_OUTPUT_FUNCTION_LABEL (file, name, decl);
21429 }
21430
21431 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
21432 static void
21433 rs6000_elf_file_end (void)
21434 {
21435 #ifdef HAVE_AS_GNU_ATTRIBUTE
21436 /* ??? The value emitted depends on options active at file end.
21437 Assume anyone using #pragma or attributes that might change
21438 options knows what they are doing. */
21439 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
21440 && rs6000_passes_float)
21441 {
21442 int fp;
21443
21444 if (TARGET_HARD_FLOAT)
21445 fp = 1;
21446 else
21447 fp = 2;
21448 if (rs6000_passes_long_double)
21449 {
21450 if (!TARGET_LONG_DOUBLE_128)
21451 fp |= 2 * 4;
21452 else if (TARGET_IEEEQUAD)
21453 fp |= 3 * 4;
21454 else
21455 fp |= 1 * 4;
21456 }
21457 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
21458 }
21459 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
21460 {
21461 if (rs6000_passes_vector)
21462 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
21463 (TARGET_ALTIVEC_ABI ? 2 : 1));
21464 if (rs6000_returns_struct)
21465 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
21466 aix_struct_return ? 2 : 1);
21467 }
21468 #endif
21469 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21470 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
21471 file_end_indicate_exec_stack ();
21472 #endif
21473
21474 if (flag_split_stack)
21475 file_end_indicate_split_stack ();
21476
21477 if (cpu_builtin_p)
21478 {
21479 /* We have expanded a CPU builtin, so we need to emit a reference to
21480 the special symbol that LIBC uses to declare that it supports
21481 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 lookups in the TCB. */
21482 switch_to_section (data_section);
21483 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
21484 fprintf (asm_out_file, "\t%s %s\n",
21485 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
21486 }
21487 }
21488 #endif
21489
21490 #if TARGET_XCOFF
21491
21492 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21493 #define HAVE_XCOFF_DWARF_EXTRAS 0
21494 #endif
21495
21496
21497 /* Names of bss and data sections. These should be unique names for each
21498 compilation unit. */
21499
21500 char *xcoff_bss_section_name;
21501 char *xcoff_private_data_section_name;
21502 char *xcoff_private_rodata_section_name;
21503 char *xcoff_tls_data_section_name;
21504 char *xcoff_read_only_section_name;
21505
21506 static enum unwind_info_type
21507 rs6000_xcoff_debug_unwind_info (void)
21508 {
21509 return UI_NONE;
21510 }
21511
21512 static void
21513 rs6000_xcoff_asm_output_anchor (rtx symbol)
21514 {
21515 char buffer[100];
21516
21517 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21518 SYMBOL_REF_BLOCK_OFFSET (symbol));
21519 fprintf (asm_out_file, "%s", SET_ASM_OP);
21520 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21521 fprintf (asm_out_file, ",");
21522 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21523 fprintf (asm_out_file, "\n");
21524 }
21525
21526 static void
21527 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21528 {
21529 fputs (GLOBAL_ASM_OP, stream);
21530 RS6000_OUTPUT_BASENAME (stream, name);
21531 putc ('\n', stream);
21532 }
21533
21534 /* A get_unnamed_section callback, used for read-only sections. A
21535 non-null DIRECTIVE selects the private read-only data section. */
21536
21537 static void
21538 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21539 {
21540 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21541 directive
21542 ? xcoff_private_rodata_section_name
21543 : xcoff_read_only_section_name,
21544 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21545 }
21546
21547 /* Likewise for read-write sections. */
21548
21549 static void
21550 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21551 {
21552 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21553 xcoff_private_data_section_name,
21554 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21555 }
21556
21557 static void
21558 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21559 {
21560 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21561 directive
21562 ? xcoff_private_data_section_name
21563 : xcoff_tls_data_section_name,
21564 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21565 }
21566
21567 /* A get_unnamed_section callback, used for switching to toc_section. */
21568
21569 static void
21570 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21571 {
21572 if (TARGET_MINIMAL_TOC)
21573 {
21574 /* toc_section is always selected at least once from
21575 rs6000_xcoff_file_start, so LCTOC..1 is guaranteed to be
21576 defined exactly once in each file. */
21577 if (!toc_initialized)
21578 {
21579 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21580 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21581 toc_initialized = 1;
21582 }
21583 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21584 (TARGET_32BIT ? "" : ",3"));
21585 }
21586 else
21587 fputs ("\t.toc\n", asm_out_file);
21588 }
21589
21590 /* Implement TARGET_ASM_INIT_SECTIONS. */
21591
21592 static void
21593 rs6000_xcoff_asm_init_sections (void)
21594 {
21595 read_only_data_section
21596 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21597 NULL);
21598
21599 private_data_section
21600 = get_unnamed_section (SECTION_WRITE,
21601 rs6000_xcoff_output_readwrite_section_asm_op,
21602 NULL);
21603
21604 read_only_private_data_section
21605 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21606 "");
21607
21608 tls_data_section
21609 = get_unnamed_section (SECTION_TLS,
21610 rs6000_xcoff_output_tls_section_asm_op,
21611 NULL);
21612
21613 tls_private_data_section
21614 = get_unnamed_section (SECTION_TLS,
21615 rs6000_xcoff_output_tls_section_asm_op,
21616 "");
21617
21618 toc_section
21619 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21620
21621 readonly_data_section = read_only_data_section;
21622 }
21623
21624 static int
21625 rs6000_xcoff_reloc_rw_mask (void)
21626 {
21627 return 3;
21628 }
21629
21630 static void
21631 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21632 tree decl ATTRIBUTE_UNUSED)
21633 {
21634 int smclass;
21635 static const char * const suffix[7]
21636 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21637
21638 if (flags & SECTION_EXCLUDE)
21639 smclass = 6;
21640 else if (flags & SECTION_DEBUG)
21641 {
21642 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21643 return;
21644 }
21645 else if (flags & SECTION_CODE)
21646 smclass = 0;
21647 else if (flags & SECTION_TLS)
21648 {
21649 if (flags & SECTION_BSS)
21650 smclass = 5;
21651 else
21652 smclass = 4;
21653 }
21654 else if (flags & SECTION_WRITE)
21655 {
21656 if (flags & SECTION_BSS)
21657 smclass = 3;
21658 else
21659 smclass = 2;
21660 }
21661 else
21662 smclass = 1;
21663
21664 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21665 (flags & SECTION_CODE) ? "." : "",
21666 name, suffix[smclass], flags & SECTION_ENTSIZE);
21667 }
21668
21669 #define IN_NAMED_SECTION(DECL) \
21670 ((TREE_CODE (DECL) == FUNCTION_DECL || VAR_P (DECL)) \
21671 && DECL_SECTION_NAME (DECL) != NULL)
21672
21673 static section *
21674 rs6000_xcoff_select_section (tree decl, int reloc,
21675 unsigned HOST_WIDE_INT align)
21676 {
21677 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21678 named section. */
21679 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21680 {
21681 resolve_unique_section (decl, reloc, true);
21682 if (IN_NAMED_SECTION (decl))
21683 return get_named_section (decl, NULL, reloc);
21684 }
21685
21686 if (decl_readonly_section (decl, reloc))
21687 {
21688 if (TREE_PUBLIC (decl))
21689 return read_only_data_section;
21690 else
21691 return read_only_private_data_section;
21692 }
21693 else
21694 {
21695 #if HAVE_AS_TLS
21696 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21697 {
21698 if (bss_initializer_p (decl))
21699 return tls_comm_section;
21700 else if (TREE_PUBLIC (decl))
21701 return tls_data_section;
21702 else
21703 return tls_private_data_section;
21704 }
21705 else
21706 #endif
21707 if (TREE_PUBLIC (decl))
21708 return data_section;
21709 else
21710 return private_data_section;
21711 }
21712 }
21713
21714 static void
21715 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21716 {
21717 const char *name;
21718
21719 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21720 name = (*targetm.strip_name_encoding) (name);
21721 set_decl_section_name (decl, name);
21722 }
21723
21724 /* Select section for constant in constant pool.
21725
21726 On RS/6000, all constants are in the private read-only data area.
21727 However, if this is being placed in the TOC it must be output as a
21728 toc entry. */
21729
21730 static section *
21731 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21732 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21733 {
21734 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21735 return toc_section;
21736 else
21737 return read_only_private_data_section;
21738 }
21739
21740 /* Remove any trailing [DS] or the like from the symbol name. */
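/* For example, "foo[DS]" and "bar[RW]" become "foo" and "bar"; the
mapping-class suffix is assumed to be exactly four characters long. */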
21741
21742 static const char *
21743 rs6000_xcoff_strip_name_encoding (const char *name)
21744 {
21745 size_t len;
21746 if (*name == '*')
21747 name++;
21748 len = strlen (name);
21749 if (name[len - 1] == ']')
21750 return ggc_alloc_string (name, len - 4);
21751 else
21752 return name;
21753 }
21754
21755 /* Section attributes. AIX is always PIC. */
21756
21757 static unsigned int
21758 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21759 {
21760 unsigned int align;
21761 unsigned int flags = default_section_type_flags (decl, name, reloc);
21762
21763 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21764 flags |= SECTION_BSS;
21765
21766 /* Align to at least MIN_UNITS_PER_WORD bytes. */
21767 if (!decl || !DECL_P (decl))
21768 align = MIN_UNITS_PER_WORD;
21769 /* Align code CSECT to at least 32 bytes. */
21770 else if ((flags & SECTION_CODE) != 0)
21771 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21772 else
21773 /* Increase alignment of large objects if not already stricter. */
21774 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21775 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21776 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21777
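/* The log2 alignment rides in the SECTION_ENTSIZE bits and is decoded
as the csect alignment operand in rs6000_xcoff_asm_named_section. */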
21778 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21779 }
21780
21781 /* Output at beginning of assembler file.
21782
21783 Initialize the section names for the RS/6000 at this point.
21784
21785 Specify filename, including full path, to assembler.
21786
21787 We want to go into the TOC section so at least one .toc will be emitted.
21788 Also, in order to output proper .bs/.es pairs, we need at least one static
21789 [RW] section emitted.
21790
21791 Finally, declare mcount when profiling to make the assembler happy. */
21792
21793 static void
21794 rs6000_xcoff_file_start (void)
21795 {
21796 rs6000_gen_section_name (&xcoff_bss_section_name,
21797 main_input_filename, ".bss_");
21798 rs6000_gen_section_name (&xcoff_private_data_section_name,
21799 main_input_filename, ".rw_");
21800 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21801 main_input_filename, ".rop_");
21802 rs6000_gen_section_name (&xcoff_read_only_section_name,
21803 main_input_filename, ".ro_");
21804 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21805 main_input_filename, ".tls_");
21806
21807 fputs ("\t.file\t", asm_out_file);
21808 output_quoted_string (asm_out_file, main_input_filename);
21809 fputc ('\n', asm_out_file);
21810 if (write_symbols != NO_DEBUG)
21811 switch_to_section (private_data_section);
21812 switch_to_section (toc_section);
21813 switch_to_section (text_section);
21814 if (profile_flag)
21815 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21816 rs6000_file_start ();
21817 }
21818
21819 /* Output at end of assembler file.
21820 On the RS/6000, referencing data should automatically pull in text. */
21821
21822 static void
21823 rs6000_xcoff_file_end (void)
21824 {
21825 switch_to_section (text_section);
21826 if (xcoff_tls_exec_model_detected)
21827 {
21828 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21829 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21830 }
21831 fputs ("_section_.text:\n", asm_out_file);
21832 switch_to_section (data_section);
21833 fputs (TARGET_32BIT
21834 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21835 asm_out_file);
21836
21837 }
21838
21839 struct declare_alias_data
21840 {
21841 FILE *file;
21842 bool function_descriptor;
21843 };
21844
21845 /* Declare alias N. A callback for call_for_symbol_and_aliases. */
21846
21847 static bool
21848 rs6000_declare_alias (struct symtab_node *n, void *d)
21849 {
21850 struct declare_alias_data *data = (struct declare_alias_data *)d;
21851 /* Main symbol is output specially, because varasm machinery does part of
21852 the job for us - we do not need to declare .globl/lglobs and such. */
21853 if (!n->alias || n->weakref)
21854 return false;
21855
21856 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21857 return false;
21858
21859 /* Prevent assemble_alias from trying to use .set pseudo operation
21860 that does not behave as expected by the middle-end. */
21861 TREE_ASM_WRITTEN (n->decl) = true;
21862
21863 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21864 char *buffer = (char *) alloca (strlen (name) + 2);
21865 char *p;
21866 int dollar_inside = 0;
21867
21868 strcpy (buffer, name);
21869 p = strchr (buffer, '$');
21870 while (p) {
21871 *p = '_';
21872 dollar_inside++;
21873 p = strchr (p + 1, '$');
21874 }
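/* AIX assembler names cannot contain '$', so e.g. an alias "f$1" is
emitted as "f_1" together with a .rename directive mapping it back to
the original "f$1". */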
21875 if (TREE_PUBLIC (n->decl))
21876 {
21877 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21878 {
21879 if (dollar_inside) {
21880 if (data->function_descriptor)
21881 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21882 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21883 }
21884 if (data->function_descriptor)
21885 {
21886 fputs ("\t.globl .", data->file);
21887 RS6000_OUTPUT_BASENAME (data->file, buffer);
21888 putc ('\n', data->file);
21889 }
21890 fputs ("\t.globl ", data->file);
21891 assemble_name (data->file, buffer);
21892 putc ('\n', data->file);
21893 }
21894 #ifdef ASM_WEAKEN_DECL
21895 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21896 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21897 #endif
21898 }
21899 else
21900 {
21901 if (dollar_inside)
21902 {
21903 if (data->function_descriptor)
21904 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21905 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21906 }
21907 if (data->function_descriptor)
21908 {
21909 fputs ("\t.lglobl .", data->file);
21910 RS6000_OUTPUT_BASENAME (data->file, buffer);
21911 putc ('\n', data->file);
21912 }
21913 fputs ("\t.lglobl ", data->file);
21914 assemble_name (data->file, buffer);
21915 putc ('\n', data->file);
21916 }
21917 if (data->function_descriptor)
21918 putc ('.', data->file);
21919 ASM_OUTPUT_LABEL (data->file, buffer);
21920 return false;
21921 }
21922
21923
21924 #ifdef HAVE_GAS_HIDDEN
21925 /* Helper function to calculate visibility of a DECL
21926 and return the value as a const string. */
21927
21928 static const char *
21929 rs6000_xcoff_visibility (tree decl)
21930 {
21931 static const char * const visibility_types[] = {
21932 "", ",protected", ",hidden", ",internal"
21933 };
21934
21935 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21936 return visibility_types[vis];
21937 }
21938 #endif
21939
21940
21941 /* This macro produces the initial definition of a function name.
21942 On the RS/6000, we need to place an extra '.' in the function name and
21943 output the function descriptor.
21944 Dollar signs are converted to underscores.
21945
21946 The csect for the function will have already been created when
21947 text_section was selected. We do have to go back to that csect, however.
21948
21949 The third and fourth parameters to the .function pseudo-op (the 2 and
21950 0 emitted below) are placeholders which no longer have any use.
21951
21952 Because AIX assembler's .set command has unexpected semantics, we output
21953 all aliases as alternative labels in front of the definition. */
21954
21955 void
21956 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21957 {
21958 char *buffer = (char *) alloca (strlen (name) + 1);
21959 char *p;
21960 int dollar_inside = 0;
21961 struct declare_alias_data data = {file, false};
21962
21963 strcpy (buffer, name);
21964 p = strchr (buffer, '$');
21965 while (p) {
21966 *p = '_';
21967 dollar_inside++;
21968 p = strchr (p + 1, '$');
21969 }
21970 if (TREE_PUBLIC (decl))
21971 {
21972 if (!RS6000_WEAK || !DECL_WEAK (decl))
21973 {
21974 if (dollar_inside) {
21975 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21976 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21977 }
21978 fputs ("\t.globl .", file);
21979 RS6000_OUTPUT_BASENAME (file, buffer);
21980 #ifdef HAVE_GAS_HIDDEN
21981 fputs (rs6000_xcoff_visibility (decl), file);
21982 #endif
21983 putc ('\n', file);
21984 }
21985 }
21986 else
21987 {
21988 if (dollar_inside) {
21989 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21990 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21991 }
21992 fputs ("\t.lglobl .", file);
21993 RS6000_OUTPUT_BASENAME (file, buffer);
21994 putc ('\n', file);
21995 }
21996
21997 fputs ("\t.csect ", file);
21998 assemble_name (file, buffer);
21999 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
22000
22001 ASM_OUTPUT_FUNCTION_LABEL (file, buffer, decl);
22002
22003 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
22004 &data, true);
22005 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
22006 RS6000_OUTPUT_BASENAME (file, buffer);
22007 fputs (", TOC[tc0], 0\n", file);
22008
22009 in_section = NULL;
22010 switch_to_section (function_section (decl));
22011 putc ('.', file);
22012 ASM_OUTPUT_LABEL (file, buffer);
22013
22014 data.function_descriptor = true;
22015 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
22016 &data, true);
22017 if (!DECL_IGNORED_P (decl))
22018 {
22019 if (dwarf_debuginfo_p ())
22020 {
22021 name = (*targetm.strip_name_encoding) (name);
22022 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
22023 }
22024 }
22025 return;
22026 }
22027
22028
22029 /* Output assembly language to globalize a symbol from a DECL,
22030 possibly with visibility. */
22031
22032 void
22033 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
22034 {
22035 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
22036 fputs (GLOBAL_ASM_OP, stream);
22037 assemble_name (stream, name);
22038 #ifdef HAVE_GAS_HIDDEN
22039 fputs (rs6000_xcoff_visibility (decl), stream);
22040 #endif
22041 putc ('\n', stream);
22042 }
22043
22044 /* Output assembly language to define a symbol as COMMON from a DECL,
22045 possibly with visibility. */
22046
22047 void
22048 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
22049 tree decl ATTRIBUTE_UNUSED,
22050 const char *name,
22051 unsigned HOST_WIDE_INT size,
22052 unsigned int align)
22053 {
22054 unsigned int align2 = 2;
22055
22056 if (align == 0)
22057 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
22058
22059 if (align > 32)
22060 align2 = floor_log2 (align / BITS_PER_UNIT);
22061 else if (size > 4)
22062 align2 = 3;
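/* ALIGN is in bits; ALIGN2 is the log2 byte alignment passed to the
directive, e.g. ALIGN == 128 yields ALIGN2 == 4 (16-byte alignment). */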
22063
22064 if (! DECL_COMMON (decl))
22065 {
22066 /* Forget section. */
22067 in_section = NULL;
22068
22069 /* Globalize TLS BSS. */
22070 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
22071 {
22072 fputs (GLOBAL_ASM_OP, stream);
22073 assemble_name (stream, name);
22074 fputc ('\n', stream);
22075 }
22076
22077 /* Switch to section and skip space. */
22078 fputs ("\t.csect ", stream);
22079 assemble_name (stream, name);
22080 fprintf (stream, ",%u\n", align2);
22081 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
22082 ASM_OUTPUT_SKIP (stream, size ? size : 1);
22083 return;
22084 }
22085
22086 if (TREE_PUBLIC (decl))
22087 {
22088 fprintf (stream,
22089 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
22090 name, size, align2);
22091
22092 #ifdef HAVE_GAS_HIDDEN
22093 if (decl != NULL)
22094 fputs (rs6000_xcoff_visibility (decl), stream);
22095 #endif
22096 putc ('\n', stream);
22097 }
22098 else
22099 fprintf (stream,
22100 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
22101 (*targetm.strip_name_encoding) (name), size, name, align2);
22102 }
22103
22104 /* This macro produces the initial definition of an object (variable) name.
22105 Because AIX assembler's .set command has unexpected semantics, we output
22106 all aliases as alternative labels in front of the definition. */
22107
22108 void
22109 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
22110 {
22111 struct declare_alias_data data = {file, false};
22112 ASM_OUTPUT_LABEL (file, name);
22113 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
22114 &data, true);
22115 }
22116
22117 /* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */
22118
22119 void
22120 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
22121 {
22122 fputs (integer_asm_op (size, FALSE), file);
22123 assemble_name (file, label);
22124 fputs ("-$", file);
22125 }
22126
22127 /* Output a symbol offset relative to the dbase for the current object.
22128 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
22129 signed offsets.
22130
22131 __gcc_unwind_dbase is embedded in all executables/libraries through
22132 libgcc/config/rs6000/crtdbase.S. */
22133
22134 void
22135 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
22136 {
22137 fputs (integer_asm_op (size, FALSE), file);
22138 assemble_name (file, label);
22139 fputs("-__gcc_unwind_dbase", file);
22140 }
22141
22142 #ifdef HAVE_AS_TLS
22143 static void
22144 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
22145 {
22146 rtx symbol;
22147 int flags;
22148 const char *symname;
22149
22150 default_encode_section_info (decl, rtl, first);
22151
22152 /* Careful not to prod global register variables. */
22153 if (!MEM_P (rtl))
22154 return;
22155 symbol = XEXP (rtl, 0);
22156 if (!SYMBOL_REF_P (symbol))
22157 return;
22158
22159 flags = SYMBOL_REF_FLAGS (symbol);
22160
22161 if (VAR_P (decl) && DECL_THREAD_LOCAL_P (decl))
22162 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
22163
22164 SYMBOL_REF_FLAGS (symbol) = flags;
22165
22166 symname = XSTR (symbol, 0);
22167
22168 /* Append CSECT mapping class, unless the symbol already is qualified.
22169 Aliases are implemented as labels, so the symbol name should not add
22170 a mapping class. */
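/* For example, a plain function "foo" becomes "foo[DS]" here, and an
uninitialized TLS variable gets a "[UL]" suffix. */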
22171 if (decl
22172 && DECL_P (decl)
22173 && VAR_OR_FUNCTION_DECL_P (decl)
22174 && (symtab_node::get (decl) == NULL
22175 || symtab_node::get (decl)->alias == 0)
22176 && symname[strlen (symname) - 1] != ']')
22177 {
22178 const char *smclass = NULL;
22179
22180 if (TREE_CODE (decl) == FUNCTION_DECL)
22181 smclass = "[DS]";
22182 else if (DECL_THREAD_LOCAL_P (decl))
22183 {
22184 if (bss_initializer_p (decl))
22185 smclass = "[UL]";
22186 else if (flag_data_sections)
22187 smclass = "[TL]";
22188 }
22189 else if (DECL_EXTERNAL (decl))
22190 smclass = "[UA]";
22191 else if (bss_initializer_p (decl))
22192 smclass = "[BS]";
22193 else if (flag_data_sections)
22194 {
22195 /* This must exactly match the logic of rs6000_xcoff_select_section. */
22196 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
22197 smclass = "[RO]";
22198 else
22199 smclass = "[RW]";
22200 }
22201
22202 if (smclass != NULL)
22203 {
22204 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
22205
22206 strcpy (newname, symname);
22207 strcat (newname, smclass);
22208 XSTR (symbol, 0) = ggc_strdup (newname);
22209 }
22210 }
22211 }
22212 #endif /* HAVE_AS_TLS */
22213 #endif /* TARGET_XCOFF */
22214
22215 void
22216 rs6000_asm_weaken_decl (FILE *stream, tree decl,
22217 const char *name, const char *val)
22218 {
22219 fputs ("\t.weak\t", stream);
22220 assemble_name (stream, name);
22221 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22222 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22223 {
22224 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22225 if (TARGET_XCOFF)
22226 fputs (rs6000_xcoff_visibility (decl), stream);
22227 #endif
22228 fputs ("\n\t.weak\t.", stream);
22229 RS6000_OUTPUT_BASENAME (stream, name);
22230 }
22231 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22232 if (TARGET_XCOFF)
22233 fputs (rs6000_xcoff_visibility (decl), stream);
22234 #endif
22235 fputc ('\n', stream);
22236
22237 if (val)
22238 {
22239 #ifdef ASM_OUTPUT_DEF
22240 ASM_OUTPUT_DEF (stream, name, val);
22241 #endif
22242 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22243 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22244 {
22245 fputs ("\t.set\t.", stream);
22246 RS6000_OUTPUT_BASENAME (stream, name);
22247 fputs (",.", stream);
22248 RS6000_OUTPUT_BASENAME (stream, val);
22249 fputc ('\n', stream);
22250 }
22251 }
22252 }
22253
22254
22255 /* Return true if INSN should not be copied. */
22256
22257 static bool
22258 rs6000_cannot_copy_insn_p (rtx_insn *insn)
22259 {
22260 return recog_memoized (insn) >= 0
22261 && get_attr_cannot_copy (insn);
22262 }
22263
22264 /* Compute a (partial) cost for rtx X. Return true if the complete
22265 cost has been computed, and false if subexpressions should be
22266 scanned. In either case, *TOTAL contains the cost result. */
22267
22268 static bool
22269 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
22270 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
22271 {
22272 int code = GET_CODE (x);
22273
22274 switch (code)
22275 {
22276 /* On the RS/6000, if it is valid in the insn, it is free. */
22277 case CONST_INT:
22278 if (((outer_code == SET
22279 || outer_code == PLUS
22280 || outer_code == MINUS)
22281 && (satisfies_constraint_I (x)
22282 || satisfies_constraint_L (x)))
22283 || (outer_code == AND
22284 && (satisfies_constraint_K (x)
22285 || (mode == SImode
22286 ? satisfies_constraint_L (x)
22287 : satisfies_constraint_J (x))))
22288 || ((outer_code == IOR || outer_code == XOR)
22289 && (satisfies_constraint_K (x)
22290 || (mode == SImode
22291 ? satisfies_constraint_L (x)
22292 : satisfies_constraint_J (x))))
22293 || outer_code == ASHIFT
22294 || outer_code == ASHIFTRT
22295 || outer_code == LSHIFTRT
22296 || outer_code == ROTATE
22297 || outer_code == ROTATERT
22298 || outer_code == ZERO_EXTRACT
22299 || (outer_code == MULT
22300 && satisfies_constraint_I (x))
22301 || ((outer_code == DIV || outer_code == UDIV
22302 || outer_code == MOD || outer_code == UMOD)
22303 && exact_log2 (INTVAL (x)) >= 0)
22304 || (outer_code == COMPARE
22305 && (satisfies_constraint_I (x)
22306 || satisfies_constraint_K (x)))
22307 || ((outer_code == EQ || outer_code == NE)
22308 && (satisfies_constraint_I (x)
22309 || satisfies_constraint_K (x)
22310 || (mode == SImode
22311 ? satisfies_constraint_L (x)
22312 : satisfies_constraint_J (x))))
22313 || (outer_code == GTU
22314 && satisfies_constraint_I (x))
22315 || (outer_code == LTU
22316 && satisfies_constraint_P (x)))
22317 {
22318 *total = 0;
22319 return true;
22320 }
22321 else if ((outer_code == PLUS
22322 && reg_or_add_cint_operand (x, mode))
22323 || (outer_code == MINUS
22324 && reg_or_sub_cint_operand (x, mode))
22325 || ((outer_code == SET
22326 || outer_code == IOR
22327 || outer_code == XOR)
22328 && (INTVAL (x)
22329 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
22330 {
22331 *total = COSTS_N_INSNS (1);
22332 return true;
22333 }
22334 /* FALLTHRU */
22335
22336 case CONST_DOUBLE:
22337 case CONST_WIDE_INT:
22338 case CONST:
22339 case HIGH:
22340 case SYMBOL_REF:
22341 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22342 return true;
22343
22344 case MEM:
22345 /* When optimizing for size, MEM should be slightly more expensive
22346 than generating an address, e.g., (plus (reg) (const)).
22347 L1 cache latency is about two instructions. */
22348 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22349 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
22350 *total += COSTS_N_INSNS (100);
22351 return true;
22352
22353 case LABEL_REF:
22354 *total = 0;
22355 return true;
22356
22357 case PLUS:
22358 case MINUS:
22359 if (FLOAT_MODE_P (mode))
22360 *total = rs6000_cost->fp;
22361 else
22362 *total = COSTS_N_INSNS (1);
22363 return false;
22364
22365 case MULT:
22366 if (CONST_INT_P (XEXP (x, 1))
22367 && satisfies_constraint_I (XEXP (x, 1)))
22368 {
22369 if (INTVAL (XEXP (x, 1)) >= -256
22370 && INTVAL (XEXP (x, 1)) <= 255)
22371 *total = rs6000_cost->mulsi_const9;
22372 else
22373 *total = rs6000_cost->mulsi_const;
22374 }
22375 else if (mode == SFmode)
22376 *total = rs6000_cost->fp;
22377 else if (FLOAT_MODE_P (mode))
22378 *total = rs6000_cost->dmul;
22379 else if (mode == DImode)
22380 *total = rs6000_cost->muldi;
22381 else
22382 *total = rs6000_cost->mulsi;
22383 return false;
22384
22385 case FMA:
22386 if (mode == SFmode)
22387 *total = rs6000_cost->fp;
22388 else
22389 *total = rs6000_cost->dmul;
22390 break;
22391
22392 case DIV:
22393 case MOD:
22394 if (FLOAT_MODE_P (mode))
22395 {
22396 *total = mode == DFmode ? rs6000_cost->ddiv
22397 : rs6000_cost->sdiv;
22398 return false;
22399 }
22400 /* FALLTHRU */
22401
22402 case UDIV:
22403 case UMOD:
22404 if (CONST_INT_P (XEXP (x, 1))
22405 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
22406 {
22407 if (code == DIV || code == MOD)
22408 /* Shift, addze */
22409 *total = COSTS_N_INSNS (2);
22410 else
22411 /* Shift */
22412 *total = COSTS_N_INSNS (1);
22413 }
22414 else
22415 {
22416 if (GET_MODE (XEXP (x, 1)) == DImode)
22417 *total = rs6000_cost->divdi;
22418 else
22419 *total = rs6000_cost->divsi;
22420 }
22421 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22422 if ((!TARGET_MODULO
22423 || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
22424 && (code == MOD || code == UMOD))
22425 *total += COSTS_N_INSNS (2);
22426 return false;
22427
22428 case CTZ:
22429 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
22430 return false;
22431
22432 case FFS:
22433 *total = COSTS_N_INSNS (4);
22434 return false;
22435
22436 case POPCOUNT:
22437 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
22438 return false;
22439
22440 case PARITY:
22441 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
22442 return false;
22443
22444 case NOT:
22445 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
22446 *total = 0;
22447 else
22448 *total = COSTS_N_INSNS (1);
22449 return false;
22450
22451 case AND:
22452 if (CONST_INT_P (XEXP (x, 1)))
22453 {
22454 rtx left = XEXP (x, 0);
22455 rtx_code left_code = GET_CODE (left);
22456
22457 /* rotate-and-mask: 1 insn. */
22458 if ((left_code == ROTATE
22459 || left_code == ASHIFT
22460 || left_code == LSHIFTRT)
22461 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
22462 {
22463 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
22464 if (!CONST_INT_P (XEXP (left, 1)))
22465 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
22466 *total += COSTS_N_INSNS (1);
22467 return true;
22468 }
22469
22470 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22471 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
22472 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
22473 || (val & 0xffff) == val
22474 || (val & 0xffff0000) == val
22475 || ((val & 0xffff) == 0 && mode == SImode))
22476 {
22477 *total = rtx_cost (left, mode, AND, 0, speed);
22478 *total += COSTS_N_INSNS (1);
22479 return true;
22480 }
22481
22482 /* 2 insns. */
22483 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
22484 {
22485 *total = rtx_cost (left, mode, AND, 0, speed);
22486 *total += COSTS_N_INSNS (2);
22487 return true;
22488 }
22489 }
22490
22491 *total = COSTS_N_INSNS (1);
22492 return false;
22493
22494 case IOR:
22495 /* FIXME */
22496 *total = COSTS_N_INSNS (1);
22497 return true;
22498
22499 case CLZ:
22500 case XOR:
22501 case ZERO_EXTRACT:
22502 *total = COSTS_N_INSNS (1);
22503 return false;
22504
22505 case ASHIFT:
22506 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22507 the sign extend and shift separately within the insn. */
22508 if (TARGET_EXTSWSLI && mode == DImode
22509 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
22510 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22511 {
22512 *total = 0;
22513 return false;
22514 }
22515 /* fall through */
22516
22517 case ASHIFTRT:
22518 case LSHIFTRT:
22519 case ROTATE:
22520 case ROTATERT:
22521 /* Handle mul_highpart. */
22522 if (outer_code == TRUNCATE
22523 && GET_CODE (XEXP (x, 0)) == MULT)
22524 {
22525 if (mode == DImode)
22526 *total = rs6000_cost->muldi;
22527 else
22528 *total = rs6000_cost->mulsi;
22529 return true;
22530 }
22531 else if (outer_code == AND)
22532 *total = 0;
22533 else
22534 *total = COSTS_N_INSNS (1);
22535 return false;
22536
22537 case SIGN_EXTEND:
22538 case ZERO_EXTEND:
22539 if (MEM_P (XEXP (x, 0)))
22540 *total = 0;
22541 else
22542 *total = COSTS_N_INSNS (1);
22543 return false;
22544
22545 case COMPARE:
22546 case NEG:
22547 case ABS:
22548 if (!FLOAT_MODE_P (mode))
22549 {
22550 *total = COSTS_N_INSNS (1);
22551 return false;
22552 }
22553 /* FALLTHRU */
22554
22555 case FLOAT:
22556 case UNSIGNED_FLOAT:
22557 case FIX:
22558 case UNSIGNED_FIX:
22559 case FLOAT_TRUNCATE:
22560 *total = rs6000_cost->fp;
22561 return false;
22562
22563 case FLOAT_EXTEND:
22564 if (mode == DFmode)
22565 *total = rs6000_cost->sfdf_convert;
22566 else
22567 *total = rs6000_cost->fp;
22568 return false;
22569
22570 case CALL:
22571 case IF_THEN_ELSE:
22572 if (!speed)
22573 {
22574 *total = COSTS_N_INSNS (1);
22575 return true;
22576 }
22577 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22578 {
22579 *total = rs6000_cost->fp;
22580 return false;
22581 }
22582 break;
22583
22584 case NE:
22585 case EQ:
22586 case GTU:
22587 case LTU:
22588 /* Carry bit requires mode == Pmode.
22589 NEG or PLUS already counted so only add one. */
22590 if (mode == Pmode
22591 && (outer_code == NEG || outer_code == PLUS))
22592 {
22593 *total = COSTS_N_INSNS (1);
22594 return true;
22595 }
22596 /* FALLTHRU */
22597
22598 case GT:
22599 case LT:
22600 case UNORDERED:
22601 if (outer_code == SET)
22602 {
22603 if (XEXP (x, 1) == const0_rtx)
22604 {
22605 *total = COSTS_N_INSNS (2);
22606 return true;
22607 }
22608 else
22609 {
22610 *total = COSTS_N_INSNS (3);
22611 return false;
22612 }
22613 }
22614 /* CC COMPARE. */
22615 if (outer_code == COMPARE)
22616 {
22617 *total = 0;
22618 return true;
22619 }
22620 break;
22621
22622 case UNSPEC:
22623 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22624 {
22625 *total = 0;
22626 return true;
22627 }
22628 break;
22629
22630 default:
22631 break;
22632 }
22633
22634 return false;
22635 }
22636
22637 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
22638
22639 static bool
22640 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22641 int opno, int *total, bool speed)
22642 {
22643 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22644
22645 fprintf (stderr,
22646 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22647 "opno = %d, total = %d, speed = %s, x:\n",
22648 ret ? "complete" : "scan inner",
22649 GET_MODE_NAME (mode),
22650 GET_RTX_NAME (outer_code),
22651 opno,
22652 *total,
22653 speed ? "true" : "false");
22654
22655 debug_rtx (x);
22656
22657 return ret;
22658 }
22659
22660 static int
22661 rs6000_insn_cost (rtx_insn *insn, bool speed)
22662 {
22663 if (recog_memoized (insn) < 0)
22664 return 0;
22665
22666 /* If we are optimizing for size, just use the length. */
22667 if (!speed)
22668 return get_attr_length (insn);
22669
22670 /* Use the cost if provided. */
22671 int cost = get_attr_cost (insn);
22672 if (cost > 0)
22673 return cost;
22674
22675 /* If the insn tells us how many insns there are, use that. Otherwise use
22676 the length/4. Adjust the insn length to remove the extra size that
22677 prefixed instructions take. */
22678 int n = get_attr_num_insns (insn);
22679 if (n == 0)
22680 {
22681 int length = get_attr_length (insn);
22682 if (get_attr_prefixed (insn) == PREFIXED_YES)
22683 {
22684 int adjust = 0;
22685 ADJUST_INSN_LENGTH (insn, adjust);
22686 length -= adjust;
22687 }
22688
22689 n = length / 4;
22690 }
22691
22692 enum attr_type type = get_attr_type (insn);
22693
22694 switch (type)
22695 {
22696 case TYPE_LOAD:
22697 case TYPE_FPLOAD:
22698 case TYPE_VECLOAD:
22699 cost = COSTS_N_INSNS (n + 1);
22700 break;
22701
22702 case TYPE_MUL:
22703 switch (get_attr_size (insn))
22704 {
22705 case SIZE_8:
22706 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22707 break;
22708 case SIZE_16:
22709 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22710 break;
22711 case SIZE_32:
22712 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22713 break;
22714 case SIZE_64:
22715 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22716 break;
22717 default:
22718 gcc_unreachable ();
22719 }
22720 break;
22721 case TYPE_DIV:
22722 switch (get_attr_size (insn))
22723 {
22724 case SIZE_32:
22725 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22726 break;
22727 case SIZE_64:
22728 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22729 break;
22730 default:
22731 gcc_unreachable ();
22732 }
22733 break;
22734
22735 case TYPE_FP:
22736 cost = n * rs6000_cost->fp;
22737 break;
22738 case TYPE_DMUL:
22739 cost = n * rs6000_cost->dmul;
22740 break;
22741 case TYPE_SDIV:
22742 cost = n * rs6000_cost->sdiv;
22743 break;
22744 case TYPE_DDIV:
22745 cost = n * rs6000_cost->ddiv;
22746 break;
22747
22748 case TYPE_SYNC:
22749 case TYPE_LOAD_L:
22750 case TYPE_MFCR:
22751 case TYPE_MFCRF:
22752 cost = COSTS_N_INSNS (n + 2);
22753 break;
22754
22755 default:
22756 cost = COSTS_N_INSNS (n);
22757 }
22758
22759 return cost;
22760 }
22761
22762 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22763
22764 static int
22765 rs6000_debug_address_cost (rtx x, machine_mode mode,
22766 addr_space_t as, bool speed)
22767 {
22768 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22769
22770 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22771 ret, speed ? "true" : "false");
22772 debug_rtx (x);
22773
22774 return ret;
22775 }
22776
22777
22778 /* A C expression returning the cost of moving data from a register of class
22779 CLASS1 to one of CLASS2. */
22780
22781 static int
22782 rs6000_register_move_cost (machine_mode mode,
22783 reg_class_t from, reg_class_t to)
22784 {
22785 int ret;
22786 reg_class_t rclass;
22787
22788 if (TARGET_DEBUG_COST)
22789 dbg_cost_ctrl++;
22790
22791 /* If we have VSX, we can easily move between FPR or Altivec registers,
22792 otherwise we can only easily move within classes.
22793 Do this first so we give best-case answers for union classes
22794 containing both gprs and vsx regs. */
22795 HARD_REG_SET to_vsx, from_vsx;
22796 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22797 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22798 if (!hard_reg_set_empty_p (to_vsx)
22799 && !hard_reg_set_empty_p (from_vsx)
22800 && (TARGET_VSX
22801 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22802 {
22803 int reg = FIRST_FPR_REGNO;
22804 if (TARGET_VSX
22805 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22806 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22807 reg = FIRST_ALTIVEC_REGNO;
22808 ret = 2 * hard_regno_nregs (reg, mode);
22809 }
22810
22811 /* Moves from/to GENERAL_REGS. */
22812 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22813 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22814 {
22815 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22816 {
22817 if (TARGET_DIRECT_MOVE)
22818 {
22819 /* Keep the cost for direct moves above that for within
22820 a register class even if the actual processor cost is
22821 comparable. We do this because a direct move insn
22822 can't be a nop, whereas with ideal register
22823 allocation a move within the same class might turn
22824 out to be a nop. */
22825 if (rs6000_tune == PROCESSOR_POWER9
22826 || rs6000_tune == PROCESSOR_POWER10
22827 || rs6000_tune == PROCESSOR_POWER11)
22828 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22829 else
22830 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22831 /* SFmode requires a conversion when moving between gprs
22832 and vsx. */
22833 if (mode == SFmode)
22834 ret += 2;
22835 }
22836 else
22837 ret = (rs6000_memory_move_cost (mode, rclass, false)
22838 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22839 }
22840
22841 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22842 shift. */
22843 else if (rclass == CR_REGS)
22844 ret = 4;
22845
22846 /* For those processors that have slow LR/CTR moves, make them more
22847 expensive than memory in order to bias spills to memory. */
22848 else if ((rs6000_tune == PROCESSOR_POWER6
22849 || rs6000_tune == PROCESSOR_POWER7
22850 || rs6000_tune == PROCESSOR_POWER8
22851 || rs6000_tune == PROCESSOR_POWER9)
22852 && reg_class_subset_p (rclass, SPECIAL_REGS))
22853 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22854
22855 else
22856 /* A move will cost one instruction per GPR moved. */
22857 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22858 }
22859
22860 /* Everything else has to go through GENERAL_REGS. */
22861 else
22862 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22863 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22864
22865 if (TARGET_DEBUG_COST)
22866 {
22867 if (dbg_cost_ctrl == 1)
22868 fprintf (stderr,
22869 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22870 ret, GET_MODE_NAME (mode), reg_class_names[from],
22871 reg_class_names[to]);
22872 dbg_cost_ctrl--;
22873 }
22874
22875 return ret;
22876 }
22877
22878 /* A C expression returning the cost of moving data of MODE from a register to
22879 or from memory. */
22880
22881 static int
22882 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22883 bool in ATTRIBUTE_UNUSED)
22884 {
22885 int ret;
22886
22887 if (TARGET_DEBUG_COST)
22888 dbg_cost_ctrl++;
22889
22890 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22891 ret = 4 * hard_regno_nregs (0, mode);
22892 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22893 || reg_classes_intersect_p (rclass, VSX_REGS)))
22894 ret = 4 * hard_regno_nregs (32, mode);
22895 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22896 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22897 else
22898 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22899
22900 if (TARGET_DEBUG_COST)
22901 {
22902 if (dbg_cost_ctrl == 1)
22903 fprintf (stderr,
22904 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22905 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22906 dbg_cost_ctrl--;
22907 }
22908
22909 return ret;
22910 }
22911
22912 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22913
22914 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22915 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22916 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22917 move cost between GENERAL_REGS and VSX_REGS low.
22918
22919 It might seem reasonable to use a union class. After all, if usage
22920 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22921 rather than memory. However, in cases where register pressure of
22922 both is high, like the cactus_adm spec test, allowing
22923 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22924 the first scheduling pass. This is partly due to an allocno of
22925 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22926 class, which gives too high a pressure for GENERAL_REGS and too low
22927 for VSX_REGS. So, force a choice of the subclass here.
22928
22929 The best class is also the union if GENERAL_REGS and VSX_REGS have
22930 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22931 allocno class, since trying to narrow down the class by the mode of
22932 the regno is error-prone. For example, SImode is allowed in VSX regs,
22933 and in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22934 it would be wrong to choose an allocno of GENERAL_REGS based on
22935 SImode. */
22936
22937 static reg_class_t
22938 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22939 reg_class_t allocno_class,
22940 reg_class_t best_class)
22941 {
22942 switch (allocno_class)
22943 {
22944 case GEN_OR_VSX_REGS:
22945 /* best_class must be a subset of allocno_class. */
22946 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22947 || best_class == GEN_OR_FLOAT_REGS
22948 || best_class == VSX_REGS
22949 || best_class == ALTIVEC_REGS
22950 || best_class == FLOAT_REGS
22951 || best_class == GENERAL_REGS
22952 || best_class == BASE_REGS);
22953 /* Use best_class but choose wider classes when copying from the
22954 wider class to best_class is cheap. This mimics IRA choice
22955 of allocno class. */
22956 if (best_class == BASE_REGS)
22957 return GENERAL_REGS;
22958 if (TARGET_VSX && best_class == FLOAT_REGS)
22959 return VSX_REGS;
22960 return best_class;
22961
22962 case VSX_REGS:
22963 if (best_class == ALTIVEC_REGS)
22964 return ALTIVEC_REGS;
22965
22966 default:
22967 break;
22968 }
22969
22970 return allocno_class;
22971 }
22972
22973 /* Load up a constant. If the mode is a vector mode, splat the value across
22974 all of the vector elements. */
22975
22976 static rtx
22977 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22978 {
22979 rtx reg;
22980
22981 if (mode == SFmode || mode == DFmode)
22982 {
22983 rtx d = const_double_from_real_value (dconst, mode);
22984 reg = force_reg (mode, d);
22985 }
22986 else if (mode == V4SFmode)
22987 {
22988 rtx d = const_double_from_real_value (dconst, SFmode);
22989 rtvec v = gen_rtvec (4, d, d, d, d);
22990 reg = gen_reg_rtx (mode);
22991 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22992 }
22993 else if (mode == V2DFmode)
22994 {
22995 rtx d = const_double_from_real_value (dconst, DFmode);
22996 rtvec v = gen_rtvec (2, d, d);
22997 reg = gen_reg_rtx (mode);
22998 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22999 }
23000 else
23001 gcc_unreachable ();
23002
23003 return reg;
23004 }
23005
23006 /* Generate an FMA instruction. */
23007
23008 static void
23009 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
23010 {
23011 machine_mode mode = GET_MODE (target);
23012 rtx dst;
23013
23014 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
23015 gcc_assert (dst != NULL);
23016
23017 if (dst != target)
23018 emit_move_insn (target, dst);
23019 }
23020
23021 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
23022
23023 static void
23024 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
23025 {
23026 machine_mode mode = GET_MODE (dst);
23027 rtx r;
23028
23029 /* This is a tad more complicated, since the fnma_optab is for
23030 a different expression: fma(-m1, m2, a), which is the same
23031 thing except in the case of signed zeros.
23032
23033 Fortunately we know that if FMA is supported, FNMSUB is
23034 also supported by the ISA. Just expand it directly. */
23035
23036 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
23037
23038 r = gen_rtx_NEG (mode, a);
23039 r = gen_rtx_FMA (mode, m1, m2, r);
23040 r = gen_rtx_NEG (mode, r);
23041 emit_insn (gen_rtx_SET (dst, r));
23042 }
23043
23044 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
23045 add a reg_note saying that this was a division. Support both scalar and
23046 vector divide. Assumes no trapping math and finite arguments. */
23047
23048 void
23049 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
23050 {
23051 machine_mode mode = GET_MODE (dst);
23052 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
23053 int i;
23054
23055 /* Low precision estimates guarantee 5 bits of accuracy. High
23056 precision estimates guarantee 14 bits of accuracy. SFmode
23057 requires 23 bits of accuracy. DFmode requires 52 bits of
23058 accuracy. Each pass at least doubles the accuracy, leading
23059 to the following. */
23060 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23061 if (mode == DFmode || mode == V2DFmode)
23062 passes++;
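/* For example, a 5-bit estimate reaches SFmode's 23 bits after 3
doublings (5 -> 10 -> 20 -> 40) and DFmode's 52 bits after 4; the
14-bit TARGET_RECIP_PRECISION estimates need only 1 and 2. */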
23063
23064 enum insn_code code = optab_handler (smul_optab, mode);
23065 insn_gen_fn gen_mul = GEN_FCN (code);
23066
23067 gcc_assert (code != CODE_FOR_nothing);
23068
23069 one = rs6000_load_constant_and_splat (mode, dconst1);
23070
23071 /* x0 = 1./d estimate */
23072 x0 = gen_reg_rtx (mode);
23073 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
23074 UNSPEC_FRES)));
23075
23076 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
23077 if (passes > 1) {
23078
23079 /* e0 = 1. - d * x0 */
23080 e0 = gen_reg_rtx (mode);
23081 rs6000_emit_nmsub (e0, d, x0, one);
23082
23083 /* x1 = x0 + e0 * x0 */
23084 x1 = gen_reg_rtx (mode);
23085 rs6000_emit_madd (x1, e0, x0, x0);
23086
23087 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
23088 ++i, xprev = xnext, eprev = enext) {
23089
23090 /* enext = eprev * eprev */
23091 enext = gen_reg_rtx (mode);
23092 emit_insn (gen_mul (enext, eprev, eprev));
23093
23094 /* xnext = xprev + enext * xprev */
23095 xnext = gen_reg_rtx (mode);
23096 rs6000_emit_madd (xnext, enext, xprev, xprev);
23097 }
23098
23099 } else
23100 xprev = x0;
23101
23102 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
23103
23104 /* u = n * xprev */
23105 u = gen_reg_rtx (mode);
23106 emit_insn (gen_mul (u, n, xprev));
23107
23108 /* v = n - (d * u) */
23109 v = gen_reg_rtx (mode);
23110 rs6000_emit_nmsub (v, d, u, n);
23111
23112 /* dst = (v * xprev) + u */
23113 rs6000_emit_madd (dst, v, xprev, u);
23114
23115 if (note_p)
23116 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
23117 }
23118
23119 /* Goldschmidt's Algorithm for single/double-precision floating point
23120 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
23121
23122 void
23123 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
23124 {
23125 machine_mode mode = GET_MODE (src);
23126 rtx e = gen_reg_rtx (mode);
23127 rtx g = gen_reg_rtx (mode);
23128 rtx h = gen_reg_rtx (mode);
23129
23130 /* Low precision estimates guarantee 5 bits of accuracy. High
23131 precision estimates guarantee 14 bits of accuracy. SFmode
23132 requires 23 bits of accuracy. DFmode requires 52 bits of
23133 accuracy. Each pass at least doubles the accuracy, leading
23134 to the following. */
23135 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23136 if (mode == DFmode || mode == V2DFmode)
23137 passes++;
23138
23139 int i;
23140 rtx mhalf;
23141 enum insn_code code = optab_handler (smul_optab, mode);
23142 insn_gen_fn gen_mul = GEN_FCN (code);
23143
23144 gcc_assert (code != CODE_FOR_nothing);
23145
23146 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
23147
23148 /* e = rsqrt estimate */
23149 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
23150 UNSPEC_RSQRT)));
23151
23152 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
23153 if (!recip)
23154 {
23155 rtx zero = force_reg (mode, CONST0_RTX (mode));
23156
23157 if (mode == SFmode)
23158 {
23159 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
23160 e, zero, mode, 0);
23161 if (target != e)
23162 emit_move_insn (e, target);
23163 }
23164 else
23165 {
23166 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
23167 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
23168 }
23169 }
23170
23171 /* g = sqrt estimate. */
23172 emit_insn (gen_mul (g, e, src));
23173 /* h = 1/(2*sqrt) estimate. */
23174 emit_insn (gen_mul (h, e, mhalf));
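/* Each Goldschmidt refinement below computes the residual
t = 1/2 - g*h and updates g' = g + g*t, h' = h + h*t, roughly
doubling the accurate bits of g ~ sqrt(src) and h ~ 1/(2*sqrt(src)). */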
23175
23176 if (recip)
23177 {
23178 if (passes == 1)
23179 {
23180 rtx t = gen_reg_rtx (mode);
23181 rs6000_emit_nmsub (t, g, h, mhalf);
23182 /* Apply correction directly to 1/rsqrt estimate. */
23183 rs6000_emit_madd (dst, e, t, e);
23184 }
23185 else
23186 {
23187 for (i = 0; i < passes; i++)
23188 {
23189 rtx t1 = gen_reg_rtx (mode);
23190 rtx g1 = gen_reg_rtx (mode);
23191 rtx h1 = gen_reg_rtx (mode);
23192
23193 rs6000_emit_nmsub (t1, g, h, mhalf);
23194 rs6000_emit_madd (g1, g, t1, g);
23195 rs6000_emit_madd (h1, h, t1, h);
23196
23197 g = g1;
23198 h = h1;
23199 }
23200 /* Multiply by 2 for 1/rsqrt. */
23201 emit_insn (gen_add3_insn (dst, h, h));
23202 }
23203 }
23204 else
23205 {
23206 rtx t = gen_reg_rtx (mode);
23207 rs6000_emit_nmsub (t, g, h, mhalf);
23208 rs6000_emit_madd (dst, g, t, g);
23209 }
23210
23211 return;
23212 }
23213
23214 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
23215 (Power7) targets. DST is the target, and SRC is the argument operand. */
23216
23217 void
23218 rs6000_emit_popcount (rtx dst, rtx src)
23219 {
23220 machine_mode mode = GET_MODE (dst);
23221 rtx tmp1, tmp2;
23222
23223 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
23224 if (TARGET_POPCNTD)
23225 {
23226 if (mode == SImode)
23227 emit_insn (gen_popcntdsi2 (dst, src));
23228 else
23229 emit_insn (gen_popcntddi2 (dst, src));
23230 return;
23231 }
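/* Fall back to popcntb, which counts the bits within each byte;
multiplying by 0x01..01 sums the byte counts into the most significant
byte, which the final shift extracts. E.g. SImode 0x01020304 gives
popcntb 0x01010201, whose product's top byte is 1+1+2+1 == 5. */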
23232
23233 tmp1 = gen_reg_rtx (mode);
23234
23235 if (mode == SImode)
23236 {
23237 emit_insn (gen_popcntbsi2 (tmp1, src));
23238 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
23239 NULL_RTX, 0);
23240 tmp2 = force_reg (SImode, tmp2);
23241 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
23242 }
23243 else
23244 {
23245 emit_insn (gen_popcntbdi2 (tmp1, src));
23246 tmp2 = expand_mult (DImode, tmp1,
23247 GEN_INT ((HOST_WIDE_INT)
23248 0x01010101 << 32 | 0x01010101),
23249 NULL_RTX, 0);
23250 tmp2 = force_reg (DImode, tmp2);
23251 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
23252 }
23253 }
23254
23255
23256 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
23257 target, and SRC is the argument operand. */
23258
23259 void
23260 rs6000_emit_parity (rtx dst, rtx src)
23261 {
23262 machine_mode mode = GET_MODE (dst);
23263 rtx tmp;
23264
23265 tmp = gen_reg_rtx (mode);
23266
23267 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23268 if (TARGET_CMPB)
23269 {
23270 if (mode == SImode)
23271 {
23272 emit_insn (gen_popcntbsi2 (tmp, src));
23273 emit_insn (gen_paritysi2_cmpb (dst, tmp));
23274 }
23275 else
23276 {
23277 emit_insn (gen_popcntbdi2 (tmp, src));
23278 emit_insn (gen_paritydi2_cmpb (dst, tmp));
23279 }
23280 return;
23281 }
23282
23283 if (mode == SImode)
23284 {
23285 /* Is mult+shift >= shift+xor+shift+xor? */
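/* Folding the popcntb byte counts together with shifts and xors
preserves the low bit of their sum, which is the parity; the final
AND with 1 below extracts it. */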
23286 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
23287 {
23288 rtx tmp1, tmp2, tmp3, tmp4;
23289
23290 tmp1 = gen_reg_rtx (SImode);
23291 emit_insn (gen_popcntbsi2 (tmp1, src));
23292
23293 tmp2 = gen_reg_rtx (SImode);
23294 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
23295 tmp3 = gen_reg_rtx (SImode);
23296 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
23297
23298 tmp4 = gen_reg_rtx (SImode);
23299 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
23300 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
23301 }
23302 else
23303 rs6000_emit_popcount (tmp, src);
23304 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
23305 }
23306 else
23307 {
23308 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23309 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
23310 {
23311 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
23312
23313 tmp1 = gen_reg_rtx (DImode);
23314 emit_insn (gen_popcntbdi2 (tmp1, src));
23315
23316 tmp2 = gen_reg_rtx (DImode);
23317 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
23318 tmp3 = gen_reg_rtx (DImode);
23319 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
23320
23321 tmp4 = gen_reg_rtx (DImode);
23322 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
23323 tmp5 = gen_reg_rtx (DImode);
23324 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
23325
23326 tmp6 = gen_reg_rtx (DImode);
23327 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
23328 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
23329 }
23330 else
23331 rs6000_emit_popcount (tmp, src);
23332 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
23333 }
23334 }
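/* Editor's note (illustrative): the xor folding above is valid because
   only bit 0 matters in the end: bit 0 of (a ^ b) equals bit 0 of
   (a + b).  Repeatedly xoring the halves of the popcntb result thus
   leaves, in bit 0 of the low byte, the parity of the sum of all the
   byte counts, i.e. the parity of SRC; the final AND with 1 masks off
   the rest.  A plain-C model of the SImode fold:  */
#if 0
static unsigned int
model_parity_si (unsigned int bytes)	/* BYTES = popcntb result.  */
{
  bytes ^= bytes >> 16;
  bytes ^= bytes >> 8;
  return bytes & 1;			/* == __builtin_parity (src).  */
}
#endif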
23335
23336 /* Expand an Altivec constant permutation for little endian mode.
23337 OP0 and OP1 are the input vectors and TARGET is the output vector.
23338 SEL specifies the constant permutation vector.
23339
23340 There are two issues: First, the two input operands must be
23341 swapped so that together they form a double-wide array in LE
23342 order. Second, the vperm instruction has surprising behavior
23343 in LE mode: it interprets the elements of the source vectors
23344 in BE mode ("left to right") and interprets the elements of
23345 the destination vector in LE mode ("right to left"). To
23346 correct for this, we must subtract each element of the permute
23347 control vector from 31.
23348
23349 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23350 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23351 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23352 serve as the permute control vector. Then, in BE mode,
23353
23354 vperm 9,10,11,12
23355
23356 places the desired result in vr9. However, in LE mode the
23357 vector contents will be
23358
23359 vr10 = 00000003 00000002 00000001 00000000
23360 vr11 = 00000007 00000006 00000005 00000004
23361
23362 The result of the vperm using the same permute control vector is
23363
23364 vr9 = 05000000 07000000 01000000 03000000
23365
23366 That is, the leftmost 4 bytes of vr10 are interpreted as the
23367 source for the rightmost 4 bytes of vr9, and so on.
23368
23369 If we change the permute control vector to
23370
23371 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23372
23373 and issue
23374
23375 vperm 9,11,10,12
23376
23377 we get the desired
23378
23379 vr9 = 00000006 00000004 00000002 00000000. */
23380
23381 static void
23382 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
23383 const vec_perm_indices &sel)
23384 {
23385 unsigned int i;
23386 rtx perm[16];
23387 rtx constv, unspec;
23388
23389 /* Unpack and adjust the constant selector. */
23390 for (i = 0; i < 16; ++i)
23391 {
23392 unsigned int elt = 31 - (sel[i] & 31);
23393 perm[i] = GEN_INT (elt);
23394 }
23395
23396 /* Expand to a permute, swapping the inputs and using the
23397 adjusted selector. */
23398 if (!REG_P (op0))
23399 op0 = force_reg (V16QImode, op0);
23400 if (!REG_P (op1))
23401 op1 = force_reg (V16QImode, op1);
23402
23403 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
23404 constv = force_reg (V16QImode, constv);
23405 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
23406 UNSPEC_VPERM);
23407 if (!REG_P (target))
23408 {
23409 rtx tmp = gen_reg_rtx (V16QImode);
23410 emit_move_insn (tmp, unspec);
23411 unspec = tmp;
23412 }
23413
23414 emit_move_insn (target, unspec);
23415 }
23416
23417 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23418 permute control vector. But here it's not a constant, so we must
23419 generate a vector NAND or NOR to do the adjustment. (This works
because vperm examines only the low five bits of each selector byte,
and within five bits a bitwise NOT is exactly 31 minus the value.) */
23420
23421 void
23422 altivec_expand_vec_perm_le (rtx operands[4])
23423 {
23424 rtx notx, iorx, unspec;
23425 rtx target = operands[0];
23426 rtx op0 = operands[1];
23427 rtx op1 = operands[2];
23428 rtx sel = operands[3];
23429 rtx tmp = target;
23430 rtx norreg = gen_reg_rtx (V16QImode);
23431 machine_mode mode = GET_MODE (target);
23432
23433 /* Get everything in regs so the pattern matches. */
23434 if (!REG_P (op0))
23435 op0 = force_reg (mode, op0);
23436 if (!REG_P (op1))
23437 op1 = force_reg (mode, op1);
23438 if (!REG_P (sel))
23439 sel = force_reg (V16QImode, sel);
23440 if (!REG_P (target))
23441 tmp = gen_reg_rtx (mode);
23442
23443 if (TARGET_P9_VECTOR)
23444 {
23445 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
23446 UNSPEC_VPERMR);
23447 }
23448 else
23449 {
23450 /* Invert the selector with a VNAND if available, else a VNOR.
23451 The VNAND is preferred for future fusion opportunities. */
23452 notx = gen_rtx_NOT (V16QImode, sel);
23453 iorx = (TARGET_P8_VECTOR
23454 ? gen_rtx_IOR (V16QImode, notx, notx)
23455 : gen_rtx_AND (V16QImode, notx, notx));
23456 emit_insn (gen_rtx_SET (norreg, iorx));
23457
23458 /* Permute with operands reversed and adjusted selector. */
23459 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
23460 UNSPEC_VPERM);
23461 }
23462
23463 /* Copy into target, possibly by way of a register. */
23464 if (!REG_P (target))
23465 {
23466 emit_move_insn (tmp, unspec);
23467 unspec = tmp;
23468 }
23469
23470 emit_move_insn (target, unspec);
23471 }
23472
23473 /* Expand an Altivec constant permutation. Return true if we match
23474 an efficient implementation; false to fall back to VPERM.
23475
23476 OP0 and OP1 are the input vectors and TARGET is the output vector.
23477 SEL specifies the constant permutation vector. */
23478
23479 static bool
23480 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
23481 const vec_perm_indices &sel)
23482 {
23483 struct altivec_perm_insn {
23484 HOST_WIDE_INT mask;
23485 enum insn_code impl;
23486 unsigned char perm[16];
23487 };
23488 static const struct altivec_perm_insn patterns[] = {
23489 {OPTION_MASK_ALTIVEC,
23490 CODE_FOR_altivec_vpkuhum_direct,
23491 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23492 {OPTION_MASK_ALTIVEC,
23493 CODE_FOR_altivec_vpkuwum_direct,
23494 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23495 {OPTION_MASK_ALTIVEC,
23496 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct_be
23497 : CODE_FOR_altivec_vmrglb_direct_le,
23498 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23499 {OPTION_MASK_ALTIVEC,
23500 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct_be
23501 : CODE_FOR_altivec_vmrglh_direct_le,
23502 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23503 {OPTION_MASK_ALTIVEC,
23504 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be
23505 : CODE_FOR_altivec_vmrglw_direct_v4si_le,
23506 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23507 {OPTION_MASK_ALTIVEC,
23508 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct_be
23509 : CODE_FOR_altivec_vmrghb_direct_le,
23510 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23511 {OPTION_MASK_ALTIVEC,
23512 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct_be
23513 : CODE_FOR_altivec_vmrghh_direct_le,
23514 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23515 {OPTION_MASK_ALTIVEC,
23516 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be
23517 : CODE_FOR_altivec_vmrghw_direct_v4si_le,
23518 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23519 {OPTION_MASK_P8_VECTOR,
23520 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23521 : CODE_FOR_p8_vmrgow_v4sf_direct,
23522 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23523 {OPTION_MASK_P8_VECTOR,
23524 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23525 : CODE_FOR_p8_vmrgew_v4sf_direct,
23526 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23527 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23528 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23529 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23530 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23531 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23532 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23533 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23534 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23535
23536 unsigned int i, j, elt, which;
23537 unsigned char perm[16];
23538 rtx x;
23539 bool one_vec;
23540
23541 /* Unpack the constant selector. */
23542 for (i = which = 0; i < 16; ++i)
23543 {
23544 elt = sel[i] & 31;
23545 which |= (elt < 16 ? 1 : 2);
23546 perm[i] = elt;
23547 }
23548
23549 /* Simplify the constant selector based on operands. */
23550 switch (which)
23551 {
23552 default:
23553 gcc_unreachable ();
23554
23555 case 3:
23556 one_vec = false;
23557 if (!rtx_equal_p (op0, op1))
23558 break;
23559 /* FALLTHRU */
23560
23561 case 2:
23562 for (i = 0; i < 16; ++i)
23563 perm[i] &= 15;
23564 op0 = op1;
23565 one_vec = true;
23566 break;
23567
23568 case 1:
23569 op1 = op0;
23570 one_vec = true;
23571 break;
23572 }
23573
23574 /* Look for splat patterns. */
23575 if (one_vec)
23576 {
23577 elt = perm[0];
23578
23579 for (i = 0; i < 16; ++i)
23580 if (perm[i] != elt)
23581 break;
23582 if (i == 16)
23583 {
23584 if (!BYTES_BIG_ENDIAN)
23585 elt = 15 - elt;
23586 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23587 return true;
23588 }
23589
23590 if (elt % 2 == 0)
23591 {
23592 for (i = 0; i < 16; i += 2)
23593 if (perm[i] != elt || perm[i + 1] != elt + 1)
23594 break;
23595 if (i == 16)
23596 {
23597 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23598 x = gen_reg_rtx (V8HImode);
23599 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23600 GEN_INT (field)));
23601 emit_move_insn (target, gen_lowpart (V16QImode, x));
23602 return true;
23603 }
23604 }
23605
23606 if (elt % 4 == 0)
23607 {
23608 for (i = 0; i < 16; i += 4)
23609 if (perm[i] != elt
23610 || perm[i + 1] != elt + 1
23611 || perm[i + 2] != elt + 2
23612 || perm[i + 3] != elt + 3)
23613 break;
23614 if (i == 16)
23615 {
23616 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23617 x = gen_reg_rtx (V4SImode);
23618 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23619 GEN_INT (field)));
23620 emit_move_insn (target, gen_lowpart (V16QImode, x));
23621 return true;
23622 }
23623 }
23624 }
23625
23626 /* Look for merge and pack patterns. */
23627 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23628 {
23629 bool swapped;
23630
23631 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23632 continue;
23633
23634 elt = patterns[j].perm[0];
23635 if (perm[0] == elt)
23636 swapped = false;
23637 else if (perm[0] == elt + 16)
23638 swapped = true;
23639 else
23640 continue;
23641 for (i = 1; i < 16; ++i)
23642 {
23643 elt = patterns[j].perm[i];
23644 if (swapped)
23645 elt = (elt >= 16 ? elt - 16 : elt + 16);
23646 else if (one_vec && elt >= 16)
23647 elt -= 16;
23648 if (perm[i] != elt)
23649 break;
23650 }
23651 if (i == 16)
23652 {
23653 enum insn_code icode = patterns[j].impl;
23654 machine_mode omode = insn_data[icode].operand[0].mode;
23655 machine_mode imode = insn_data[icode].operand[1].mode;
23656
23657 rtx perm_idx = GEN_INT (0);
23658 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23659 {
23660 int perm_val = 0;
23661 if (one_vec)
23662 {
23663 if (perm[0] == 8)
23664 perm_val |= 2;
23665 if (perm[8] == 8)
23666 perm_val |= 1;
23667 }
23668 else
23669 {
23670 if (perm[0] != 0)
23671 perm_val |= 2;
23672 if (perm[8] != 16)
23673 perm_val |= 1;
23674 }
23675 perm_idx = GEN_INT (perm_val);
23676 }
23677
23678 /* For little-endian, don't use vpkuwum and vpkuhum if the
23679 underlying vector type is not V4SI and V8HI, respectively.
23680 For example, using vpkuwum with a V8HI picks up the even
23681 halfwords in BE numbering, when what we need are the even halfwords
23682 in LE numbering (i.e. the odd halfwords in BE numbering). */
23683 if (!BYTES_BIG_ENDIAN
23684 && icode == CODE_FOR_altivec_vpkuwum_direct
23685 && ((REG_P (op0)
23686 && GET_MODE (op0) != V4SImode)
23687 || (SUBREG_P (op0)
23688 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23689 continue;
23690 if (!BYTES_BIG_ENDIAN
23691 && icode == CODE_FOR_altivec_vpkuhum_direct
23692 && ((REG_P (op0)
23693 && GET_MODE (op0) != V8HImode)
23694 || (SUBREG_P (op0)
23695 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23696 continue;
23697
23698 /* For little-endian, the two input operands must be swapped
23699 (or swapped back) to ensure proper right-to-left numbering
23700 from 0 to 2N-1. */
23701 if (swapped == BYTES_BIG_ENDIAN
23702 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23703 std::swap (op0, op1);
23704 if (imode != V16QImode)
23705 {
23706 op0 = gen_lowpart (imode, op0);
23707 op1 = gen_lowpart (imode, op1);
23708 }
23709 if (omode == V16QImode)
23710 x = target;
23711 else
23712 x = gen_reg_rtx (omode);
23713 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23714 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23715 else
23716 emit_insn (GEN_FCN (icode) (x, op0, op1));
23717 if (omode != V16QImode)
23718 emit_move_insn (target, gen_lowpart (V16QImode, x));
23719 return true;
23720 }
23721 }
23722
23723 if (!BYTES_BIG_ENDIAN)
23724 {
23725 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23726 return true;
23727 }
23728
23729 return false;
23730 }
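/* Editor's worked example (illustrative): if SEL is the constant
   {5,5,...,5}, the splat scan above matches with elt == 5, so a
   big-endian target gets a single vspltb with index 5, while a
   little-endian target first flips the index to 15 - 5 == 10.  */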
23731
23732 /* Expand a VSX Permute Doubleword constant permutation.
23733 Return true if we match an efficient implementation. */
23734
23735 static bool
23736 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23737 unsigned char perm0, unsigned char perm1)
23738 {
23739 rtx x;
23740
23741 /* If both selectors come from the same operand, fold to single op. */
23742 if ((perm0 & 2) == (perm1 & 2))
23743 {
23744 if (perm0 & 2)
23745 op0 = op1;
23746 else
23747 op1 = op0;
23748 }
23749 /* If both operands are equal, fold to simpler permutation. */
23750 if (rtx_equal_p (op0, op1))
23751 {
23752 perm0 = perm0 & 1;
23753 perm1 = (perm1 & 1) + 2;
23754 }
23755 /* If the first selector comes from the second operand, swap. */
23756 else if (perm0 & 2)
23757 {
23758 if (perm1 & 2)
23759 return false;
23760 perm0 -= 2;
23761 perm1 += 2;
23762 std::swap (op0, op1);
23763 }
23764 /* If the second selector does not come from the second operand, fail. */
23765 else if ((perm1 & 2) == 0)
23766 return false;
23767
23768 /* Success! */
23769 if (target != NULL)
23770 {
23771 machine_mode vmode, dmode;
23772 rtvec v;
23773
23774 vmode = GET_MODE (target);
23775 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23776 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23777 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23778 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23779 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23780 emit_insn (gen_rtx_SET (target, x));
23781 }
23782 return true;
23783 }
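/* Editor's worked example (illustrative): with PERM0 == 3 and
   PERM1 == 0, the first selector comes from the second operand, so
   the code above swaps OP0 and OP1 and rewrites the selectors to 1
   and 2.  The emitted VEC_SELECT then takes doubleword 1 of the
   original OP1 followed by doubleword 0 of the original OP0, a form
   the xxpermdi pattern can match directly.  */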
23784
23785 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23786
23787 static bool
23788 rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
23789 rtx target, rtx op0, rtx op1,
23790 const vec_perm_indices &sel)
23791 {
23792 if (vmode != op_mode)
23793 return false;
23794
23795 bool testing_p = !target;
23796
23797 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23798 if (TARGET_ALTIVEC && testing_p)
23799 return true;
23800
23801 if (op0)
23802 {
23803 rtx nop0 = force_reg (vmode, op0);
23804 if (op0 == op1)
23805 op1 = nop0;
23806 op0 = nop0;
23807 }
23808 if (op1)
23809 op1 = force_reg (vmode, op1);
23810
23811 /* Check for ps_merge* or xxpermdi insns. */
23812 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23813 {
23814 if (testing_p)
23815 {
23816 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23817 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23818 }
23819 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23820 return true;
23821 }
23822
23823 if (TARGET_ALTIVEC)
23824 {
23825 /* Force the target-independent code to lower to V16QImode. */
23826 if (vmode != V16QImode)
23827 return false;
23828 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23829 return true;
23830 }
23831
23832 return false;
23833 }
23834
23835 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23836 OP0 and OP1 are the input vectors and TARGET is the output vector.
23837 PERM specifies the constant permutation vector. */
23838
23839 static void
23840 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23841 machine_mode vmode, const vec_perm_builder &perm)
23842 {
23843 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23844 if (x != target)
23845 emit_move_insn (target, x);
23846 }
23847
23848 /* Expand an extract even operation. */
23849
23850 void
23851 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23852 {
23853 machine_mode vmode = GET_MODE (target);
23854 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23855 vec_perm_builder perm (nelt, nelt, 1);
23856
23857 for (i = 0; i < nelt; i++)
23858 perm.quick_push (i * 2);
23859
23860 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23861 }
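/* Editor's note (illustrative): for a V4SI target the loop above
   builds the selector {0, 2, 4, 6}, i.e. the even elements of the
   OP0:OP1 concatenation, and hands it to the generic constant-permute
   expander.  */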
23862
23863 /* Expand a vector interleave operation. */
23864
23865 void
23866 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23867 {
23868 machine_mode vmode = GET_MODE (target);
23869 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23870 vec_perm_builder perm (nelt, nelt, 1);
23871
23872 high = (highp ? 0 : nelt / 2);
23873 for (i = 0; i < nelt / 2; i++)
23874 {
23875 perm.quick_push (i + high);
23876 perm.quick_push (i + nelt + high);
23877 }
23878
23879 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23880 }
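/* Editor's note (illustrative): for a V4SI target, HIGHP builds the
   selector {0, 4, 1, 5} (interleaving the high halves of OP0 and OP1)
   and !HIGHP builds {2, 6, 3, 7} (interleaving the low halves).  */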
23881
23882 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
23883 void
23884 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23885 {
23886 HOST_WIDE_INT hwi_scale (scale);
23887 REAL_VALUE_TYPE r_pow;
23888 rtvec v = rtvec_alloc (2);
23889 rtx elt;
23890 rtx scale_vec = gen_reg_rtx (V2DFmode);
23891 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23892 elt = const_double_from_real_value (r_pow, DFmode);
23893 RTVEC_ELT (v, 0) = elt;
23894 RTVEC_ELT (v, 1) = elt;
23895 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23896 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23897 }
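/* Editor's note (illustrative): e.g. SCALE == 3 materializes the
   constant vector {8.0, 8.0} via real_powi (2.0 to the 3rd power in
   each lane) and emits a single V2DF multiply of SRC by it.  */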
23898
23899 /* Return an RTX representing where to find the function value of a
23900 function returning MODE. */
23901 static rtx
23902 rs6000_complex_function_value (machine_mode mode)
23903 {
23904 unsigned int regno;
23905 rtx r1, r2;
23906 machine_mode inner = GET_MODE_INNER (mode);
23907 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23908
23909 if (TARGET_FLOAT128_TYPE
23910 && (mode == KCmode
23911 || (mode == TCmode && TARGET_IEEEQUAD)))
23912 regno = ALTIVEC_ARG_RETURN;
23913
23914 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23915 regno = FP_ARG_RETURN;
23916
23917 else
23918 {
23919 regno = GP_ARG_RETURN;
23920
23921 /* 32-bit is OK since it'll go in r3/r4. */
23922 if (TARGET_32BIT && inner_bytes >= 4)
23923 return gen_rtx_REG (mode, regno);
23924 }
23925
23926 if (inner_bytes >= 8)
23927 return gen_rtx_REG (mode, regno);
23928
23929 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23930 const0_rtx);
23931 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23932 GEN_INT (inner_bytes));
23933 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23934 }
23935
23936 /* Return an rtx describing a return value of MODE as a PARALLEL
23937 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23938 stride REG_STRIDE. */
23939
23940 static rtx
23941 rs6000_parallel_return (machine_mode mode,
23942 int n_elts, machine_mode elt_mode,
23943 unsigned int regno, unsigned int reg_stride)
23944 {
23945 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23946
23947 int i;
23948 for (i = 0; i < n_elts; i++)
23949 {
23950 rtx r = gen_rtx_REG (elt_mode, regno);
23951 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23952 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23953 regno += reg_stride;
23954 }
23955
23956 return par;
23957 }
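/* Editor's note (illustrative): e.g. N_ELTS == 2, ELT_MODE == SImode,
   REGNO == GP_ARG_RETURN and REG_STRIDE == 1 yield

     (parallel [(expr_list (reg:SI 3) (const_int 0))
		(expr_list (reg:SI 4) (const_int 4))])

   assuming GP_ARG_RETURN is r3, i.e. the value is split across
   r3 and r4.  */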
23958
23959 /* Target hook for TARGET_FUNCTION_VALUE.
23960
23961 An integer value is in r3 and a floating-point value is in fp1,
23962 unless -msoft-float. */
23963
23964 static rtx
23965 rs6000_function_value (const_tree valtype,
23966 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23967 bool outgoing ATTRIBUTE_UNUSED)
23968 {
23969 machine_mode mode;
23970 unsigned int regno;
23971 machine_mode elt_mode;
23972 int n_elts;
23973
23974 /* Special handling for structs in darwin64. */
23975 if (TARGET_MACHO
23976 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23977 {
23978 CUMULATIVE_ARGS valcum;
23979 rtx valret;
23980
23981 valcum.words = 0;
23982 valcum.fregno = FP_ARG_MIN_REG;
23983 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23984 /* Do a trial code generation as if this were going to be passed as
23985 an argument; if any part goes in memory, we return NULL. */
23986 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23987 if (valret)
23988 return valret;
23989 /* Otherwise fall through to standard ABI rules. */
23990 }
23991
23992 mode = TYPE_MODE (valtype);
23993
23994 /* The ELFv2 ABI returns homogeneous floating-point and vector aggregates in registers. */
23995 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23996 {
23997 int first_reg, n_regs;
23998
23999 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
24000 {
24001 /* _Decimal128 must use even/odd register pairs. */
24002 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
24003 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
24004 }
24005 else
24006 {
24007 first_reg = ALTIVEC_ARG_RETURN;
24008 n_regs = 1;
24009 }
24010
24011 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
24012 }
24013
24014 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64. */
24015 if (TARGET_32BIT && TARGET_POWERPC64)
24016 switch (mode)
24017 {
24018 default:
24019 break;
24020 case E_DImode:
24021 case E_SCmode:
24022 case E_DCmode:
24023 case E_TCmode:
24024 int count = GET_MODE_SIZE (mode) / 4;
24025 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
24026 }
24027
24028 if ((INTEGRAL_TYPE_P (valtype)
24029 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
24030 || POINTER_TYPE_P (valtype))
24031 mode = TARGET_32BIT ? SImode : DImode;
24032
24033 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
24034 /* _Decimal128 must use an even/odd register pair. */
24035 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
24036 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
24037 && !FLOAT128_VECTOR_P (mode))
24038 regno = FP_ARG_RETURN;
24039 else if (TREE_CODE (valtype) == COMPLEX_TYPE
24040 && targetm.calls.split_complex_arg)
24041 return rs6000_complex_function_value (mode);
24042 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24043 return register is used in both cases, and we won't see V2DImode/V2DFmode
24044 for pure altivec, combine the two cases. */
24045 else if ((VECTOR_TYPE_P (valtype) || VECTOR_ALIGNMENT_P (mode))
24046 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
24047 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
24048 regno = ALTIVEC_ARG_RETURN;
24049 else
24050 regno = GP_ARG_RETURN;
24051
24052 return gen_rtx_REG (mode, regno);
24053 }
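/* Editor's note (illustrative): e.g. a function returning 'short' has
   its return mode widened above to SImode (-m32) or DImode (-m64) and
   lands in GP_ARG_RETURN, i.e. r3.  */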
24054
24055 /* Define how to find the value returned by a library function
24056 assuming the value has mode MODE. */
24057 rtx
24058 rs6000_libcall_value (machine_mode mode)
24059 {
24060 unsigned int regno;
24061
24062 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
24063 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
24064 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
24065
24066 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
24067 /* _Decimal128 must use an even/odd register pair. */
24068 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
24069 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
24070 regno = FP_ARG_RETURN;
24071 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24072 return register is used in both cases, and we won't see V2DImode/V2DFmode
24073 for pure altivec, combine the two cases. */
24074 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
24075 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
24076 regno = ALTIVEC_ARG_RETURN;
24077 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
24078 return rs6000_complex_function_value (mode);
24079 else
24080 regno = GP_ARG_RETURN;
24081
24082 return gen_rtx_REG (mode, regno);
24083 }
24084
24085 /* Compute register pressure classes. We implement the target hook to avoid
24086 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
24087 lead to incorrect estimates of the number of available registers and
24088 therefore increased register pressure/spill. */
24089 static int
24090 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
24091 {
24092 int n;
24093
24094 n = 0;
24095 pressure_classes[n++] = GENERAL_REGS;
24096 if (TARGET_ALTIVEC)
24097 pressure_classes[n++] = ALTIVEC_REGS;
24098 if (TARGET_VSX)
24099 pressure_classes[n++] = VSX_REGS;
24100 else
24101 {
24102 if (TARGET_HARD_FLOAT)
24103 pressure_classes[n++] = FLOAT_REGS;
24104 }
24105 pressure_classes[n++] = CR_REGS;
24106 pressure_classes[n++] = SPECIAL_REGS;
24107
24108 return n;
24109 }
24110
24111 /* Given FROM and TO register numbers, say whether this elimination is allowed.
24112 Frame pointer elimination is automatically handled.
24113
24114 For the RS/6000, if frame pointer elimination is being done, we would like
24115 to convert ap into fp, not sp.
24116
24117 We need r30 if -mminimal-toc was specified, and there are constant pool
24118 references. */
24119
24120 static bool
24121 rs6000_can_eliminate (const int from, const int to)
24122 {
24123 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
24124 ? ! frame_pointer_needed
24125 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
24126 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
24127 || constant_pool_empty_p ()
24128 : true);
24129 }
24130
24131 /* Define the offset between two registers, FROM to be eliminated and its
24132 replacement TO, at the start of a routine. */
24133 HOST_WIDE_INT
24134 rs6000_initial_elimination_offset (int from, int to)
24135 {
24136 rs6000_stack_t *info = rs6000_stack_info ();
24137 HOST_WIDE_INT offset;
24138
24139 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24140 offset = info->push_p ? 0 : -info->total_size;
24141 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24142 {
24143 offset = info->push_p ? 0 : -info->total_size;
24144 if (FRAME_GROWS_DOWNWARD)
24145 offset += info->fixed_size + info->vars_size + info->parm_size;
24146 }
24147 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24148 offset = FRAME_GROWS_DOWNWARD
24149 ? info->fixed_size + info->vars_size + info->parm_size
24150 : 0;
24151 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24152 offset = info->total_size;
24153 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24154 offset = info->push_p ? info->total_size : 0;
24155 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
24156 offset = 0;
24157 else
24158 gcc_unreachable ();
24159
24160 return offset;
24161 }
24162
24163 /* Fill in the sizes of the registers used by the unwinder. */
24164
24165 static void
24166 rs6000_init_dwarf_reg_sizes_extra (tree address)
24167 {
24168 if (TARGET_MACHO && ! TARGET_ALTIVEC)
24169 {
24170 int i;
24171 machine_mode mode = TYPE_MODE (char_type_node);
24172 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
24173 rtx mem = gen_rtx_MEM (BLKmode, addr);
24174 rtx value = gen_int_mode (16, mode);
24175
24176 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
24177 The unwinder still needs to know the size of Altivec registers. */
24178
24179 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
24180 {
24181 int column = DWARF_REG_TO_UNWIND_COLUMN
24182 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
24183 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
24184
24185 emit_move_insn (adjust_address (mem, mode, offset), value);
24186 }
24187 }
24188 }
24189
24190 /* Map internal gcc register numbers to debug format register numbers.
24191 FORMAT specifies the type of debug register number to use:
24192 0 -- debug information, except for frame-related sections
24193 1 -- DWARF .debug_frame section
24194 2 -- DWARF .eh_frame section */
24195
24196 unsigned int
24197 rs6000_debugger_regno (unsigned int regno, unsigned int format)
24198 {
24199 /* On some platforms, we use the standard DWARF register
24200 numbering for .debug_info and .debug_frame. */
24201 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
24202 {
24203 #ifdef RS6000_USE_DWARF_NUMBERING
24204 if (regno <= 31)
24205 return regno;
24206 if (FP_REGNO_P (regno))
24207 return regno - FIRST_FPR_REGNO + 32;
24208 if (ALTIVEC_REGNO_P (regno))
24209 return regno - FIRST_ALTIVEC_REGNO + 1124;
24210 if (regno == LR_REGNO)
24211 return 108;
24212 if (regno == CTR_REGNO)
24213 return 109;
24214 if (regno == CA_REGNO)
24215 return 101; /* XER */
24216 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
24217 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
24218 The actual code emitted saves the whole of CR, so we map CR2_REGNO
24219 to the DWARF reg for CR. */
24220 if (format == 1 && regno == CR2_REGNO)
24221 return 64;
24222 if (CR_REGNO_P (regno))
24223 return regno - CR0_REGNO + 86;
24224 if (regno == VRSAVE_REGNO)
24225 return 356;
24226 if (regno == VSCR_REGNO)
24227 return 67;
24228
24229 /* These do not make much sense. */
24230 if (regno == FRAME_POINTER_REGNUM)
24231 return 111;
24232 if (regno == ARG_POINTER_REGNUM)
24233 return 67;
24234 if (regno == 64)
24235 return 100;
24236
24237 gcc_unreachable ();
24238 #endif
24239 }
24240
24241 /* We use the GCC 7 (and before) internal number for non-DWARF debug
24242 information, and also for .eh_frame. */
24243 /* Translate the regnos to their numbers in GCC 7 (and before). */
24244 if (regno <= 31)
24245 return regno;
24246 if (FP_REGNO_P (regno))
24247 return regno - FIRST_FPR_REGNO + 32;
24248 if (ALTIVEC_REGNO_P (regno))
24249 return regno - FIRST_ALTIVEC_REGNO + 77;
24250 if (regno == LR_REGNO)
24251 return 65;
24252 if (regno == CTR_REGNO)
24253 return 66;
24254 if (regno == CA_REGNO)
24255 return 76; /* XER */
24256 if (CR_REGNO_P (regno))
24257 return regno - CR0_REGNO + 68;
24258 if (regno == VRSAVE_REGNO)
24259 return 109;
24260 if (regno == VSCR_REGNO)
24261 return 110;
24262
24263 if (regno == FRAME_POINTER_REGNUM)
24264 return 111;
24265 if (regno == ARG_POINTER_REGNUM)
24266 return 67;
24267 if (regno == 64)
24268 return 64;
24269
24270 gcc_unreachable ();
24271 }
24272
24273 /* Target hook for eh_return_filter_mode. */
24274 static scalar_int_mode
24275 rs6000_eh_return_filter_mode (void)
24276 {
24277 return TARGET_32BIT ? SImode : word_mode;
24278 }
24279
24280 /* Target hook for translate_mode_attribute. */
24281 static machine_mode
24282 rs6000_translate_mode_attribute (machine_mode mode)
24283 {
24284 if ((FLOAT128_IEEE_P (mode)
24285 && ieee128_float_type_node == long_double_type_node)
24286 || (FLOAT128_IBM_P (mode)
24287 && ibm128_float_type_node == long_double_type_node))
24288 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
24289 return mode;
24290 }
24291
24292 /* Target hook for scalar_mode_supported_p. */
24293 static bool
24294 rs6000_scalar_mode_supported_p (scalar_mode mode)
24295 {
24296 /* -m32 does not support TImode. This is the default, from
24297 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
24298 same ABI as for -m32. But default_scalar_mode_supported_p allows
24299 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
24300 for -mpowerpc64. */
24301 if (TARGET_32BIT && mode == TImode)
24302 return false;
24303
24304 if (DECIMAL_FLOAT_MODE_P (mode))
24305 return default_decimal_float_supported_p ();
24306 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
24307 return true;
24308 else
24309 return default_scalar_mode_supported_p (mode);
24310 }
24311
24312 /* Target hook for libgcc_floating_mode_supported_p. */
24313
24314 static bool
24315 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24316 {
24317 switch (mode)
24318 {
24319 case E_SFmode:
24320 case E_DFmode:
24321 case E_TFmode:
24322 return true;
24323
24324 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24325 if long double does not use the IEEE 128-bit format. If long double
24326 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24327 Because the code will not use KFmode in that case, there will be aborts
24328 because it can't find KFmode in the Floatn types. */
24329 case E_KFmode:
24330 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
24331
24332 default:
24333 return false;
24334 }
24335 }
24336
24337 /* Target hook for vector_mode_supported_p. */
24338 static bool
24339 rs6000_vector_mode_supported_p (machine_mode mode)
24340 {
24341 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24342 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24343 double-double. */
24344 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
24345 return true;
24346
24347 else
24348 return false;
24349 }
24350
24351 /* Target hook for floatn_mode. */
24352 static opt_scalar_float_mode
24353 rs6000_floatn_mode (int n, bool extended)
24354 {
24355 if (extended)
24356 {
24357 switch (n)
24358 {
24359 case 32:
24360 return DFmode;
24361
24362 case 64:
24363 if (TARGET_FLOAT128_TYPE)
24364 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24365 else
24366 return opt_scalar_float_mode ();
24367
24368 case 128:
24369 return opt_scalar_float_mode ();
24370
24371 default:
24372 /* Those are the only valid _FloatNx types. */
24373 gcc_unreachable ();
24374 }
24375 }
24376 else
24377 {
24378 switch (n)
24379 {
24380 case 32:
24381 return SFmode;
24382
24383 case 64:
24384 return DFmode;
24385
24386 case 128:
24387 if (TARGET_FLOAT128_TYPE)
24388 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24389 else
24390 return opt_scalar_float_mode ();
24391
24392 default:
24393 return opt_scalar_float_mode ();
24394 }
24395 }
24396
24397 }
24398
24399 /* Target hook for c_mode_for_suffix. */
24400 static machine_mode
24401 rs6000_c_mode_for_suffix (char suffix)
24402 {
24403 if (TARGET_FLOAT128_TYPE)
24404 {
24405 if (suffix == 'q' || suffix == 'Q')
24406 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24407
24408 /* At the moment, we are not defining a suffix for IBM extended double.
24409 If/when the default for -mabi=ieeelongdouble is changed, and we want
24410 to support __ibm128 constants in legacy library code, we may need to
24411 re-evaluate this decision. Currently, c-lex.cc only supports 'w' and
24412 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24413 __float80 constants. */
24414 }
24415
24416 return VOIDmode;
24417 }
24418
24419 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. For TI_LONG_DOUBLE_TYPE
24420 (the long double type), return TFmode when long double is 128 bits and
24421 DFmode otherwise; go with the default for the other types. */
24422
24423 static machine_mode
24424 rs6000_c_mode_for_floating_type (enum tree_index ti)
24425 {
24426 if (ti == TI_LONG_DOUBLE_TYPE)
24427 return rs6000_long_double_type_size == 128 ? TFmode : DFmode;
24428 return default_mode_for_floating_type (ti);
24429 }
24430
24431 /* Target hook for invalid_arg_for_unprototyped_fn. */
24432 static const char *
24433 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
24434 {
24435 return (!rs6000_darwin64_abi
24436 && typelist == 0
24437 && VECTOR_TYPE_P (TREE_TYPE (val))
24438 && (funcdecl == NULL_TREE
24439 || (TREE_CODE (funcdecl) == FUNCTION_DECL
24440 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD
24441 && !fndecl_built_in_p (funcdecl, BUILT_IN_CLASSIFY_TYPE))))
24442 ? N_("AltiVec argument passed to unprototyped function")
24443 : NULL;
24444 }
24445
24446 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24447 setup by using __stack_chk_fail_local hidden function instead of
24448 calling __stack_chk_fail directly. Otherwise it is better to call
24449 __stack_chk_fail directly. */
24450
24451 static tree ATTRIBUTE_UNUSED
24452 rs6000_stack_protect_fail (void)
24453 {
24454 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
24455 ? default_hidden_stack_protect_fail ()
24456 : default_external_stack_protect_fail ();
24457 }
24458
24459 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24460
24461 #if TARGET_ELF
24462 static unsigned HOST_WIDE_INT
24463 rs6000_asan_shadow_offset (void)
24464 {
24465 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
24466 }
24467 #endif
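/* Editor's note (illustrative): AddressSanitizer computes shadow
   addresses as (addr >> 3) + offset, so the hook above places the
   shadow region at 1 << 41 on 64-bit and 1 << 29 on 32-bit targets.  */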
24468 \f
24469 /* Mask options that we want to support inside attribute((target)) and
24470 #pragma GCC target operations. Note, we do not include things like
24471 64/32-bit, endianness, hard/soft floating point, etc. that would have
24472 different calling sequences. */
24473
24474 struct rs6000_opt_mask {
24475 const char *name; /* option name */
24476 HOST_WIDE_INT mask; /* mask to set */
24477 bool invert; /* invert sense of mask */
24478 bool valid_target; /* option is a target option */
24479 };
24480
24481 static struct rs6000_opt_mask const rs6000_opt_masks[] =
24482 {
24483 { "altivec", OPTION_MASK_ALTIVEC, false, true },
24484 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
24485 false, true },
24486 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
24487 false, true },
24488 { "cmpb", OPTION_MASK_CMPB, false, true },
24489 { "crypto", OPTION_MASK_CRYPTO, false, true },
24490 { "direct-move", 0, false, true },
24491 { "dlmzb", OPTION_MASK_DLMZB, false, true },
24492 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
24493 false, true },
24494 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
24495 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
24496 { "fprnd", OPTION_MASK_FPRND, false, true },
24497 { "power10", OPTION_MASK_POWER10, false, true },
24498 { "power11", OPTION_MASK_POWER11, false, false },
24499 { "hard-dfp", OPTION_MASK_DFP, false, true },
24500 { "htm", OPTION_MASK_HTM, false, true },
24501 { "isel", OPTION_MASK_ISEL, false, true },
24502 { "mfcrf", OPTION_MASK_MFCRF, false, true },
24503 { "mfpgpr", 0, false, true },
24504 { "mma", OPTION_MASK_MMA, false, true },
24505 { "modulo", OPTION_MASK_MODULO, false, true },
24506 { "mulhw", OPTION_MASK_MULHW, false, true },
24507 { "multiple", OPTION_MASK_MULTIPLE, false, true },
24508 { "pcrel", OPTION_MASK_PCREL, false, true },
24509 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
24510 { "popcntb", OPTION_MASK_POPCNTB, false, true },
24511 { "popcntd", OPTION_MASK_POPCNTD, false, true },
24512 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
24513 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
24514 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
24515 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
24516 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
24517 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
24518 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
24519 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
24520 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
24521 { "prefixed", OPTION_MASK_PREFIXED, false, true },
24522 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
24523 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
24524 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
24525 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
24526 { "string", 0, false, true },
24527 { "update", OPTION_MASK_NO_UPDATE, true, true },
24528 { "vsx", OPTION_MASK_VSX, false, true },
24529 #ifdef OPTION_MASK_64BIT
24530 #if TARGET_AIX_OS
24531 { "aix64", OPTION_MASK_64BIT, false, false },
24532 { "aix32", OPTION_MASK_64BIT, true, false },
24533 #else
24534 { "64", OPTION_MASK_64BIT, false, false },
24535 { "32", OPTION_MASK_64BIT, true, false },
24536 #endif
24537 #endif
24538 #ifdef OPTION_MASK_EABI
24539 { "eabi", OPTION_MASK_EABI, false, false },
24540 #endif
24541 #ifdef OPTION_MASK_LITTLE_ENDIAN
24542 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
24543 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
24544 #endif
24545 #ifdef OPTION_MASK_RELOCATABLE
24546 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
24547 #endif
24548 #ifdef OPTION_MASK_STRICT_ALIGN
24549 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
24550 #endif
24551 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
24552 { "string", 0, false, false },
24553 };
24554
24555 /* Option variables that we want to support inside attribute((target)) and
24556 #pragma GCC target operations. */
24557
24558 struct rs6000_opt_var {
24559 const char *name; /* option name */
24560 size_t global_offset; /* offset of the option in global_options. */
24561 size_t target_offset; /* offset of the option in target options. */
24562 };
24563
24564 static struct rs6000_opt_var const rs6000_opt_vars[] =
24565 {
24566 { "friz",
24567 offsetof (struct gcc_options, x_TARGET_FRIZ),
24568 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24569 { "avoid-indexed-addresses",
24570 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24571 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24572 { "longcall",
24573 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24574 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24575 { "optimize-swaps",
24576 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24577 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24578 { "allow-movmisalign",
24579 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24580 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24581 { "sched-groups",
24582 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24583 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24584 { "always-hint",
24585 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24586 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24587 { "align-branch-targets",
24588 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24589 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24590 { "sched-prolog",
24591 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24592 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24593 { "sched-epilog",
24594 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24595 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24596 { "speculate-indirect-jumps",
24597 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24598 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24599 };
24600
24601 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24602 parsing. Return true if there were no errors. */
24603
24604 static bool
24605 rs6000_inner_target_options (tree args, bool attr_p)
24606 {
24607 bool ret = true;
24608
24609 if (args == NULL_TREE)
24610 ;
24611
24612 else if (TREE_CODE (args) == STRING_CST)
24613 {
24614 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24615 char *q;
24616
24617 while ((q = strtok (p, ",")) != NULL)
24618 {
24619 bool error_p = false;
24620 bool not_valid_p = false;
24621 const char *cpu_opt = NULL;
24622
24623 p = NULL;
24624 if (startswith (q, "cpu="))
24625 {
24626 int cpu_index = rs6000_cpu_name_lookup (q+4);
24627 if (cpu_index >= 0)
24628 rs6000_cpu_index = cpu_index;
24629 else
24630 {
24631 error_p = true;
24632 cpu_opt = q+4;
24633 }
24634 }
24635 else if (startswith (q, "tune="))
24636 {
24637 int tune_index = rs6000_cpu_name_lookup (q+5);
24638 if (tune_index >= 0)
24639 rs6000_tune_index = tune_index;
24640 else
24641 {
24642 error_p = true;
24643 cpu_opt = q+5;
24644 }
24645 }
24646 else
24647 {
24648 size_t i;
24649 bool invert = false;
24650 char *r = q;
24651
24652 error_p = true;
24653 if (startswith (r, "no-"))
24654 {
24655 invert = true;
24656 r += 3;
24657 }
24658
24659 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24660 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24661 {
24662 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24663
24664 if (!rs6000_opt_masks[i].valid_target)
24665 not_valid_p = true;
24666 else
24667 {
24668 error_p = false;
24669 rs6000_isa_flags_explicit |= mask;
24670
24671 /* VSX needs altivec, so -mvsx automagically sets
24672 altivec and disables -mavoid-indexed-addresses. */
24673 if (!invert)
24674 {
24675 if (mask == OPTION_MASK_VSX)
24676 {
24677 if (!(rs6000_isa_flags_explicit
24678 & OPTION_MASK_ALTIVEC))
24679 mask |= OPTION_MASK_ALTIVEC;
24680 if (!OPTION_SET_P (TARGET_AVOID_XFORM))
24681 TARGET_AVOID_XFORM = 0;
24682 }
24683 }
24684
24685 if (rs6000_opt_masks[i].invert)
24686 invert = !invert;
24687
24688 if (invert)
24689 rs6000_isa_flags &= ~mask;
24690 else
24691 rs6000_isa_flags |= mask;
24692 }
24693 break;
24694 }
24695
24696 if (error_p && !not_valid_p)
24697 {
24698 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24699 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24700 {
24701 size_t j = rs6000_opt_vars[i].global_offset;
24702 *((int *) ((char *) &global_options + j)) = !invert;
24703 *((int *) ((char *) &global_options_set + j)) = 1;
24704 error_p = false;
24705 not_valid_p = false;
24706 break;
24707 }
24708 }
24709 }
24710
24711 if (error_p)
24712 {
24713 const char *eprefix, *esuffix;
24714
24715 ret = false;
24716 if (attr_p)
24717 {
24718 eprefix = "__attribute__((__target__(";
24719 esuffix = ")))";
24720 }
24721 else
24722 {
24723 eprefix = "#pragma GCC target ";
24724 esuffix = "";
24725 }
24726
24727 if (cpu_opt)
24728 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24729 q, esuffix);
24730 else if (not_valid_p)
24731 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24732 else
24733 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24734 }
24735 }
24736 }
24737
24738 else if (TREE_CODE (args) == TREE_LIST)
24739 {
24740 do
24741 {
24742 tree value = TREE_VALUE (args);
24743 if (value)
24744 {
24745 bool ret2 = rs6000_inner_target_options (value, attr_p);
24746 if (!ret2)
24747 ret = false;
24748 }
24749 args = TREE_CHAIN (args);
24750 }
24751 while (args != NULL_TREE);
24752 }
24753
24754 else
24755 {
24756 error ("attribute %<target%> argument not a string");
24757 return false;
24758 }
24759
24760 return ret;
24761 }
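/* Editor's usage sketch (illustrative; the option names come from the
   tables above).  The parser accepts comma-separated entries such as

     #pragma GCC target ("cpu=power9,no-vsx")
     __attribute__((__target__("popcntd,htm")))

   where "cpu="/"tune=" select a processor, a "no-" prefix inverts a
   flag, and the remaining names are looked up in rs6000_opt_masks and
   rs6000_opt_vars.  */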
24762
24763 /* Print out the target options as a list for -mdebug=target. */
24764
24765 static void
24766 rs6000_debug_target_options (tree args, const char *prefix)
24767 {
24768 if (args == NULL_TREE)
24769 fprintf (stderr, "%s<NULL>", prefix);
24770
24771 else if (TREE_CODE (args) == STRING_CST)
24772 {
24773 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24774 char *q;
24775
24776 while ((q = strtok (p, ",")) != NULL)
24777 {
24778 p = NULL;
24779 fprintf (stderr, "%s\"%s\"", prefix, q);
24780 prefix = ", ";
24781 }
24782 }
24783
24784 else if (TREE_CODE (args) == TREE_LIST)
24785 {
24786 do
24787 {
24788 tree value = TREE_VALUE (args);
24789 if (value)
24790 {
24791 rs6000_debug_target_options (value, prefix);
24792 prefix = ", ";
24793 }
24794 args = TREE_CHAIN (args);
24795 }
24796 while (args != NULL_TREE);
24797 }
24798
24799 else
24800 gcc_unreachable ();
24801
24802 return;
24803 }
24804
24805 \f
24806 /* Hook to validate attribute((target("..."))). */
24807
24808 static bool
24809 rs6000_valid_attribute_p (tree fndecl,
24810 tree ARG_UNUSED (name),
24811 tree args,
24812 int flags)
24813 {
24814 struct cl_target_option cur_target;
24815 bool ret;
24816 tree old_optimize;
24817 tree new_target, new_optimize;
24818 tree func_optimize;
24819
24820 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24821
24822 if (TARGET_DEBUG_TARGET)
24823 {
24824 tree tname = DECL_NAME (fndecl);
24825 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24826 if (tname)
24827 fprintf (stderr, "function: %.*s\n",
24828 (int) IDENTIFIER_LENGTH (tname),
24829 IDENTIFIER_POINTER (tname));
24830 else
24831 fprintf (stderr, "function: unknown\n");
24832
24833 fprintf (stderr, "args:");
24834 rs6000_debug_target_options (args, " ");
24835 fprintf (stderr, "\n");
24836
24837 if (flags)
24838 fprintf (stderr, "flags: 0x%x\n", flags);
24839
24840 fprintf (stderr, "--------------------\n");
24841 }
24842
24843 /* attribute((target("default"))) does nothing, beyond
24844 affecting multi-versioning. */
24845 if (TREE_VALUE (args)
24846 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24847 && TREE_CHAIN (args) == NULL_TREE
24848 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24849 return true;
24850
24851 old_optimize = build_optimization_node (&global_options,
24852 &global_options_set);
24853 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24854
24855 /* If the function changed the optimization levels as well as setting target
24856 options, start with the optimizations specified. */
24857 if (func_optimize && func_optimize != old_optimize)
24858 cl_optimization_restore (&global_options, &global_options_set,
24859 TREE_OPTIMIZATION (func_optimize));
24860
24861 /* The target attributes may also change some optimization flags, so update
24862 the optimization options if necessary. */
24863 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24864 rs6000_cpu_index = rs6000_tune_index = -1;
24865 ret = rs6000_inner_target_options (args, true);
24866
24867 /* Set up any additional state. */
24868 if (ret)
24869 {
24870 ret = rs6000_option_override_internal (false);
24871 new_target = build_target_option_node (&global_options,
24872 &global_options_set);
24873 }
24874 else
24875 new_target = NULL;
24876
24877 new_optimize = build_optimization_node (&global_options,
24878 &global_options_set);
24879
24880 if (!new_target)
24881 ret = false;
24882
24883 else if (fndecl)
24884 {
24885 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24886
24887 if (old_optimize != new_optimize)
24888 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24889 }
24890
24891 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24892
24893 if (old_optimize != new_optimize)
24894 cl_optimization_restore (&global_options, &global_options_set,
24895 TREE_OPTIMIZATION (old_optimize));
24896
24897 return ret;
24898 }
24899
24900 \f
24901 /* Hook to validate the current #pragma GCC target and set the state, and
24902 update the macros based on what was changed. If ARGS is NULL, then
24903 POP_TARGET is used to reset the options. */
24904
24905 bool
24906 rs6000_pragma_target_parse (tree args, tree pop_target)
24907 {
24908 tree prev_tree = build_target_option_node (&global_options,
24909 &global_options_set);
24910 tree cur_tree;
24911 struct cl_target_option *prev_opt, *cur_opt;
24912 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24913
24914 if (TARGET_DEBUG_TARGET)
24915 {
24916 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24917 fprintf (stderr, "args:");
24918 rs6000_debug_target_options (args, " ");
24919 fprintf (stderr, "\n");
24920
24921 if (pop_target)
24922 {
24923 fprintf (stderr, "pop_target:\n");
24924 debug_tree (pop_target);
24925 }
24926 else
24927 fprintf (stderr, "pop_target: <NULL>\n");
24928
24929 fprintf (stderr, "--------------------\n");
24930 }
24931
24932 if (! args)
24933 {
24934 cur_tree = ((pop_target)
24935 ? pop_target
24936 : target_option_default_node);
24937 cl_target_option_restore (&global_options, &global_options_set,
24938 TREE_TARGET_OPTION (cur_tree));
24939 }
24940 else
24941 {
24942 rs6000_cpu_index = rs6000_tune_index = -1;
24943 if (!rs6000_inner_target_options (args, false)
24944 || !rs6000_option_override_internal (false)
24945 || (cur_tree = build_target_option_node (&global_options,
24946 &global_options_set))
24947 == NULL_TREE)
24948 {
24949 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24950 fprintf (stderr, "invalid pragma\n");
24951
24952 return false;
24953 }
24954 }
24955
24956 target_option_current_node = cur_tree;
24957 rs6000_activate_target_options (target_option_current_node);
24958
24959 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24960 change the macros that are defined. */
24961 if (rs6000_target_modify_macros_ptr)
24962 {
24963 prev_opt = TREE_TARGET_OPTION (prev_tree);
24964 prev_flags = prev_opt->x_rs6000_isa_flags;
24965
24966 cur_opt = TREE_TARGET_OPTION (cur_tree);
24967 cur_flags = cur_opt->x_rs6000_isa_flags;
24968
24969 diff_flags = (prev_flags ^ cur_flags);
24970
24971 if (diff_flags != 0)
24972 {
24973 /* Delete old macros. */
24974 rs6000_target_modify_macros_ptr (false,
24975 prev_flags & diff_flags);
24976
24977 /* Define new macros. */
24978 rs6000_target_modify_macros_ptr (true,
24979 cur_flags & diff_flags);
24980 }
24981 }
24982
24983 return true;
24984 }
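/* Editor's note (illustrative): the flag diffing above means a pragma
   such as '#pragma GCC target ("no-vsx")' undefines and redefines only
   the predefined macros tied to the bits that actually changed (for
   example __VSX__, assuming the usual flag-to-macro mapping), leaving
   the rest untouched.  */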
24985
24986 \f
24987 /* Remember the last target of rs6000_set_current_function. */
24988 static GTY(()) tree rs6000_previous_fndecl;
24989
24990 /* Restore target's globals from NEW_TREE and invalidate the
24991 rs6000_previous_fndecl cache. */
24992
24993 void
24994 rs6000_activate_target_options (tree new_tree)
24995 {
24996 cl_target_option_restore (&global_options, &global_options_set,
24997 TREE_TARGET_OPTION (new_tree));
24998 if (TREE_TARGET_GLOBALS (new_tree))
24999 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
25000 else if (new_tree == target_option_default_node)
25001 restore_target_globals (&default_target_globals);
25002 else
25003 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
25004 rs6000_previous_fndecl = NULL_TREE;
25005 }
25006
25007 /* Establish appropriate back-end context for processing the function
25008 FNDECL. The argument might be NULL to indicate processing at top
25009 level, outside of any function scope. */
25010 static void
25011 rs6000_set_current_function (tree fndecl)
25012 {
25013 if (TARGET_DEBUG_TARGET)
25014 {
25015 fprintf (stderr, "\n==================== rs6000_set_current_function");
25016
25017 if (fndecl)
25018 fprintf (stderr, ", fndecl %s (%p)",
25019 (DECL_NAME (fndecl)
25020 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
25021 : "<unknown>"), (void *)fndecl);
25022
25023 if (rs6000_previous_fndecl)
25024 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
25025
25026 fprintf (stderr, "\n");
25027 }
25028
25029 /* Only change the context if the function changes. This hook is called
25030 several times in the course of compiling a function, and we don't want to
25031 slow things down too much or call target_reinit when it isn't safe. */
25032 if (fndecl == rs6000_previous_fndecl)
25033 return;
25034
25035 tree old_tree;
25036 if (rs6000_previous_fndecl == NULL_TREE)
25037 old_tree = target_option_current_node;
25038 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
25039 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
25040 else
25041 old_tree = target_option_default_node;
25042
25043 tree new_tree;
25044 if (fndecl == NULL_TREE)
25045 {
25046 if (old_tree != target_option_current_node)
25047 new_tree = target_option_current_node;
25048 else
25049 new_tree = NULL_TREE;
25050 }
25051 else
25052 {
25053 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25054 if (new_tree == NULL_TREE)
25055 new_tree = target_option_default_node;
25056 }
25057
25058 if (TARGET_DEBUG_TARGET)
25059 {
25060 if (new_tree)
25061 {
25062 fprintf (stderr, "\nnew fndecl target specific options:\n");
25063 debug_tree (new_tree);
25064 }
25065
25066 if (old_tree)
25067 {
25068 fprintf (stderr, "\nold fndecl target specific options:\n");
25069 debug_tree (old_tree);
25070 }
25071
25072 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
25073 fprintf (stderr, "--------------------\n");
25074 }
25075
25076 if (new_tree && old_tree != new_tree)
25077 rs6000_activate_target_options (new_tree);
25078
25079 if (fndecl)
25080 rs6000_previous_fndecl = fndecl;
25081 }
25082
25083 \f
25084 /* Save the current options */
25085
25086 static void
25087 rs6000_function_specific_save (struct cl_target_option *ptr,
25088 struct gcc_options *opts,
25089 struct gcc_options */* opts_set */)
25090 {
25091 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
25092 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
25093 }
25094
25095 /* Restore the current options */
25096
25097 static void
25098 rs6000_function_specific_restore (struct gcc_options *opts,
25099 struct gcc_options */* opts_set */,
25100 struct cl_target_option *ptr)
25101
25102 {
25103 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
25104 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
25105 (void) rs6000_option_override_internal (false);
25106 }
25107
25108 /* Print the current options */
25109
25110 static void
25111 rs6000_function_specific_print (FILE *file, int indent,
25112 struct cl_target_option *ptr)
25113 {
25114 rs6000_print_isa_options (file, indent, "Isa options set",
25115 ptr->x_rs6000_isa_flags);
25116
25117 rs6000_print_isa_options (file, indent, "Isa options explicit",
25118 ptr->x_rs6000_isa_flags_explicit);
25119 }
25120
25121 /* Helper function to print the current isa or misc options on a line. */
25122
25123 static void
25124 rs6000_print_options_internal (FILE *file,
25125 int indent,
25126 const char *string,
25127 HOST_WIDE_INT flags,
25128 const char *prefix,
25129 const struct rs6000_opt_mask *opts,
25130 size_t num_elements)
25131 {
25132 size_t i;
25133 size_t start_column = 0;
25134 size_t cur_column;
25135 size_t max_column = 120;
25136 size_t prefix_len = strlen (prefix);
25137 size_t comma_len = 0;
25138 const char *comma = "";
25139
25140 if (indent)
25141 start_column += fprintf (file, "%*s", indent, "");
25142
25143 if (!flags)
25144 {
25145 fprintf (file, DEBUG_FMT_S, string, "<none>");
25146 return;
25147 }
25148
25149 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
25150
25151 /* Print the various mask options. */
25152 cur_column = start_column;
25153 for (i = 0; i < num_elements; i++)
25154 {
25155 bool invert = opts[i].invert;
25156 const char *name = opts[i].name;
25157 const char *no_str = "";
25158 HOST_WIDE_INT mask = opts[i].mask;
25159 size_t len = comma_len + prefix_len + strlen (name);
25160
25161 if (!invert)
25162 {
25163 if ((flags & mask) == 0)
25164 {
25165 no_str = "no-";
25166 len += strlen ("no-");
25167 }
25168
25169 flags &= ~mask;
25170 }
25171
25172 else
25173 {
25174 if ((flags & mask) != 0)
25175 {
25176 no_str = "no-";
25177 len += strlen ("no-");
25178 }
25179
25180 flags |= mask;
25181 }
25182
25183 cur_column += len;
25184 if (cur_column > max_column)
25185 {
25186 fprintf (file, ", \\\n%*s", (int)start_column, "");
25187 cur_column = start_column + len;
25188 comma = "";
25189 }
25190
25191 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
25192 comma = ", ";
25193 comma_len = strlen (", ");
25194 }
25195
25196 fputs ("\n", file);
25197 }
25198
25199 /* Helper function to print the current isa options on a line. */
25200
25201 static void
25202 rs6000_print_isa_options (FILE *file, int indent, const char *string,
25203 HOST_WIDE_INT flags)
25204 {
25205 rs6000_print_options_internal (file, indent, string, flags, "-m",
25206 &rs6000_opt_masks[0],
25207 ARRAY_SIZE (rs6000_opt_masks));
25208 }
25209
25210 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
25211 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
25212 -mupper-regs-df, etc.).
25213
25214 This function does not handle explicit options such as the user specifying
25215 -mdirect-move. These are handled in rs6000_option_override_internal, and
25216 the appropriate error is given if needed.
25217
25218 We return a mask of all of the implicit options that should not be enabled
25219 by default. */
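
/* For example (illustrative), compiling with
     -mcpu=power9 -mno-vsx -mdirect-move
   reports "error: '-mno-vsx' turns off '-mdirect-move'" because
   -mdirect-move was given explicitly, while vector options that were
   only enabled implicitly by -mcpu=power9 are cleared silently and
   added to the returned ignore mask.  */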
25220
25221 static HOST_WIDE_INT
25222 rs6000_disable_incompatible_switches (void)
25223 {
25224 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
25225 size_t i, j;
25226
25227 static const struct {
25228 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
25229 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
25230 const char *const name; /* name of the switch. */
25231 } flags[] = {
25232 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
25233 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
25234 };
25235
25236 for (i = 0; i < ARRAY_SIZE (flags); i++)
25237 {
25238 HOST_WIDE_INT no_flag = flags[i].no_flag;
25239
25240 if ((rs6000_isa_flags & no_flag) == 0
25241 && (rs6000_isa_flags_explicit & no_flag) != 0)
25242 {
25243 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
25244 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
25245 & rs6000_isa_flags
25246 & dep_flags);
25247
25248 if (set_flags)
25249 {
25250 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
25251 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
25252 {
25253 set_flags &= ~rs6000_opt_masks[j].mask;
25254 error ("%<-mno-%s%> turns off %<-m%s%>",
25255 flags[i].name,
25256 rs6000_opt_masks[j].name);
25257 }
25258
25259 gcc_assert (!set_flags);
25260 }
25261
25262 rs6000_isa_flags &= ~dep_flags;
25263 ignore_masks |= no_flag | dep_flags;
25264 }
25265 }
25266
25267 return ignore_masks;
25268 }
25269
25270 \f
25271 /* Helper function for printing the function name when debugging. */
25272
25273 static const char *
25274 get_decl_name (tree fn)
25275 {
25276 tree name;
25277
25278 if (!fn)
25279 return "<null>";
25280
25281 name = DECL_NAME (fn);
25282 if (!name)
25283 return "<no-name>";
25284
25285 return IDENTIFIER_POINTER (name);
25286 }
25287
25288 /* Return the clone id of the target we are compiling code for in a target
25289 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
25290 the priority list for the target clones (ordered from lowest to
25291 highest). */
25292
25293 static int
25294 rs6000_clone_priority (tree fndecl)
25295 {
25296 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25297 HOST_WIDE_INT isa_masks;
25298 int ret = CLONE_DEFAULT;
25299 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
25300 const char *attrs_str = NULL;
25301
25302 attrs = TREE_VALUE (TREE_VALUE (attrs));
25303 attrs_str = TREE_STRING_POINTER (attrs);
25304
25305 /* Return priority zero for the default function. Otherwise return the ISA
25306 needed for the function.  */
25307 if (strcmp (attrs_str, "default") != 0)
25308 {
25309 if (fn_opts == NULL_TREE)
25310 fn_opts = target_option_default_node;
25311
25312 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
25313 isa_masks = rs6000_isa_flags;
25314 else
25315 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
25316
25317 for (ret = CLONE_MAX - 1; ret != 0; ret--)
25318 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
25319 break;
25320 }
25321
25322 if (TARGET_DEBUG_TARGET)
25323 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
25324 get_decl_name (fndecl), ret);
25325
25326 return ret;
25327 }
25328
25329 /* This compares the priority of target features in function DECL1 and DECL2.
25330 It returns positive value if DECL1 is higher priority, negative value if
25331 DECL2 is higher priority and 0 if they are the same. Note, priorities are
25332 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25333
25334 static int
25335 rs6000_compare_version_priority (tree decl1, tree decl2)
25336 {
25337 int priority1 = rs6000_clone_priority (decl1);
25338 int priority2 = rs6000_clone_priority (decl2);
25339 int ret = priority1 - priority2;
25340
25341 if (TARGET_DEBUG_TARGET)
25342 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
25343 get_decl_name (decl1), get_decl_name (decl2), ret);
25344
25345 return ret;
25346 }
25347
25348 /* Make a dispatcher declaration for the multi-versioned function DECL.
25349 Calls to DECL function will be replaced with calls to the dispatcher
25350 by the front-end. Returns the decl of the dispatcher function. */
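
/* For example, a user-level declaration such as (illustrative):

     __attribute__ ((target_clones ("cpu=power9", "default")))
     long mod3 (long a) { return a % 3; }

   makes the front-end route calls to mod3 through the dispatcher
   created here, which picks the power9 clone at run time when the
   hardware supports it.  */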
25351
25352 static tree
25353 rs6000_get_function_versions_dispatcher (void *decl)
25354 {
25355 tree fn = (tree) decl;
25356 struct cgraph_node *node = NULL;
25357 struct cgraph_node *default_node = NULL;
25358 struct cgraph_function_version_info *node_v = NULL;
25359 struct cgraph_function_version_info *first_v = NULL;
25360
25361 tree dispatch_decl = NULL;
25362
25363 struct cgraph_function_version_info *default_version_info = NULL;
25364 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
25365
25366 if (TARGET_DEBUG_TARGET)
25367 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
25368 get_decl_name (fn));
25369
25370 node = cgraph_node::get (fn);
25371 gcc_assert (node != NULL);
25372
25373 node_v = node->function_version ();
25374 gcc_assert (node_v != NULL);
25375
25376 if (node_v->dispatcher_resolver != NULL)
25377 return node_v->dispatcher_resolver;
25378
25379 /* Find the default version and make it the first node. */
25380 first_v = node_v;
25381 /* Go to the beginning of the chain. */
25382 while (first_v->prev != NULL)
25383 first_v = first_v->prev;
25384
25385 default_version_info = first_v;
25386 while (default_version_info != NULL)
25387 {
25388 const tree decl2 = default_version_info->this_node->decl;
25389 if (is_function_default_version (decl2))
25390 break;
25391 default_version_info = default_version_info->next;
25392 }
25393
25394 /* If there is no default node, just return NULL. */
25395 if (default_version_info == NULL)
25396 return NULL;
25397
25398 /* Make default info the first node. */
25399 if (first_v != default_version_info)
25400 {
25401 default_version_info->prev->next = default_version_info->next;
25402 if (default_version_info->next)
25403 default_version_info->next->prev = default_version_info->prev;
25404 first_v->prev = default_version_info;
25405 default_version_info->next = first_v;
25406 default_version_info->prev = NULL;
25407 }
25408
25409 default_node = default_version_info->this_node;
25410
25411 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25412 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25413 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25414 "exports hardware capability bits");
25415 #else
25416
25417 if (targetm.has_ifunc_p ())
25418 {
25419 struct cgraph_function_version_info *it_v = NULL;
25420 struct cgraph_node *dispatcher_node = NULL;
25421 struct cgraph_function_version_info *dispatcher_version_info = NULL;
25422
25423 /* Right now, the dispatching is done via ifunc. */
25424 dispatch_decl = make_dispatcher_decl (default_node->decl);
25425 TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);
25426
25427 dispatcher_node = cgraph_node::get_create (dispatch_decl);
25428 gcc_assert (dispatcher_node != NULL);
25429 dispatcher_node->dispatcher_function = 1;
25430 dispatcher_version_info
25431 = dispatcher_node->insert_new_function_version ();
25432 dispatcher_version_info->next = default_version_info;
25433 dispatcher_node->definition = 1;
25434
25435 /* Set the dispatcher for all the versions. */
25436 it_v = default_version_info;
25437 while (it_v != NULL)
25438 {
25439 it_v->dispatcher_resolver = dispatch_decl;
25440 it_v = it_v->next;
25441 }
25442 }
25443 else
25444 {
25445 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25446 "multiversioning needs %<ifunc%> which is not supported "
25447 "on this target");
25448 }
25449 #endif
25450
25451 return dispatch_decl;
25452 }
25453
25454 /* Make the resolver function decl to dispatch the versions of a multi-
25455 versioned function, DEFAULT_DECL. Create an empty basic block in the
25456 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25457 function. */
25458
25459 static tree
25460 make_resolver_func (const tree default_decl,
25461 const tree dispatch_decl,
25462 basic_block *empty_bb)
25463 {
25464 /* Make the resolver function static. The resolver function returns
25465 void *. */
25466 tree decl_name = clone_function_name (default_decl, "resolver");
25467 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
25468 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
25469 tree decl = build_fn_decl (resolver_name, type);
25470 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
25471
25472 DECL_NAME (decl) = decl_name;
25473 TREE_USED (decl) = 1;
25474 DECL_ARTIFICIAL (decl) = 1;
25475 DECL_IGNORED_P (decl) = 0;
25476 TREE_PUBLIC (decl) = 0;
25477 DECL_UNINLINABLE (decl) = 1;
25478
25479 /* The resolver is not external; its body is generated. */
25480 DECL_EXTERNAL (decl) = 0;
25481 DECL_EXTERNAL (dispatch_decl) = 0;
25482
25483 DECL_CONTEXT (decl) = NULL_TREE;
25484 DECL_INITIAL (decl) = make_node (BLOCK);
25485 DECL_STATIC_CONSTRUCTOR (decl) = 0;
25486
25487 if (DECL_COMDAT_GROUP (default_decl)
25488 || TREE_PUBLIC (default_decl))
25489 {
25490 /* In this case, each translation unit with a call to this
25491 versioned function will put out a resolver. Ensure it
25492 is comdat to keep just one copy. */
25493 DECL_COMDAT (decl) = 1;
25494 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25495 }
25496 else
25497 TREE_PUBLIC (dispatch_decl) = 0;
25498
25499 /* Build result decl and add to function_decl. */
25500 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25501 DECL_CONTEXT (t) = decl;
25502 DECL_ARTIFICIAL (t) = 1;
25503 DECL_IGNORED_P (t) = 1;
25504 DECL_RESULT (decl) = t;
25505
25506 gimplify_function_tree (decl);
25507 push_cfun (DECL_STRUCT_FUNCTION (decl));
25508 *empty_bb = init_lowered_empty_function (decl, false,
25509 profile_count::uninitialized ());
25510
25511 cgraph_node::add_new_function (decl, true);
25512 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25513
25514 pop_cfun ();
25515
25516 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25517 DECL_ATTRIBUTES (dispatch_decl)
25518 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25519
25520 cgraph_node::create_same_body_alias (dispatch_decl, decl);
25521
25522 return decl;
25523 }
25524
25525 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25526 return a pointer to VERSION_DECL if we are running on a machine that
25527 supports the hardware architecture bits for index CLONE_ISA. This function will
25528 be called during version dispatch to decide which function version to
25529 execute. It returns the basic block at the end, to which more conditions
25530 can be added. */
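
/* Taken together, the conditions added by this function build a
   resolver of roughly the following shape (illustrative pseudo-C;
   the real body is GIMPLE, and the clone names are made up):

     static void *foo_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
         return (void *) foo_power9;
       if (__builtin_cpu_supports ("arch_2_07"))
         return (void *) foo_power8;
       return (void *) foo_default;
     }

   Each non-default clone contributes one __builtin_cpu_supports
   test via a separate call to this function.  */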
25531
25532 static basic_block
25533 add_condition_to_bb (tree function_decl, tree version_decl,
25534 int clone_isa, basic_block new_bb)
25535 {
25536 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25537
25538 gcc_assert (new_bb != NULL);
25539 gimple_seq gseq = bb_seq (new_bb);
25540
25541
25542 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25543 build_fold_addr_expr (version_decl));
25544 tree result_var = create_tmp_var (ptr_type_node);
25545 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25546 gimple *return_stmt = gimple_build_return (result_var);
25547
25548 if (clone_isa == CLONE_DEFAULT)
25549 {
25550 gimple_seq_add_stmt (&gseq, convert_stmt);
25551 gimple_seq_add_stmt (&gseq, return_stmt);
25552 set_bb_seq (new_bb, gseq);
25553 gimple_set_bb (convert_stmt, new_bb);
25554 gimple_set_bb (return_stmt, new_bb);
25555 pop_cfun ();
25556 return new_bb;
25557 }
25558
25559 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25560 tree cond_var = create_tmp_var (bool_int_type_node);
25561 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25562 const char *arg_str = rs6000_clone_map[clone_isa].name;
25563 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25564 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25565 gimple_call_set_lhs (call_cond_stmt, cond_var);
25566
25567 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25568 gimple_set_bb (call_cond_stmt, new_bb);
25569 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25570
25571 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25572 NULL_TREE, NULL_TREE);
25573 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25574 gimple_set_bb (if_else_stmt, new_bb);
25575 gimple_seq_add_stmt (&gseq, if_else_stmt);
25576
25577 gimple_seq_add_stmt (&gseq, convert_stmt);
25578 gimple_seq_add_stmt (&gseq, return_stmt);
25579 set_bb_seq (new_bb, gseq);
25580
25581 basic_block bb1 = new_bb;
25582 edge e12 = split_block (bb1, if_else_stmt);
25583 basic_block bb2 = e12->dest;
25584 e12->flags &= ~EDGE_FALLTHRU;
25585 e12->flags |= EDGE_TRUE_VALUE;
25586
25587 edge e23 = split_block (bb2, return_stmt);
25588 gimple_set_bb (convert_stmt, bb2);
25589 gimple_set_bb (return_stmt, bb2);
25590
25591 basic_block bb3 = e23->dest;
25592 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25593
25594 remove_edge (e23);
25595 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25596
25597 pop_cfun ();
25598 return bb3;
25599 }
25600
25601 /* This function generates the dispatch function for multi-versioned functions.
25602 DISPATCH_DECL is the function which will contain the dispatch logic.
25603 FNDECLS_P points to a vector of the function choices for dispatch.
25604 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25605 code is generated. */
25606
25607 static int
25608 dispatch_function_versions (tree dispatch_decl,
25609 void *fndecls_p,
25610 basic_block *empty_bb)
25611 {
25612 int ix;
25613 tree ele;
25614 vec<tree> *fndecls;
25615 tree clones[CLONE_MAX];
25616
25617 if (TARGET_DEBUG_TARGET)
25618 fputs ("dispatch_function_versions, top\n", stderr);
25619
25620 gcc_assert (dispatch_decl != NULL
25621 && fndecls_p != NULL
25622 && empty_bb != NULL);
25623
25624 /* fndecls_p is actually a vector. */
25625 fndecls = static_cast<vec<tree> *> (fndecls_p);
25626
25627 /* At least one more version other than the default. */
25628 gcc_assert (fndecls->length () >= 2);
25629
25630 /* The first version in the vector is the default decl. */
25631 memset ((void *) clones, '\0', sizeof (clones));
25632 clones[CLONE_DEFAULT] = (*fndecls)[0];
25633
25634 /* On PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25635 here (unlike on x86_64, where it is not). The builtin function
25636 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
25637 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25638 to insert the code here to do the call. */
25639
25640 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25641 {
25642 int priority = rs6000_clone_priority (ele);
25643 if (!clones[priority])
25644 clones[priority] = ele;
25645 }
25646
25647 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25648 if (clones[ix])
25649 {
25650 if (TARGET_DEBUG_TARGET)
25651 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25652 ix, get_decl_name (clones[ix]));
25653
25654 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25655 *empty_bb);
25656 }
25657
25658 return 0;
25659 }
25660
25661 /* Generate the dispatching code body to dispatch multi-versioned function
25662 DECL. The target hook is called to process the "target" attributes and
25663 provide the code to dispatch the right function at run-time. NODE points
25664 to the dispatcher decl whose body will be created. */
25665
25666 static tree
25667 rs6000_generate_version_dispatcher_body (void *node_p)
25668 {
25669 tree resolver;
25670 basic_block empty_bb;
25671 struct cgraph_node *node = (cgraph_node *) node_p;
25672 struct cgraph_function_version_info *ninfo = node->function_version ();
25673
25674 if (ninfo->dispatcher_resolver)
25675 return ninfo->dispatcher_resolver;
25676
25677 /* node is going to be an alias, so remove the finalized bit. */
25678 node->definition = false;
25679
25680 /* The first version in the chain corresponds to the default version. */
25681 ninfo->dispatcher_resolver = resolver
25682 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25683
25684 if (TARGET_DEBUG_TARGET)
25685 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
25686 get_decl_name (resolver));
25687
25688 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25689 auto_vec<tree, 2> fn_ver_vec;
25690
25691 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25692 vinfo;
25693 vinfo = vinfo->next)
25694 {
25695 struct cgraph_node *version = vinfo->this_node;
25696 /* Check for virtual functions here again, as by this time it should
25697 have been determined if this function needs a vtable index or
25698 not. This happens for methods in derived classes that override
25699 virtual methods in base classes but are not explicitly marked as
25700 virtual. */
25701 if (DECL_VINDEX (version->decl))
25702 sorry ("virtual function multiversioning not supported");
25703
25704 fn_ver_vec.safe_push (version->decl);
25705 }
25706
25707 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25708 cgraph_edge::rebuild_edges ();
25709 pop_cfun ();
25710 return resolver;
25711 }
25712
25713 /* Hook to decide if we need to scan function gimple statements to
25714 collect target-specific information for inlining, and to update the
25715 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25716 to predict which ISA feature is used at this time. Return true
25717 if we need to scan, otherwise return false.
25718
25719 static bool
25720 rs6000_need_ipa_fn_target_info (const_tree decl,
25721 unsigned int &info ATTRIBUTE_UNUSED)
25722 {
25723 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25724 if (!target)
25725 target = target_option_default_node;
25726 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25727
25728 /* See PR102059; we only handle HTM for now, so we will only do
25729 the subsequent scanning when the HTM feature is enabled. */
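
/* For instance (illustrative), a callee that calls
   __builtin_tbegin (0) exploits HTM insns; the scan in
   rs6000_update_ipa_fn_target_info records that in
   RS6000_FN_TARGET_INFO_HTM, and rs6000_can_inline_p then refuses
   to inline it into a caller compiled without -mhtm.  */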
25730 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25731 return true;
25732
25733 return false;
25734 }
25735
25736 /* Hook to update target specific information INFO for inlining by
25737 checking the given STMT. Return false if we don't need to scan
25738 any more, otherwise return true. */
25739
25740 static bool
25741 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25742 {
25743 #ifndef HAVE_AS_POWER10_HTM
25744 /* Assume inline asm can use any instruction features. */
25745 if (gimple_code (stmt) == GIMPLE_ASM)
25746 {
25747 const char *asm_str = gimple_asm_string (as_a<const gasm *> (stmt));
25748 /* Ignore empty inline asm string. */
25749 if (strlen (asm_str) > 0)
25750 /* We should set any bits we are concerned with; for now OPTION_MASK_HTM
25751 is the only bit we care about. */
25752 info |= RS6000_FN_TARGET_INFO_HTM;
25753 return false;
25754 }
25755 #endif
25756
25757 if (gimple_code (stmt) == GIMPLE_CALL)
25758 {
25759 tree fndecl = gimple_call_fndecl (stmt);
25760 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25761 {
25762 enum rs6000_gen_builtins fcode
25763 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25764 /* HTM bifs definitely exploit HTM insns. */
25765 if (bif_is_htm (rs6000_builtin_info[fcode]))
25766 {
25767 info |= RS6000_FN_TARGET_INFO_HTM;
25768 return false;
25769 }
25770 }
25771 }
25772
25773 return true;
25774 }
25775
25776 /* Hook to determine if one function can safely inline another. */
25777
25778 static bool
25779 rs6000_can_inline_p (tree caller, tree callee)
25780 {
25781 bool ret = false;
25782 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25783 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25784
25785 /* If the caller/callee has option attributes, then use them.
25786 Otherwise, use the command line options. */
25787 if (!callee_tree)
25788 callee_tree = target_option_default_node;
25789 if (!caller_tree)
25790 caller_tree = target_option_default_node;
25791
25792 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25793 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
25794
25795 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25796 HOST_WIDE_INT caller_isa = caller_opts->x_rs6000_isa_flags;
25797 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25798
25799 cgraph_node *callee_node = cgraph_node::get (callee);
25800 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25801 {
25802 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25803 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25804 {
25805 callee_isa &= ~OPTION_MASK_HTM;
25806 explicit_isa &= ~OPTION_MASK_HTM;
25807 }
25808 }
25809
25810 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25811 purposes. */
25812 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25813 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25814
25815 /* The callee's options must be a subset of the caller's options, i.e.
25816 a vsx function may inline an altivec function, but a no-vsx function
25817 must not inline a vsx function. However, for those options that the
25818 callee has explicitly enabled or disabled, then we must enforce that
25819 the callee's and caller's options match exactly; see PR70010. */
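
/* For example (illustrative): a caller compiled with -mcpu=power9,
   which enables VSX and Altivec, may inline a callee compiled with
   just -maltivec, since the callee's ISA flags are a subset of the
   caller's; but a callee compiled with an explicit -mno-vsx cannot
   be inlined into a VSX caller, because the explicitly set flags
   then differ.  */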
25820 if (((caller_isa & callee_isa) == callee_isa)
25821 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25822 ret = true;
25823
25824 if (TARGET_DEBUG_TARGET)
25825 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
25826 get_decl_name (caller), get_decl_name (callee),
25827 (ret ? "can" : "cannot"));
25828
25829 return ret;
25830 }
25831 \f
25832 /* Allocate a stack temp and fix up the address so it meets the particular
25833 memory requirements (either offsettable or REG+REG addressing). */
25834
25835 rtx
25836 rs6000_allocate_stack_temp (machine_mode mode,
25837 bool offsettable_p,
25838 bool reg_reg_p)
25839 {
25840 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25841 rtx addr = XEXP (stack, 0);
25842 int strict_p = reload_completed;
25843
25844 if (!legitimate_indirect_address_p (addr, strict_p))
25845 {
25846 if (offsettable_p
25847 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25848 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25849
25850 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25851 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25852 }
25853
25854 return stack;
25855 }
25856
25857 /* Given a memory reference, if it does not use reg or reg+reg addressing,
25858 convert it to such a form, to deal with memory reference instructions
25859 like STFIWX and LDBRX that only accept reg+reg addressing. */
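
/* For instance (illustrative RTL), an offset address such as

     (mem:DI (plus:DI (reg:DI 3) (const_int 16)))

   is rewritten so that both terms are registers, e.g.

     (mem:DI (plus:DI (reg:DI 3) (reg:DI 130)))

   where pseudo 130 holds the constant 16, a form that reg+reg-only
   instructions like STFIWX and LDBRX accept.  */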
25860
25861 rtx
25862 rs6000_force_indexed_or_indirect_mem (rtx x)
25863 {
25864 machine_mode mode = GET_MODE (x);
25865
25866 gcc_assert (MEM_P (x));
25867 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25868 {
25869 rtx addr = XEXP (x, 0);
25870 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25871 {
25872 rtx reg = XEXP (addr, 0);
25873 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25874 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25875 gcc_assert (REG_P (reg));
25876 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25877 addr = reg;
25878 }
25879 else if (GET_CODE (addr) == PRE_MODIFY)
25880 {
25881 rtx reg = XEXP (addr, 0);
25882 rtx expr = XEXP (addr, 1);
25883 gcc_assert (REG_P (reg));
25884 gcc_assert (GET_CODE (expr) == PLUS);
25885 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25886 addr = reg;
25887 }
25888
25889 if (GET_CODE (addr) == PLUS)
25890 {
25891 rtx op0 = XEXP (addr, 0);
25892 rtx op1 = XEXP (addr, 1);
25893 op0 = force_reg (Pmode, op0);
25894 op1 = force_reg (Pmode, op1);
25895 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25896 }
25897 else
25898 x = replace_equiv_address (x, force_reg (Pmode, addr));
25899 }
25900
25901 return x;
25902 }
25903
25904 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25905
25906 On the RS/6000, all integer constants are acceptable, though most won't be
25907 valid for particular insns. Only easy FP constants are acceptable. */
25908
25909 static bool
25910 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25911 {
25912 if (TARGET_ELF && tls_referenced_p (x))
25913 return false;
25914
25915 if (CONST_DOUBLE_P (x))
25916 return easy_fp_constant (x, mode);
25917
25918 if (GET_CODE (x) == CONST_VECTOR)
25919 return easy_vector_constant (x, mode);
25920
25921 return true;
25922 }
25923
25924 #if TARGET_AIX_OS
25925 /* Implement TARGET_PRECOMPUTE_TLS_P.
25926
25927 On AIX, TLS symbols are in the TOC, which is maintained in the
25928 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25929 must be considered legitimate constants. */
25930
25931 static bool
25932 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25933 {
25934 return tls_referenced_p (x);
25935 }
25936 #endif
25937
25938 \f
25939 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25940
25941 static bool
25942 chain_already_loaded (rtx_insn *last)
25943 {
25944 for (; last != NULL; last = PREV_INSN (last))
25945 {
25946 if (NONJUMP_INSN_P (last))
25947 {
25948 rtx patt = PATTERN (last);
25949
25950 if (GET_CODE (patt) == SET)
25951 {
25952 rtx lhs = XEXP (patt, 0);
25953
25954 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25955 return true;
25956 }
25957 }
25958 }
25959 return false;
25960 }
25961
25962 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25963
25964 void
25965 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25966 {
25967 rtx func = func_desc;
25968 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25969 rtx toc_load = NULL_RTX;
25970 rtx toc_restore = NULL_RTX;
25971 rtx func_addr;
25972 rtx abi_reg = NULL_RTX;
25973 rtx call[5];
25974 int n_call;
25975 rtx insn;
25976 bool is_pltseq_longcall;
25977
25978 if (global_tlsarg)
25979 tlsarg = global_tlsarg;
25980
25981 /* Handle longcall attributes. */
25982 is_pltseq_longcall = false;
25983 if ((INTVAL (cookie) & CALL_LONG) != 0
25984 && GET_CODE (func_desc) == SYMBOL_REF)
25985 {
25986 func = rs6000_longcall_ref (func_desc, tlsarg);
25987 if (TARGET_PLTSEQ)
25988 is_pltseq_longcall = true;
25989 }
25990
25991 /* Handle indirect calls. */
25992 if (!SYMBOL_REF_P (func)
25993 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25994 {
25995 if (!rs6000_pcrel_p ())
25996 {
25997 /* Save the TOC into its reserved slot before the call,
25998 and prepare to restore it after the call. */
25999 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
26000 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
26001 gen_rtvec (1, stack_toc_offset),
26002 UNSPEC_TOCSLOT);
26003 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
26004
26005 /* Can we optimize saving the TOC in the prologue or
26006 do we need to do it at every call? */
26007 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
26008 cfun->machine->save_toc_in_prologue = true;
26009 else
26010 {
26011 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26012 rtx stack_toc_mem = gen_frame_mem (Pmode,
26013 gen_rtx_PLUS (Pmode, stack_ptr,
26014 stack_toc_offset));
26015 MEM_VOLATILE_P (stack_toc_mem) = 1;
26016 if (is_pltseq_longcall)
26017 {
26018 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
26019 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26020 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
26021 }
26022 else
26023 emit_move_insn (stack_toc_mem, toc_reg);
26024 }
26025 }
26026
26027 if (DEFAULT_ABI == ABI_ELFv2)
26028 {
26029 /* A function pointer in the ELFv2 ABI is just a plain address, but
26030 the ABI requires it to be loaded into r12 before the call. */
26031 func_addr = gen_rtx_REG (Pmode, 12);
26032 emit_move_insn (func_addr, func);
26033 abi_reg = func_addr;
26034 /* Indirect calls via CTR are strongly preferred over indirect
26035 calls via LR, so move the address there. Needed to mark
26036 this insn for linker plt sequence editing too. */
26037 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26038 if (is_pltseq_longcall)
26039 {
26040 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
26041 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26042 emit_insn (gen_rtx_SET (func_addr, mark_func));
26043 v = gen_rtvec (2, func_addr, func_desc);
26044 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26045 }
26046 else
26047 emit_move_insn (func_addr, abi_reg);
26048 }
26049 else
26050 {
26051 /* A function pointer under AIX is a pointer to a data area whose
26052 first word contains the actual address of the function, whose
26053 second word contains a pointer to its TOC, and whose third word
26054 contains a value to place in the static chain register (r11).
26055 Note that if we load the static chain, our "trampoline" need
26056 not have any executable code. */
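
/* Pictured as a C struct, an AIX function descriptor looks roughly
   like this (illustrative; each field is Pmode-sized):

     struct aix_func_desc
     {
       void *code;          -- word 0: actual address of the function
       void *toc;           -- word 1: the function's TOC pointer
       void *static_chain;  -- word 2: value for r11, if any
     };  */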
26057
26058 /* Load up address of the actual function. */
26059 func = force_reg (Pmode, func);
26060 func_addr = gen_reg_rtx (Pmode);
26061 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
26062
26063 /* Indirect calls via CTR are strongly preferred over indirect
26064 calls via LR, so move the address there. */
26065 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
26066 emit_move_insn (ctr_reg, func_addr);
26067 func_addr = ctr_reg;
26068
26069 /* Prepare to load the TOC of the called function. Note that the
26070 TOC load must happen immediately before the actual call so
26071 that unwinding the TOC registers works correctly. See the
26072 comment in frob_update_context. */
26073 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
26074 rtx func_toc_mem = gen_rtx_MEM (Pmode,
26075 gen_rtx_PLUS (Pmode, func,
26076 func_toc_offset));
26077 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
26078
26079 /* If we have a static chain, load it up. But, if the call was
26080 originally direct, the 3rd word has not been written since no
26081 trampoline has been built, so we ought not to load it, lest we
26082 overwrite a static chain value. */
26083 if (!(GET_CODE (func_desc) == SYMBOL_REF
26084 && SYMBOL_REF_FUNCTION_P (func_desc))
26085 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
26086 && !chain_already_loaded (get_current_sequence ()->next->last))
26087 {
26088 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
26089 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
26090 rtx func_sc_mem = gen_rtx_MEM (Pmode,
26091 gen_rtx_PLUS (Pmode, func,
26092 func_sc_offset));
26093 emit_move_insn (sc_reg, func_sc_mem);
26094 abi_reg = sc_reg;
26095 }
26096 }
26097 }
26098 else
26099 {
26100 /* No TOC register needed for calls from PC-relative callers. */
26101 if (!rs6000_pcrel_p ())
26102 /* Direct calls use the TOC: for local calls, the callee will
26103 assume the TOC register is set; for non-local calls, the
26104 PLT stub needs the TOC register. */
26105 abi_reg = toc_reg;
26106 func_addr = func;
26107 }
26108
26109 /* Create the call. */
26110 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26111 if (value != NULL_RTX)
26112 call[0] = gen_rtx_SET (value, call[0]);
26113 call[1] = gen_rtx_USE (VOIDmode, cookie);
26114 n_call = 2;
26115
26116 if (toc_load)
26117 call[n_call++] = toc_load;
26118 if (toc_restore)
26119 call[n_call++] = toc_restore;
26120
26121 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26122
26123 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
26124 insn = emit_call_insn (insn);
26125
26126 /* Mention all registers defined by the ABI to hold information
26127 as uses in CALL_INSN_FUNCTION_USAGE. */
26128 if (abi_reg)
26129 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26130 }
26131
26132 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
26133
26134 void
26135 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26136 {
26137 rtx call[2];
26138 rtx insn;
26139 rtx r12 = NULL_RTX;
26140 rtx func_addr = func_desc;
26141
26142 if (global_tlsarg)
26143 tlsarg = global_tlsarg;
26144
26145 /* Handle longcall attributes. */
26146 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
26147 {
26148 /* PCREL can do a sibling call to a longcall function
26149 because we don't need to restore the TOC register. */
26150 gcc_assert (rs6000_pcrel_p ());
26151 func_desc = rs6000_longcall_ref (func_desc, tlsarg);
26152 }
26153 else
26154 gcc_assert (INTVAL (cookie) == 0);
26155
26156 /* For ELFv2, r12 and CTR need to hold the function address
26157 for an indirect call. */
26158 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
26159 {
26160 r12 = gen_rtx_REG (Pmode, 12);
26161 emit_move_insn (r12, func_desc);
26162 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26163 emit_move_insn (func_addr, r12);
26164 }
26165
26166 /* Create the call. */
26167 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26168 if (value != NULL_RTX)
26169 call[0] = gen_rtx_SET (value, call[0]);
26170
26171 call[1] = simple_return_rtx;
26172
26173 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
26174 insn = emit_call_insn (insn);
26175
26176 /* Note use of the TOC register. */
26177 if (!rs6000_pcrel_p ())
26178 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
26179 gen_rtx_REG (Pmode, TOC_REGNUM));
26180
26181 /* Note use of r12. */
26182 if (r12)
26183 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
26184 }
26185
26186 /* Expand code to perform a call under the SYSV4 ABI. */
26187
26188 void
26189 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26190 {
26191 rtx func = func_desc;
26192 rtx func_addr;
26193 rtx call[4];
26194 rtx insn;
26195 rtx abi_reg = NULL_RTX;
26196 int n;
26197
26198 if (global_tlsarg)
26199 tlsarg = global_tlsarg;
26200
26201 /* Handle longcall attributes. */
26202 if ((INTVAL (cookie) & CALL_LONG) != 0
26203 && GET_CODE (func_desc) == SYMBOL_REF)
26204 {
26205 func = rs6000_longcall_ref (func_desc, tlsarg);
26206 /* If the longcall was implemented as an inline PLT call using
26207 PLT unspecs then func will be REG:r11. If not, func will be
26208 a pseudo reg. The inline PLT call sequence supports lazy
26209 linking (and longcalls to functions in dlopen'd libraries).
26210 The other style of longcall doesn't. The lazy linking entry
26211 to the dynamic symbol resolver requires r11 be the function
26212 address (as it is for linker generated PLT stubs). Ensure
26213 r11 stays valid to the bctrl by marking r11 used by the call. */
26214 if (TARGET_PLTSEQ)
26215 abi_reg = func;
26216 }
26217
26218 /* Handle indirect calls. */
26219 if (GET_CODE (func) != SYMBOL_REF)
26220 {
26221 func = force_reg (Pmode, func);
26222
26223 /* Indirect calls via CTR are strongly preferred over indirect
26224 calls via LR, so move the address there. That can't be left
26225 to reload because we want to mark every instruction in an
26226 inline PLT call sequence with a reloc, enabling the linker to
26227 edit the sequence back to a direct call when that makes sense. */
26228 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26229 if (abi_reg)
26230 {
26231 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26232 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26233 emit_insn (gen_rtx_SET (func_addr, mark_func));
26234 v = gen_rtvec (2, func_addr, func_desc);
26235 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26236 }
26237 else
26238 emit_move_insn (func_addr, func);
26239 }
26240 else
26241 func_addr = func;
26242
26243 /* Create the call. */
26244 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26245 if (value != NULL_RTX)
26246 call[0] = gen_rtx_SET (value, call[0]);
26247
26248 call[1] = gen_rtx_USE (VOIDmode, cookie);
26249 n = 2;
26250 if (TARGET_SECURE_PLT
26251 && flag_pic
26252 && GET_CODE (func_addr) == SYMBOL_REF
26253 && !SYMBOL_REF_LOCAL_P (func_addr))
26254 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
26255
26256 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26257
26258 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
26259 insn = emit_call_insn (insn);
26260 if (abi_reg)
26261 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26262 }
26263
26264 /* Expand code to perform a sibling call under the SysV4 ABI. */
26265
26266 void
26267 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26268 {
26269 rtx func = func_desc;
26270 rtx func_addr;
26271 rtx call[3];
26272 rtx insn;
26273 rtx abi_reg = NULL_RTX;
26274
26275 if (global_tlsarg)
26276 tlsarg = global_tlsarg;
26277
26278 /* Handle longcall attributes. */
26279 if ((INTVAL (cookie) & CALL_LONG) != 0
26280 && GET_CODE (func_desc) == SYMBOL_REF)
26281 {
26282 func = rs6000_longcall_ref (func_desc, tlsarg);
26283 /* If the longcall was implemented as an inline PLT call using
26284 PLT unspecs then func will be REG:r11. If not, func will be
26285 a pseudo reg. The inline PLT call sequence supports lazy
26286 linking (and longcalls to functions in dlopen'd libraries).
26287 The other style of longcall doesn't. The lazy linking entry
26288 to the dynamic symbol resolver requires r11 be the function
26289 address (as it is for linker generated PLT stubs). Ensure
26290 r11 stays valid to the bctr by marking r11 used by the call. */
26291 if (TARGET_PLTSEQ)
26292 abi_reg = func;
26293 }
26294
26295 /* Handle indirect calls. */
26296 if (GET_CODE (func) != SYMBOL_REF)
26297 {
26298 func = force_reg (Pmode, func);
26299
26300 /* Indirect sibcalls must go via CTR. That can't be left to
26301 reload because we want to mark every instruction in an inline
26302 PLT call sequence with a reloc, enabling the linker to edit
26303 the sequence back to a direct call when that makes sense. */
26304 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26305 if (abi_reg)
26306 {
26307 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26308 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26309 emit_insn (gen_rtx_SET (func_addr, mark_func));
26310 v = gen_rtvec (2, func_addr, func_desc);
26311 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26312 }
26313 else
26314 emit_move_insn (func_addr, func);
26315 }
26316 else
26317 func_addr = func;
26318
26319 /* Create the call. */
26320 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26321 if (value != NULL_RTX)
26322 call[0] = gen_rtx_SET (value, call[0]);
26323
26324 call[1] = gen_rtx_USE (VOIDmode, cookie);
26325 call[2] = simple_return_rtx;
26326
26327 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26328 insn = emit_call_insn (insn);
26329 if (abi_reg)
26330 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26331 }
26332
26333 #if TARGET_MACHO
26334
26335 /* Expand code to perform a call under the Darwin ABI.
26336 Modulo handling of mlongcall, this is much the same as sysv.
26337 If/when the longcall optimisation is removed, we could drop this
26338 code and use the sysv case (taking care to avoid the tls stuff).
26339
26340 We can use this for sibcalls too, if needed. */
26341
26342 void
26343 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
26344 rtx cookie, bool sibcall)
26345 {
26346 rtx func = func_desc;
26347 rtx func_addr;
26348 rtx call[3];
26349 rtx insn;
26350 int cookie_val = INTVAL (cookie);
26351 bool make_island = false;
26352
26353 /* Handle longcall attributes; there are two cases for Darwin:
26354 1) Newer linkers are capable of synthesising any branch islands needed.
26355 2) We need a helper branch island synthesised by the compiler.
26356 The second case has mostly been retired and we don't use it for m64.
26357 In fact, it is only an optimisation; we could just indirect as sysv
26358 does... however, we keep it for backwards compatibility for now.
26359 If we're going to use this, then we need to keep the CALL_LONG bit set,
26360 so that we can pick up the special insn form later. */
26361 if ((cookie_val & CALL_LONG) != 0
26362 && GET_CODE (func_desc) == SYMBOL_REF)
26363 {
26364 /* FIXME: the longcall opt should not hang off this flag; it is most
26365 likely incorrect for kernel-mode code-generation. */
26366 if (darwin_symbol_stubs && TARGET_32BIT)
26367 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
26368 else
26369 {
26370 /* The linker is capable of doing this, but the user explicitly
26371 asked for -mlongcall, so we'll do the 'normal' version. */
26372 func = rs6000_longcall_ref (func_desc, NULL_RTX);
26373 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
26374 }
26375 }
26376
26377 /* Handle indirect calls. */
26378 if (GET_CODE (func) != SYMBOL_REF)
26379 {
26380 func = force_reg (Pmode, func);
26381
26382 /* Indirect calls via CTR are strongly preferred over indirect
26383 calls via LR, and are required for indirect sibcalls, so move
26384 the address there. */
26385 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26386 emit_move_insn (func_addr, func);
26387 }
26388 else
26389 func_addr = func;
26390
26391 /* Create the call. */
26392 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26393 if (value != NULL_RTX)
26394 call[0] = gen_rtx_SET (value, call[0]);
26395
26396 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
26397
26398 if (sibcall)
26399 call[2] = simple_return_rtx;
26400 else
26401 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26402
26403 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26404 insn = emit_call_insn (insn);
26405 /* Now that we have the debug info in the insn, we can set up the branch
26406 island if we're using one. */
26407 if (make_island)
26408 {
26409 tree funname = get_identifier (XSTR (func_desc, 0));
26410
26411 if (no_previous_def (funname))
26412 {
26413 rtx label_rtx = gen_label_rtx ();
26414 char *label_buf, temp_buf[256];
26415 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
26416 CODE_LABEL_NUMBER (label_rtx));
26417 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
26418 tree labelname = get_identifier (label_buf);
26419 add_compiler_branch_island (labelname, funname,
26420 insn_line ((const rtx_insn*)insn));
26421 }
26422 }
26423 }
26424 #endif
26425
26426 void
26427 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26428 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26429 {
26430 #if TARGET_MACHO
26431 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
26432 #else
26433 gcc_unreachable ();
26434 #endif
26435 }
26436
26437
26438 void
26439 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26440 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26441 {
26442 #if TARGET_MACHO
26443 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
26444 #else
26445 gcc_unreachable ();
26446 #endif
26447 }
26448
26449 /* Return whether we should generate PC-relative code for FNDECL. */
26450 bool
26451 rs6000_fndecl_pcrel_p (const_tree fndecl)
26452 {
26453 if (DEFAULT_ABI != ABI_ELFv2)
26454 return false;
26455
26456 struct cl_target_option *opts = target_opts_for_fn (fndecl);
26457
26458 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26459 && TARGET_CMODEL == CMODEL_MEDIUM);
26460 }
26461
26462 /* Return whether we should generate PC-relative code for *FN. */
26463 bool
26464 rs6000_function_pcrel_p (struct function *fn)
26465 {
26466 if (DEFAULT_ABI != ABI_ELFv2)
26467 return false;
26468
26469 /* Optimize usual case. */
26470 if (fn == cfun)
26471 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26472 && TARGET_CMODEL == CMODEL_MEDIUM);
26473
26474 return rs6000_fndecl_pcrel_p (fn->decl);
26475 }
26476
26477 /* Return whether we should generate PC-relative code for the current
26478 function. */
26479 bool
26480 rs6000_pcrel_p ()
26481 {
26482 return (DEFAULT_ABI == ABI_ELFv2
26483 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26484 && TARGET_CMODEL == CMODEL_MEDIUM);
26485 }
26486
26487 \f
26488 /* Given an address (ADDR), a mode (MODE), and what the format of the
26489 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26490 for the address. */
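
/* Some illustrative classifications:

     (reg 3)                            -> INSN_FORM_BASE_REG
     (plus (reg 3) (reg 4))             -> INSN_FORM_X
     (plus (reg 3) (const_int 8))       -> INSN_FORM_D, _DS or _DQ,
                                           depending on the mode (DS
                                           needs the low 2 offset bits
                                           zero, DQ the low 4)
     (plus (reg 3) (const_int 0x12345)) -> INSN_FORM_PREFIXED_NUMERIC
                                           (34-bit offset, -mprefixed)
     (symbol_ref "x") with -mpcrel      -> INSN_FORM_PCREL_LOCAL or
                                           INSN_FORM_PCREL_EXTERNAL  */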
26491
26492 enum insn_form
26493 address_to_insn_form (rtx addr,
26494 machine_mode mode,
26495 enum non_prefixed_form non_prefixed_format)
26496 {
26497 /* Single register is easy. */
26498 if (REG_P (addr) || SUBREG_P (addr))
26499 return INSN_FORM_BASE_REG;
26500
26501 /* If the non-prefixed instruction format doesn't support offset addressing,
26502 make sure only indexed addressing is allowed.
26503
26504 We special case SDmode so that the register allocator does not try to move
26505 SDmode through GPR registers, but instead uses the 32-bit integer load and
26506 store instructions for the floating point registers. */
26507 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26508 {
26509 if (GET_CODE (addr) != PLUS)
26510 return INSN_FORM_BAD;
26511
26512 rtx op0 = XEXP (addr, 0);
26513 rtx op1 = XEXP (addr, 1);
26514 if (!REG_P (op0) && !SUBREG_P (op0))
26515 return INSN_FORM_BAD;
26516
26517 if (!REG_P (op1) && !SUBREG_P (op1))
26518 return INSN_FORM_BAD;
26519
26520 return INSN_FORM_X;
26521 }
26522
26523 /* Deal with update forms. */
26524 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26525 return INSN_FORM_UPDATE;
26526
26527 /* Handle PC-relative symbols and labels. Check for both local and
26528 external symbols. Assume labels are always local. TLS symbols
26529 are not PC-relative for rs6000. */
26530 if (TARGET_PCREL)
26531 {
26532 if (LABEL_REF_P (addr))
26533 return INSN_FORM_PCREL_LOCAL;
26534
26535 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26536 {
26537 if (!SYMBOL_REF_LOCAL_P (addr))
26538 return INSN_FORM_PCREL_EXTERNAL;
26539 else
26540 return INSN_FORM_PCREL_LOCAL;
26541 }
26542 }
26543
26544 if (GET_CODE (addr) == CONST)
26545 addr = XEXP (addr, 0);
26546
26547 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26548 if (GET_CODE (addr) == LO_SUM)
26549 return INSN_FORM_LO_SUM;
26550
26551 /* Everything below must be an offset address of some form. */
26552 if (GET_CODE (addr) != PLUS)
26553 return INSN_FORM_BAD;
26554
26555 rtx op0 = XEXP (addr, 0);
26556 rtx op1 = XEXP (addr, 1);
26557
26558 /* Check for indexed addresses. */
26559 if (REG_P (op1) || SUBREG_P (op1))
26560 {
26561 if (REG_P (op0) || SUBREG_P (op0))
26562 return INSN_FORM_X;
26563
26564 return INSN_FORM_BAD;
26565 }
26566
26567 if (!CONST_INT_P (op1))
26568 return INSN_FORM_BAD;
26569
26570 HOST_WIDE_INT offset = INTVAL (op1);
26571 if (!SIGNED_INTEGER_34BIT_P (offset))
26572 return INSN_FORM_BAD;
26573
26574 /* Check for local and external PC-relative addresses. Labels are always
26575 local. TLS symbols are not PC-relative for rs6000. */
26576 if (TARGET_PCREL)
26577 {
26578 if (LABEL_REF_P (op0))
26579 return INSN_FORM_PCREL_LOCAL;
26580
26581 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26582 {
26583 if (!SYMBOL_REF_LOCAL_P (op0))
26584 return INSN_FORM_PCREL_EXTERNAL;
26585 else
26586 return INSN_FORM_PCREL_LOCAL;
26587 }
26588 }
26589
26590 /* If it isn't PC-relative, the address must use a base register. */
26591 if (!REG_P (op0) && !SUBREG_P (op0))
26592 return INSN_FORM_BAD;
26593
26594 /* Large offsets must be prefixed. */
26595 if (!SIGNED_INTEGER_16BIT_P (offset))
26596 {
26597 if (TARGET_PREFIXED)
26598 return INSN_FORM_PREFIXED_NUMERIC;
26599
26600 return INSN_FORM_BAD;
26601 }
26602
26603 /* We have a 16-bit offset, see what default instruction format to use. */
26604 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26605 {
26606 unsigned size = GET_MODE_SIZE (mode);
26607
26608 /* On 64-bit systems, assume 64-bit integers need to use DS form
26609 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26610 (for LXV and STXV). TImode is problematic in that its normal usage
26611 is expected to be GPRs where it wants a DS instruction format, but if
26612 it goes into the vector registers, it wants a DQ instruction
26613 format. */
26614 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26615 non_prefixed_format = NON_PREFIXED_DS;
26616
26617 else if (TARGET_VSX && size >= 16
26618 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26619 non_prefixed_format = NON_PREFIXED_DQ;
26620
26621 else
26622 non_prefixed_format = NON_PREFIXED_D;
26623 }
26624
26625 /* Classify the D/DS/DQ-form addresses. */
26626 switch (non_prefixed_format)
26627 {
26628 /* Instruction format D, all 16 bits are valid. */
26629 case NON_PREFIXED_D:
26630 return INSN_FORM_D;
26631
26632 /* Instruction format DS, bottom 2 bits must be 0. */
26633 case NON_PREFIXED_DS:
26634 if ((offset & 3) == 0)
26635 return INSN_FORM_DS;
26636
26637 else if (TARGET_PREFIXED)
26638 return INSN_FORM_PREFIXED_NUMERIC;
26639
26640 else
26641 return INSN_FORM_BAD;
26642
26643 /* Instruction format DQ, bottom 4 bits must be 0. */
26644 case NON_PREFIXED_DQ:
26645 if ((offset & 15) == 0)
26646 return INSN_FORM_DQ;
26647
26648 else if (TARGET_PREFIXED)
26649 return INSN_FORM_PREFIXED_NUMERIC;
26650
26651 else
26652 return INSN_FORM_BAD;
26653
26654 default:
26655 break;
26656 }
26657
26658 return INSN_FORM_BAD;
26659 }
26660
26661 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26662 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26663 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26664 a D-form or DS-form instruction. X-form and base_reg are always
26665 allowed. */
26666 bool
26667 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26668 enum non_prefixed_form non_prefixed_format)
26669 {
26670 enum insn_form result_form;
26671
26672 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26673
26674 switch (non_prefixed_format)
26675 {
26676 case NON_PREFIXED_D:
26677 switch (result_form)
26678 {
26679 case INSN_FORM_X:
26680 case INSN_FORM_D:
26681 case INSN_FORM_DS:
26682 case INSN_FORM_BASE_REG:
26683 return true;
26684 default:
26685 return false;
26686 }
26687 break;
26688 case NON_PREFIXED_DS:
26689 switch (result_form)
26690 {
26691 case INSN_FORM_X:
26692 case INSN_FORM_DS:
26693 case INSN_FORM_BASE_REG:
26694 return true;
26695 default:
26696 return false;
26697 }
26698 break;
26699 default:
26700 break;
26701 }
26702 return false;
26703 }
26704
26705 /* Return true if a REG with a given MODE that is loaded from or stored into a
26706 MEM location uses a non-prefixed D/DS/DQ-form address. This is used to validate
26707 the load or store with the PCREL_OPT optimization to make sure it is an
26708 instruction that can be optimized.
26709
26710 We need to specify the MODE separately from the REG to allow for loads that
26711 include zero/sign/float extension. */
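
/* The code shape being validated looks like this (illustrative
   assembly):

     pld 9,sym@got@pcrel    # load of the symbol's address
     lwz 3,0(9)             # D-form use of that address

   A D/DS/DQ-form or base-register use lets the linker collapse the
   pair into a single PC-relative access, whereas an indexed-only
   instruction such as LFIWAX cannot be rewritten, so we reject it
   below.  */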
26712
26713 bool
26714 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26715 {
26716 /* If the instruction only supports indexed addressing, like LFIWAX/LXSIWAX,
26717 we cannot do the PCREL_OPT optimization. */
26718 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26719 if (non_prefixed == NON_PREFIXED_X)
26720 return false;
26721
26722 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26723 rtx addr = XEXP (mem, 0);
26724 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26725 return (iform == INSN_FORM_BASE_REG
26726 || iform == INSN_FORM_D
26727 || iform == INSN_FORM_DS
26728 || iform == INSN_FORM_DQ);
26729 }
26730
26731 /* Helper function to see if we're potentially looking at lfs/stfs.
26732 - PARALLEL containing a SET and a CLOBBER
26733 - stfs:
26734 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26735 - CLOBBER is a V4SF
26736 - lfs:
26737 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26738 - CLOBBER is a DI
26739 */
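
/* In RTL terms, the stfs shape being matched is (illustrative):

     (parallel
       [(set (mem:SI ...)
             (unspec:SI [...] UNSPEC_SI_FROM_SF))
        (clobber (scratch:V4SF))])

   and the lfs shape swaps the roles: the SET destination is a
   reg:SF, the source is UNSPEC_SF_FROM_SI, and the clobber is a
   scratch:DI.  */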
26740
26741 static bool
26742 is_lfs_stfs_insn (rtx_insn *insn)
26743 {
26744 rtx pattern = PATTERN (insn);
26745 if (GET_CODE (pattern) != PARALLEL)
26746 return false;
26747
26748 /* This should be a parallel with exactly one set and one clobber. */
26749 if (XVECLEN (pattern, 0) != 2)
26750 return false;
26751
26752 rtx set = XVECEXP (pattern, 0, 0);
26753 if (GET_CODE (set) != SET)
26754 return false;
26755
26756 rtx clobber = XVECEXP (pattern, 0, 1);
26757 if (GET_CODE (clobber) != CLOBBER)
26758 return false;
26759
26760 /* All we care about is that the destination of the SET is a mem:SI,
26761 the source is an UNSPEC_SI_FROM_SF, and the clobber
26762 is a scratch:V4SF. */
26763
26764 rtx dest = SET_DEST (set);
26765 rtx src = SET_SRC (set);
26766 rtx scratch = SET_DEST (clobber);
26767
26768 if (GET_CODE (src) != UNSPEC)
26769 return false;
26770
26771 /* stfs case. */
26772 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26773 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26774 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26775 return true;
26776
26777 /* lfs case. */
26778 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26779 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26780 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26781 return true;
26782
26783 return false;
26784 }
26785
26786 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26787 instruction format (D/DS/DQ) used for offset memory. */
26788
26789 enum non_prefixed_form
26790 reg_to_non_prefixed (rtx reg, machine_mode mode)
26791 {
26792 /* If it isn't a register, use the defaults. */
26793 if (!REG_P (reg) && !SUBREG_P (reg))
26794 return NON_PREFIXED_DEFAULT;
26795
26796 unsigned int r = reg_or_subregno (reg);
26797
26798 /* If we have a pseudo, use the default instruction format. */
26799 if (!HARD_REGISTER_NUM_P (r))
26800 return NON_PREFIXED_DEFAULT;
26801
26802 unsigned size = GET_MODE_SIZE (mode);
26803
26804 /* FPR registers use D-form addressing for scalars, and DQ-form for vectors,
26805 IEEE 128-bit floating point, and 128-bit integers. Before power9, only
26806 indexed addressing was available for vectors. */
26807 if (FP_REGNO_P (r))
26808 {
26809 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26810 return NON_PREFIXED_D;
26811
26812 else if (size < 8)
26813 return NON_PREFIXED_X;
26814
26815 else if (TARGET_VSX && size >= 16
26816 && (VECTOR_MODE_P (mode)
26817 || VECTOR_ALIGNMENT_P (mode)
26818 || mode == TImode || mode == CTImode))
26819 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26820
26821 else
26822 return NON_PREFIXED_DEFAULT;
26823 }
26824
26825   /* Altivec registers use the DS format for scalars, and the DQ format for
26826      vectors, IEEE 128-bit floating point, and 128-bit integers. Before
26827      power9, only indexed addressing was available. */
26828 else if (ALTIVEC_REGNO_P (r))
26829 {
26830 if (!TARGET_P9_VECTOR)
26831 return NON_PREFIXED_X;
26832
26833 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26834 return NON_PREFIXED_DS;
26835
26836 else if (size < 8)
26837 return NON_PREFIXED_X;
26838
26839 else if (TARGET_VSX && size >= 16
26840 && (VECTOR_MODE_P (mode)
26841 || VECTOR_ALIGNMENT_P (mode)
26842 || mode == TImode || mode == CTImode))
26843 return NON_PREFIXED_DQ;
26844
26845 else
26846 return NON_PREFIXED_DEFAULT;
26847 }
26848
26849   /* GPR registers use the DS format for 64-bit items on 64-bit systems, and
26850      the D format otherwise. Assume that any other register, such as LR, CRs,
26851      etc., will go through the GPR registers for memory operations. */
26852 else if (TARGET_POWERPC64 && size >= 8)
26853 return NON_PREFIXED_DS;
26854
26855 return NON_PREFIXED_D;
26856 }
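
/* For example (an illustrative sketch, not part of the original source,
   assuming a 64-bit target):

     rtx gpr = gen_rtx_REG (DImode, 5);
     reg_to_non_prefixed (gpr, DImode);  // NON_PREFIXED_DS: ld/std, whose
					 // displacement must be a multiple of 4
     reg_to_non_prefixed (gpr, SImode);  // NON_PREFIXED_D: lwz/stw, with an
					 // arbitrary 16-bit displacement
   */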
26857
26858 \f
26859 /* Whether a load instruction is a prefixed instruction. This is called from
26860 the prefixed attribute processing. */
26861
26862 bool
26863 prefixed_load_p (rtx_insn *insn)
26864 {
26865 /* Validate the insn to make sure it is a normal load insn. */
26866 extract_insn_cached (insn);
26867 if (recog_data.n_operands < 2)
26868 return false;
26869
26870 rtx reg = recog_data.operand[0];
26871 rtx mem = recog_data.operand[1];
26872
26873 if (!REG_P (reg) && !SUBREG_P (reg))
26874 return false;
26875
26876 if (!MEM_P (mem))
26877 return false;
26878
26879 /* Prefixed load instructions do not support update or indexed forms. */
26880 if (get_attr_indexed (insn) == INDEXED_YES
26881 || get_attr_update (insn) == UPDATE_YES)
26882 return false;
26883
26884 /* LWA uses the DS format instead of the D format that LWZ uses. */
26885 enum non_prefixed_form non_prefixed;
26886 machine_mode reg_mode = GET_MODE (reg);
26887 machine_mode mem_mode = GET_MODE (mem);
26888
26889 if (mem_mode == SImode && reg_mode == DImode
26890 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26891 non_prefixed = NON_PREFIXED_DS;
26892
26893 else
26894 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26895
26896 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26897 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26898 else
26899 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26900 }
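
/* For illustration (not part of the original source): "ld 3,8(9)" is a plain
   DS-form load, so prefixed_load_p returns false for it, while a load whose
   displacement does not fit the 16-bit DS field, e.g. "pld 3,100000(9)",
   needs the prefixed form and makes the function return true.  Likewise
   "lwa" (SImode sign-extended to DImode) is classified against the DS form,
   unlike the D-form "lwz", which is what the sign_extend check above
   handles.  */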
26901
26902 /* Whether a store instruction is a prefixed instruction. This is called from
26903 the prefixed attribute processing. */
26904
26905 bool
26906 prefixed_store_p (rtx_insn *insn)
26907 {
26908 /* Validate the insn to make sure it is a normal store insn. */
26909 extract_insn_cached (insn);
26910 if (recog_data.n_operands < 2)
26911 return false;
26912
26913 rtx mem = recog_data.operand[0];
26914 rtx reg = recog_data.operand[1];
26915
26916 if (!REG_P (reg) && !SUBREG_P (reg))
26917 return false;
26918
26919 if (!MEM_P (mem))
26920 return false;
26921
26922 /* Prefixed store instructions do not support update or indexed forms. */
26923 if (get_attr_indexed (insn) == INDEXED_YES
26924 || get_attr_update (insn) == UPDATE_YES)
26925 return false;
26926
26927 machine_mode mem_mode = GET_MODE (mem);
26928 rtx addr = XEXP (mem, 0);
26929 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26930
26931   /* Make sure we aren't looking at a stfs pattern, which doesn't look like
26932      the other things that reg_to_non_prefixed/address_is_prefixed
26933      look for. */
26934 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26935 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26936 else
26937 return address_is_prefixed (addr, mem_mode, non_prefixed);
26938 }
26939
26940 /* Whether a load immediate or add instruction is a prefixed instruction. This
26941 is called from the prefixed attribute processing. */
26942
26943 bool
26944 prefixed_paddi_p (rtx_insn *insn)
26945 {
26946 rtx set = single_set (insn);
26947 if (!set)
26948 return false;
26949
26950 rtx dest = SET_DEST (set);
26951 rtx src = SET_SRC (set);
26952
26953 if (!REG_P (dest) && !SUBREG_P (dest))
26954 return false;
26955
26956 /* Is this a load immediate that can't be done with a simple ADDI or
26957 ADDIS? */
26958 if (CONST_INT_P (src))
26959 return (satisfies_constraint_eI (src)
26960 && !satisfies_constraint_I (src)
26961 && !satisfies_constraint_L (src));
26962
26963 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26964 ADDIS? */
26965 if (GET_CODE (src) == PLUS)
26966 {
26967 rtx op1 = XEXP (src, 1);
26968
26969 return (CONST_INT_P (op1)
26970 && satisfies_constraint_eI (op1)
26971 && !satisfies_constraint_I (op1)
26972 && !satisfies_constraint_L (op1));
26973 }
26974
26975 /* If not, is it a load of a PC-relative address? */
26976 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26977 return false;
26978
26979 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26980 return false;
26981
26982 enum insn_form iform = address_to_insn_form (src, Pmode,
26983 NON_PREFIXED_DEFAULT);
26984
26985 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26986 }
26987
26988 /* Whether the next instruction needs a 'p' prefix issued before the
26989 instruction is printed out. */
26990 static bool prepend_p_to_next_insn;
26991
26992 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26993 outputting the assembler code. On the PowerPC, we remember if the current
26994 insn is a prefixed insn where we need to emit a 'p' before the insn.
26995
26996 In addition, if the insn is part of a PC-relative reference to an external
26997    label optimization, this is also recorded. */
26998 void
26999 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
27000 {
27001 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
27002 == MAYBE_PREFIXED_YES
27003 && get_attr_prefixed (insn) == PREFIXED_YES);
27004 return;
27005 }
27006
27007 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
27008 We use it to emit a 'p' for prefixed insns that is set in
27009 FINAL_PRESCAN_INSN. */
27010 void
27011 rs6000_asm_output_opcode (FILE *stream)
27012 {
27013 if (prepend_p_to_next_insn)
27014 {
27015 fprintf (stream, "p");
27016
27017 /* Reset the flag in the case where there are separate insn lines in the
27018 sequence, so the 'p' is only emitted for the first line. This shows up
27019         when we are doing the PCREL_OPT optimization, where otherwise the label
27020         created with %r<n> would get a leading 'p' printed. */
27021 prepend_p_to_next_insn = false;
27022 }
27023
27024 return;
27025 }
27026
27027 /* Emit the relocation to tie the next instruction to a previous instruction
27028 that loads up an external address. This is used to do the PCREL_OPT
27029    optimization. Note, the label is generated after the PLD of the GOT
27030    PC-relative address to allow the assembler to insert NOPs before the PLD
27031 instruction. The operand is a constant integer that is the label
27032 number. */
27033
27034 void
27035 output_pcrel_opt_reloc (rtx label_num)
27036 {
27037 rtx operands[1] = { label_num };
27038 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
27039 operands);
27040 }
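
/* For illustration (a sketch; the registers and label numbers are
   hypothetical): the assembly emitted for a PCREL_OPT external variable
   reference looks roughly like:

	pld 9,ext_var@got@pcrel
     .Lpcrel5:
	.reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
	lwa 3,0(9)

   The label follows the 8-byte PLD, so .Lpcrel5-8 is the address of the PLD
   itself, and the .-(.Lpcrel5-8) expression tells the linker how far away
   the dependent load is, letting it rewrite the pair when the GOT
   indirection is unnecessary.  */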
27041
27042 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
27043 should be adjusted to reflect any required changes. This macro is used when
27044 there is some systematic length adjustment required that would be difficult
27045 to express in the length attribute.
27046
27047 In the PowerPC, we use this to adjust the length of an instruction if one or
27048 more prefixed instructions are generated, using the attribute
27049 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
27050    hardware requires that a prefixed instruction does not cross a 64-byte
27051 boundary. This means the compiler has to assume the length of the first
27052 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
27053    already set for the non-prefixed instruction, we just need to update for the
27054 difference. */
27055
27056 int
27057 rs6000_adjust_insn_length (rtx_insn *insn, int length)
27058 {
27059 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
27060 {
27061 rtx pattern = PATTERN (insn);
27062 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
27063 && get_attr_prefixed (insn) == PREFIXED_YES)
27064 {
27065 int num_prefixed = get_attr_max_prefixed_insns (insn);
27066 length += 4 * (num_prefixed + 1);
27067 }
27068 }
27069
27070 return length;
27071 }
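
/* A worked example (illustrative, not part of the original source): a single
   prefixed load has max_prefixed_insns == 1 and a base LENGTH of 4, so the
   adjustment above yields 4 + 4 * (1 + 1) = 12 bytes: 8 bytes for the
   prefixed instruction itself plus 4 bytes for the alignment NOP the
   assembler may insert to keep it from crossing a 64-byte boundary.  */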
27072
27073 \f
27074 #ifdef HAVE_GAS_HIDDEN
27075 # define USE_HIDDEN_LINKONCE 1
27076 #else
27077 # define USE_HIDDEN_LINKONCE 0
27078 #endif
27079
27080 /* Fills in the label name that should be used for a 476 link stack thunk. */
27081
27082 void
27083 get_ppc476_thunk_name (char name[32])
27084 {
27085 gcc_assert (TARGET_LINK_STACK);
27086
27087 if (USE_HIDDEN_LINKONCE)
27088 sprintf (name, "__ppc476.get_thunk");
27089 else
27090 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
27091 }
27092
27093 /* This function emits the simple thunk routine that is used to preserve
27094 the link stack on the 476 cpu. */
27095
27096 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
27097 static void
27098 rs6000_code_end (void)
27099 {
27100 char name[32];
27101 tree decl;
27102
27103 if (!TARGET_LINK_STACK)
27104 return;
27105
27106 get_ppc476_thunk_name (name);
27107
27108 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
27109 build_function_type_list (void_type_node, NULL_TREE));
27110 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
27111 NULL_TREE, void_type_node);
27112 TREE_PUBLIC (decl) = 1;
27113 TREE_STATIC (decl) = 1;
27114
27115 #if RS6000_WEAK
27116 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
27117 {
27118 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
27119 targetm.asm_out.unique_section (decl, 0);
27120 switch_to_section (get_named_section (decl, NULL, 0));
27121 DECL_WEAK (decl) = 1;
27122 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
27123 targetm.asm_out.globalize_label (asm_out_file, name);
27124 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
27125 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
27126 }
27127 else
27128 #endif
27129 {
27130 switch_to_section (text_section);
27131 ASM_OUTPUT_LABEL (asm_out_file, name);
27132 }
27133
27134 DECL_INITIAL (decl) = make_node (BLOCK);
27135 current_function_decl = decl;
27136 allocate_struct_function (decl, false);
27137 init_function_start (decl);
27138 first_function_block_is_cold = false;
27139 /* Make sure unwind info is emitted for the thunk if needed. */
27140 final_start_function (emit_barrier (), asm_out_file, 1);
27141
27142 fputs ("\tblr\n", asm_out_file);
27143
27144 final_end_function ();
27145 init_insn_lengths ();
27146 free_after_compilation (cfun);
27147 set_cfun (NULL);
27148 current_function_decl = NULL;
27149 }
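
/* For illustration (a sketch; the exact directives vary by target): with
   hidden linkonce support the emitted thunk looks roughly like:

	.weak	__ppc476.get_thunk
	.hidden	__ppc476.get_thunk
     __ppc476.get_thunk:
	blr

   A bl to this thunk followed by its blr forms a balanced call/return pair,
   which lets code materialize the PC in LR without corrupting the 476 link
   stack the way a bare bcl 20,31,$+4 would.  */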
27150
27151 /* Add r30 to hard reg set if the prologue sets it up and it is not
27152 pic_offset_table_rtx. */
27153
27154 static void
27155 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
27156 {
27157 if (!TARGET_SINGLE_PIC_BASE
27158 && TARGET_TOC
27159 && TARGET_MINIMAL_TOC
27160 && !constant_pool_empty_p ())
27161 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27162 if (cfun->machine->split_stack_argp_used)
27163 add_to_hard_reg_set (&set->set, Pmode, 12);
27164
27165 /* Make sure the hard reg set doesn't include r2, which was possibly added
27166 via PIC_OFFSET_TABLE_REGNUM. */
27167 if (TARGET_TOC)
27168 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
27169 }
27170
27171 \f
27172 /* Helper function for rs6000_split_logical to emit a logical instruction after
27173    splitting the operation into single GPR registers.
27174
27175 DEST is the destination register.
27176 OP1 and OP2 are the input source registers.
27177 CODE is the base operation (AND, IOR, XOR, NOT).
27178 MODE is the machine mode.
27179 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27180 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27181 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27182
27183 static void
27184 rs6000_split_logical_inner (rtx dest,
27185 rtx op1,
27186 rtx op2,
27187 enum rtx_code code,
27188 machine_mode mode,
27189 bool complement_final_p,
27190 bool complement_op1_p,
27191 bool complement_op2_p)
27192 {
27193 rtx bool_rtx;
27194
27195 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
27196 if (op2 && CONST_INT_P (op2)
27197 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
27198 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27199 {
27200 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
27201 HOST_WIDE_INT value = INTVAL (op2) & mask;
27202
27203 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
27204 if (code == AND)
27205 {
27206 if (value == 0)
27207 {
27208 emit_insn (gen_rtx_SET (dest, const0_rtx));
27209 return;
27210 }
27211
27212 else if (value == mask)
27213 {
27214 if (!rtx_equal_p (dest, op1))
27215 emit_insn (gen_rtx_SET (dest, op1));
27216 return;
27217 }
27218 }
27219
27220 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
27221         into separate ORI/ORIS or XORI/XORIS instructions. */
27222 else if (code == IOR || code == XOR)
27223 {
27224 if (value == 0)
27225 {
27226 if (!rtx_equal_p (dest, op1))
27227 emit_insn (gen_rtx_SET (dest, op1));
27228 return;
27229 }
27230 }
27231 }
27232
27233 if (code == AND && mode == SImode
27234 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27235 {
27236 emit_insn (gen_andsi3 (dest, op1, op2));
27237 return;
27238 }
27239
27240 if (complement_op1_p)
27241 op1 = gen_rtx_NOT (mode, op1);
27242
27243 if (complement_op2_p)
27244 op2 = gen_rtx_NOT (mode, op2);
27245
27246 /* For canonical RTL, if only one arm is inverted it is the first. */
27247 if (!complement_op1_p && complement_op2_p)
27248 std::swap (op1, op2);
27249
27250 bool_rtx = ((code == NOT)
27251 ? gen_rtx_NOT (mode, op1)
27252 : gen_rtx_fmt_ee (code, mode, op1, op2));
27253
27254 if (complement_final_p)
27255 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
27256
27257 emit_insn (gen_rtx_SET (dest, bool_rtx));
27258 }
27259
27260 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27261 operations are split immediately during RTL generation to allow for more
27262 optimizations of the AND/IOR/XOR.
27263
27264 OPERANDS is an array containing the destination and two input operands.
27265 CODE is the base operation (AND, IOR, XOR, NOT).
27266 MODE is the machine mode.
27267 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27268 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27269 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
27270 CLOBBER_REG is either NULL or a scratch register of type CC to allow
27271 formation of the AND instructions. */
27272
27273 static void
27274 rs6000_split_logical_di (rtx operands[3],
27275 enum rtx_code code,
27276 bool complement_final_p,
27277 bool complement_op1_p,
27278 bool complement_op2_p)
27279 {
27280 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
27281 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
27282 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
27283 enum hi_lo { hi = 0, lo = 1 };
27284 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
27285 size_t i;
27286
27287 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
27288 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
27289 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
27290 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
27291
27292 if (code == NOT)
27293 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
27294 else
27295 {
27296 if (!CONST_INT_P (operands[2]))
27297 {
27298 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
27299 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
27300 }
27301 else
27302 {
27303 HOST_WIDE_INT value = INTVAL (operands[2]);
27304 HOST_WIDE_INT value_hi_lo[2];
27305
27306 gcc_assert (!complement_final_p);
27307 gcc_assert (!complement_op1_p);
27308 gcc_assert (!complement_op2_p);
27309
27310 value_hi_lo[hi] = value >> 32;
27311 value_hi_lo[lo] = value & lower_32bits;
27312
27313 for (i = 0; i < 2; i++)
27314 {
27315 HOST_WIDE_INT sub_value = value_hi_lo[i];
27316
27317 if (sub_value & sign_bit)
27318 sub_value |= upper_32bits;
27319
27320 op2_hi_lo[i] = GEN_INT (sub_value);
27321
27322 /* If this is an AND instruction, check to see if we need to load
27323 the value in a register. */
27324 if (code == AND && sub_value != -1 && sub_value != 0
27325 && !and_operand (op2_hi_lo[i], SImode))
27326 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
27327 }
27328 }
27329 }
27330
27331 for (i = 0; i < 2; i++)
27332 {
27333 /* Split large IOR/XOR operations. */
27334 if ((code == IOR || code == XOR)
27335 && CONST_INT_P (op2_hi_lo[i])
27336 && !complement_final_p
27337 && !complement_op1_p
27338 && !complement_op2_p
27339 && !logical_const_operand (op2_hi_lo[i], SImode))
27340 {
27341 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
27342 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
27343 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
27344 rtx tmp = gen_reg_rtx (SImode);
27345
27346 /* Make sure the constant is sign extended. */
27347 if ((hi_16bits & sign_bit) != 0)
27348 hi_16bits |= upper_32bits;
27349
27350 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
27351 code, SImode, false, false, false);
27352
27353 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
27354 code, SImode, false, false, false);
27355 }
27356 else
27357 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
27358 code, SImode, complement_final_p,
27359 complement_op1_p, complement_op2_p);
27360 }
27361
27362 return;
27363 }
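
/* For illustration (a sketch, not part of the original source): XORing one
   32-bit half with 0x12345678, which is not a 16-bit logical constant, is
   split by the loop above into two instructions:

	xoris tmp,src,0x1234	# apply the high 16 bits
	xori  dst,tmp,0x5678	# apply the low 16 bits

   exactly the hi_16bits/lo_16bits decomposition computed above.  */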
27364
27365 /* Split the insns that make up boolean operations operating on multiple GPR
27366 registers. The boolean MD patterns ensure that the inputs either are
27367 exactly the same as the output registers, or there is no overlap.
27368
27369 OPERANDS is an array containing the destination and two input operands.
27370 CODE is the base operation (AND, IOR, XOR, NOT).
27371 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27372 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27373 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27374
27375 void
27376 rs6000_split_logical (rtx operands[3],
27377 enum rtx_code code,
27378 bool complement_final_p,
27379 bool complement_op1_p,
27380 bool complement_op2_p)
27381 {
27382 machine_mode mode = GET_MODE (operands[0]);
27383 machine_mode sub_mode;
27384 rtx op0, op1, op2;
27385 int sub_size, regno0, regno1, nregs, i;
27386
27387 /* If this is DImode, use the specialized version that can run before
27388 register allocation. */
27389 if (mode == DImode && !TARGET_POWERPC64)
27390 {
27391 rs6000_split_logical_di (operands, code, complement_final_p,
27392 complement_op1_p, complement_op2_p);
27393 return;
27394 }
27395
27396 op0 = operands[0];
27397 op1 = operands[1];
27398 op2 = (code == NOT) ? NULL_RTX : operands[2];
27399 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
27400 sub_size = GET_MODE_SIZE (sub_mode);
27401 regno0 = REGNO (op0);
27402 regno1 = REGNO (op1);
27403
27404 gcc_assert (reload_completed);
27405 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27406 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27407
27408 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
27409 gcc_assert (nregs > 1);
27410
27411 if (op2 && REG_P (op2))
27412 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
27413
27414 for (i = 0; i < nregs; i++)
27415 {
27416 int offset = i * sub_size;
27417 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
27418 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
27419 rtx sub_op2 = ((code == NOT)
27420 ? NULL_RTX
27421 : simplify_subreg (sub_mode, op2, mode, offset));
27422
27423 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
27424 complement_final_p, complement_op1_p,
27425 complement_op2_p);
27426 }
27427
27428 return;
27429 }
27430
27431 /* Emit instructions to move SRC to DST. Called by splitters for
27432 multi-register moves. It will emit at most one instruction for
27433 each register that is accessed; that is, it won't emit li/lis pairs
27434 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27435 register. */
27436
27437 void
27438 rs6000_split_multireg_move (rtx dst, rtx src)
27439 {
27440 /* The register number of the first register being moved. */
27441 int reg;
27442 /* The mode that is to be moved. */
27443 machine_mode mode;
27444 /* The mode that the move is being done in, and its size. */
27445 machine_mode reg_mode;
27446 int reg_mode_size;
27447 /* The number of registers that will be moved. */
27448 int nregs;
27449
27450 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
27451 mode = GET_MODE (dst);
27452 nregs = hard_regno_nregs (reg, mode);
27453
27454 /* If we have a vector quad register for MMA, and this is a load or store,
27455 see if we can use vector paired load/stores. */
27456 if (mode == XOmode && TARGET_MMA
27457 && (MEM_P (dst) || MEM_P (src)))
27458 {
27459 reg_mode = OOmode;
27460 nregs /= 2;
27461 }
27462 /* If we have a vector pair/quad mode, split it into two/four separate
27463 vectors. */
27464 else if (mode == OOmode || mode == XOmode)
27465 reg_mode = V1TImode;
27466 else if (FP_REGNO_P (reg))
27467 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
27468 (TARGET_HARD_FLOAT ? DFmode : SFmode);
27469 else if (ALTIVEC_REGNO_P (reg))
27470 reg_mode = V16QImode;
27471 else
27472 reg_mode = word_mode;
27473 reg_mode_size = GET_MODE_SIZE (reg_mode);
27474
27475 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
27476
27477 /* TDmode residing in FP registers is special, since the ISA requires that
27478 the lower-numbered word of a register pair is always the most significant
27479 word, even in little-endian mode. This does not match the usual subreg
27480    semantics, so we cannot use simplify_gen_subreg in those cases. Access
27481 the appropriate constituent registers "by hand" in little-endian mode.
27482
27483 Note we do not need to check for destructive overlap here since TDmode
27484 can only reside in even/odd register pairs. */
27485 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
27486 {
27487 rtx p_src, p_dst;
27488 int i;
27489
27490 for (i = 0; i < nregs; i++)
27491 {
27492 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27493 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27494 else
27495 p_src = simplify_gen_subreg (reg_mode, src, mode,
27496 i * reg_mode_size);
27497
27498 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27499 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27500 else
27501 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27502 i * reg_mode_size);
27503
27504 emit_insn (gen_rtx_SET (p_dst, p_src));
27505 }
27506
27507 return;
27508 }
27509
27510 /* The __vector_pair and __vector_quad modes are multi-register
27511 modes, so if we have to load or store the registers, we have to be
27512 careful to properly swap them if we're in little endian mode
27513 below. This means the last register gets the first memory
27514 location. We also need to be careful of using the right register
27515 numbers if we are splitting XO to OO. */
27516 if (mode == OOmode || mode == XOmode)
27517 {
27518 nregs = hard_regno_nregs (reg, mode);
27519 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27520 if (MEM_P (dst))
27521 {
27522 unsigned offset = 0;
27523 unsigned size = GET_MODE_SIZE (reg_mode);
27524
27525 /* If we are reading an accumulator register, we have to
27526 deprime it before we can access it. */
27527 if (TARGET_MMA
27528 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27529 emit_insn (gen_mma_xxmfacc (src, src));
27530
27531 for (int i = 0; i < nregs; i += reg_mode_nregs)
27532 {
27533 unsigned subreg
27534 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27535 rtx dst2 = adjust_address (dst, reg_mode, offset);
27536 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27537 offset += size;
27538 emit_insn (gen_rtx_SET (dst2, src2));
27539 }
27540
27541 return;
27542 }
27543
27544 if (MEM_P (src))
27545 {
27546 unsigned offset = 0;
27547 unsigned size = GET_MODE_SIZE (reg_mode);
27548
27549 for (int i = 0; i < nregs; i += reg_mode_nregs)
27550 {
27551 unsigned subreg
27552 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27553 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27554 rtx src2 = adjust_address (src, reg_mode, offset);
27555 offset += size;
27556 emit_insn (gen_rtx_SET (dst2, src2));
27557 }
27558
27559 /* If we are writing an accumulator register, we have to
27560 prime it after we've written it. */
27561 if (TARGET_MMA
27562 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27563 emit_insn (gen_mma_xxmtacc (dst, dst));
27564
27565 return;
27566 }
27567
27568 if (GET_CODE (src) == UNSPEC
27569 || GET_CODE (src) == UNSPEC_VOLATILE)
27570 {
27571 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27572 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27573 gcc_assert (REG_P (dst));
27574 if (GET_MODE (src) == XOmode)
27575 gcc_assert (FP_REGNO_P (REGNO (dst)));
27576 if (GET_MODE (src) == OOmode)
27577 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27578
27579 int nvecs = XVECLEN (src, 0);
27580 for (int i = 0; i < nvecs; i++)
27581 {
27582 rtx op;
27583 int regno = reg + i;
27584
27585 if (WORDS_BIG_ENDIAN)
27586 {
27587 op = XVECEXP (src, 0, i);
27588
27589 /* If we are loading an even VSX register and the memory location
27590 is adjacent to the next register's memory location (if any),
27591 then we can load them both with one LXVP instruction. */
27592 if ((regno & 1) == 0)
27593 {
27594 rtx op2 = XVECEXP (src, 0, i + 1);
27595 if (adjacent_mem_locations (op, op2) == op)
27596 {
27597 op = adjust_address (op, OOmode, 0);
27598 /* Skip the next register, since we're going to
27599 load it together with this register. */
27600 i++;
27601 }
27602 }
27603 }
27604 else
27605 {
27606 op = XVECEXP (src, 0, nvecs - i - 1);
27607
27608 /* If we are loading an even VSX register and the memory location
27609 is adjacent to the next register's memory location (if any),
27610 then we can load them both with one LXVP instruction. */
27611 if ((regno & 1) == 0)
27612 {
27613 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27614 if (adjacent_mem_locations (op2, op) == op2)
27615 {
27616 op = adjust_address (op2, OOmode, 0);
27617 /* Skip the next register, since we're going to
27618 load it together with this register. */
27619 i++;
27620 }
27621 }
27622 }
27623
27624 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27625 emit_insn (gen_rtx_SET (dst_i, op));
27626 }
27627
27628 /* We are writing an accumulator register, so we have to
27629 prime it after we've written it. */
27630 if (GET_MODE (src) == XOmode)
27631 emit_insn (gen_mma_xxmtacc (dst, dst));
27632
27633 return;
27634 }
27635
27636 /* Register -> register moves can use common code. */
27637 }
27638
27639 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27640 {
27641 /* If we are reading an accumulator register, we have to
27642 deprime it before we can access it. */
27643 if (TARGET_MMA
27644 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27645 emit_insn (gen_mma_xxmfacc (src, src));
27646
27647 /* Move register range backwards, if we might have destructive
27648 overlap. */
27649 int i;
27650 /* XO/OO are opaque so cannot use subregs. */
27651 if (mode == OOmode || mode == XOmode )
27652 {
27653 for (i = nregs - 1; i >= 0; i--)
27654 {
27655 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27656 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27657 emit_insn (gen_rtx_SET (dst_i, src_i));
27658 }
27659 }
27660 else
27661 {
27662 for (i = nregs - 1; i >= 0; i--)
27663 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27664 i * reg_mode_size),
27665 simplify_gen_subreg (reg_mode, src, mode,
27666 i * reg_mode_size)));
27667 }
27668
27669 /* If we are writing an accumulator register, we have to
27670 prime it after we've written it. */
27671 if (TARGET_MMA
27672 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27673 emit_insn (gen_mma_xxmtacc (dst, dst));
27674 }
27675 else
27676 {
27677 int i;
27678 int j = -1;
27679 bool used_update = false;
27680 rtx restore_basereg = NULL_RTX;
27681
27682 if (MEM_P (src) && INT_REGNO_P (reg))
27683 {
27684 rtx breg;
27685
27686 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27687 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27688 {
27689 rtx delta_rtx;
27690 breg = XEXP (XEXP (src, 0), 0);
27691 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27692 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27693 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27694 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27695 src = replace_equiv_address (src, breg);
27696 }
27697 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27698 {
27699 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27700 {
27701 rtx basereg = XEXP (XEXP (src, 0), 0);
27702 if (TARGET_UPDATE)
27703 {
27704 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27705 emit_insn (gen_rtx_SET (ndst,
27706 gen_rtx_MEM (reg_mode,
27707 XEXP (src, 0))));
27708 used_update = true;
27709 }
27710 else
27711 emit_insn (gen_rtx_SET (basereg,
27712 XEXP (XEXP (src, 0), 1)));
27713 src = replace_equiv_address (src, basereg);
27714 }
27715 else
27716 {
27717 rtx basereg = gen_rtx_REG (Pmode, reg);
27718 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27719 src = replace_equiv_address (src, basereg);
27720 }
27721 }
27722
27723 breg = XEXP (src, 0);
27724 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27725 breg = XEXP (breg, 0);
27726
27727 /* If the base register we are using to address memory is
27728 also a destination reg, then change that register last. */
27729 if (REG_P (breg)
27730 && REGNO (breg) >= REGNO (dst)
27731 && REGNO (breg) < REGNO (dst) + nregs)
27732 j = REGNO (breg) - REGNO (dst);
27733 }
27734 else if (MEM_P (dst) && INT_REGNO_P (reg))
27735 {
27736 rtx breg;
27737
27738 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27739 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27740 {
27741 rtx delta_rtx;
27742 breg = XEXP (XEXP (dst, 0), 0);
27743 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27744 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27745 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27746
27747 /* We have to update the breg before doing the store.
27748 Use store with update, if available. */
27749
27750 if (TARGET_UPDATE)
27751 {
27752 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27753 emit_insn (TARGET_32BIT
27754 ? (TARGET_POWERPC64
27755 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27756 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27757 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27758 used_update = true;
27759 }
27760 else
27761 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27762 dst = replace_equiv_address (dst, breg);
27763 }
27764 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27765 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27766 {
27767 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27768 {
27769 rtx basereg = XEXP (XEXP (dst, 0), 0);
27770 if (TARGET_UPDATE)
27771 {
27772 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27773 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27774 XEXP (dst, 0)),
27775 nsrc));
27776 used_update = true;
27777 }
27778 else
27779 emit_insn (gen_rtx_SET (basereg,
27780 XEXP (XEXP (dst, 0), 1)));
27781 dst = replace_equiv_address (dst, basereg);
27782 }
27783 else
27784 {
27785 rtx basereg = XEXP (XEXP (dst, 0), 0);
27786 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27787 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27788 && REG_P (basereg)
27789 && REG_P (offsetreg)
27790 && REGNO (basereg) != REGNO (offsetreg));
27791 if (REGNO (basereg) == 0)
27792 {
27793 rtx tmp = offsetreg;
27794 offsetreg = basereg;
27795 basereg = tmp;
27796 }
27797 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27798 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27799 dst = replace_equiv_address (dst, basereg);
27800 }
27801 }
27802 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27803 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27804 }
27805
27806 /* If we are reading an accumulator register, we have to
27807 deprime it before we can access it. */
27808 if (TARGET_MMA && REG_P (src)
27809 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27810 emit_insn (gen_mma_xxmfacc (src, src));
27811
27812 for (i = 0; i < nregs; i++)
27813 {
27814 /* Calculate index to next subword. */
27815 ++j;
27816 if (j == nregs)
27817 j = 0;
27818
27819	  /* If the compiler already emitted the move of the first word via a
27820	     store with update, there is no need to do anything. */
27821 if (j == 0 && used_update)
27822 continue;
27823
27824 /* XO/OO are opaque so cannot use subregs. */
27825 if (mode == OOmode || mode == XOmode )
27826 {
27827 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27828 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27829 emit_insn (gen_rtx_SET (dst_i, src_i));
27830 }
27831 else
27832 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27833 j * reg_mode_size),
27834 simplify_gen_subreg (reg_mode, src, mode,
27835 j * reg_mode_size)));
27836 }
27837
27838 /* If we are writing an accumulator register, we have to
27839 prime it after we've written it. */
27840 if (TARGET_MMA && REG_P (dst)
27841 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27842 emit_insn (gen_mma_xxmtacc (dst, dst));
27843
27844 if (restore_basereg != NULL_RTX)
27845 emit_insn (restore_basereg);
27846 }
27847 }
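
/* For illustration (a sketch, not part of the original source): storing an
   OOmode (__vector_pair) value held in VSX registers 34 and 35 to memory on
   a little-endian target swaps the constituent registers, roughly:

	stxv 35,0(9)	# last register to the first memory location
	stxv 34,16(9)

   matching the WORDS_BIG_ENDIAN selection of "subreg" in the loops
   above.  */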
27848 \f
27849 /* Return true if the peephole2 can combine an addis instruction and a load
27850    with an offset into a single sequence that can be fused together on a
27851    power8. */
27852
27853 bool
27854 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27855 rtx addis_value, /* addis value. */
27856 rtx target, /* target register that is loaded. */
27857 rtx mem) /* bottom part of the memory addr. */
27858 {
27859 rtx addr;
27860 rtx base_reg;
27861
27862 /* Validate arguments. */
27863 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27864 return false;
27865
27866 if (!base_reg_operand (target, GET_MODE (target)))
27867 return false;
27868
27869 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27870 return false;
27871
27872 /* Allow sign/zero extension. */
27873 if (GET_CODE (mem) == ZERO_EXTEND
27874 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27875 mem = XEXP (mem, 0);
27876
27877 if (!MEM_P (mem))
27878 return false;
27879
27880 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27881 return false;
27882
27883 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27884 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27885 return false;
27886
27887 /* Validate that the register used to load the high value is either the
27888 register being loaded, or we can safely replace its use.
27889
27890 This function is only called from the peephole2 pass and we assume that
27891 there are 2 instructions in the peephole (addis and load), so we want to
27892    check that the target register was not used in the memory address and that
27893    the register holding the addis result is dead after the peephole. */
27894 if (REGNO (addis_reg) != REGNO (target))
27895 {
27896 if (reg_mentioned_p (target, mem))
27897 return false;
27898
27899 if (!peep2_reg_dead_p (2, addis_reg))
27900 return false;
27901
27902 /* If the target register being loaded is the stack pointer, we must
27903 avoid loading any other value into it, even temporarily. */
27904 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27905 return false;
27906 }
27907
27908 base_reg = XEXP (addr, 0);
27909 return REGNO (addis_reg) == REGNO (base_reg);
27910 }
27911
27912 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27913 sequence. We adjust the addis register to use the target register. If the
27914    load sign extends, we adjust the code to do a zero-extending load and an
27915    explicit sign extension afterwards, since the fusion only covers
27916    zero-extending loads.
27917
27918 The operands are:
27919 operands[0] register set with addis (to be replaced with target)
27920 operands[1] value set via addis
27921 operands[2] target register being loaded
27922 operands[3] D-form memory reference using operands[0]. */
27923
27924 void
27925 expand_fusion_gpr_load (rtx *operands)
27926 {
27927 rtx addis_value = operands[1];
27928 rtx target = operands[2];
27929 rtx orig_mem = operands[3];
27930 rtx new_addr, new_mem, orig_addr, offset;
27931 enum rtx_code plus_or_lo_sum;
27932 machine_mode target_mode = GET_MODE (target);
27933 machine_mode extend_mode = target_mode;
27934 machine_mode ptr_mode = Pmode;
27935 enum rtx_code extend = UNKNOWN;
27936
27937 if (GET_CODE (orig_mem) == ZERO_EXTEND
27938 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27939 {
27940 extend = GET_CODE (orig_mem);
27941 orig_mem = XEXP (orig_mem, 0);
27942 target_mode = GET_MODE (orig_mem);
27943 }
27944
27945 gcc_assert (MEM_P (orig_mem));
27946
27947 orig_addr = XEXP (orig_mem, 0);
27948 plus_or_lo_sum = GET_CODE (orig_addr);
27949 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27950
27951 offset = XEXP (orig_addr, 1);
27952 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27953 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27954
27955 if (extend != UNKNOWN)
27956 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27957
27958 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27959 UNSPEC_FUSION_GPR);
27960 emit_insn (gen_rtx_SET (target, new_mem));
27961
27962 if (extend == SIGN_EXTEND)
27963 {
27964 int sub_off = ((BYTES_BIG_ENDIAN)
27965 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27966 : 0);
27967 rtx sign_reg
27968 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27969
27970 emit_insn (gen_rtx_SET (target,
27971 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27972 }
27973
27974 return;
27975 }
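
/* For illustration (a sketch, not part of the original source): a fused
   sign-extending HImode load is rewritten by the function above into a
   zero-extending fused load plus an explicit sign extension, roughly:

	addis 9,2,sym@toc@ha
	lhz   9,sym@toc@l(9)
	extsh 9,9

   because power8 fusion only covers the zero-extending load forms.  */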
27976
27977 /* Emit the addis instruction that will be part of a fused instruction
27978 sequence. */
27979
27980 void
27981 emit_fusion_addis (rtx target, rtx addis_value)
27982 {
27983 rtx fuse_ops[10];
27984 const char *addis_str = NULL;
27985
27986 /* Emit the addis instruction. */
27987 fuse_ops[0] = target;
27988 if (satisfies_constraint_L (addis_value))
27989 {
27990 fuse_ops[1] = addis_value;
27991 addis_str = "lis %0,%v1";
27992 }
27993
27994 else if (GET_CODE (addis_value) == PLUS)
27995 {
27996 rtx op0 = XEXP (addis_value, 0);
27997 rtx op1 = XEXP (addis_value, 1);
27998
27999 if (REG_P (op0) && CONST_INT_P (op1)
28000 && satisfies_constraint_L (op1))
28001 {
28002 fuse_ops[1] = op0;
28003 fuse_ops[2] = op1;
28004 addis_str = "addis %0,%1,%v2";
28005 }
28006 }
28007
28008 else if (GET_CODE (addis_value) == HIGH)
28009 {
28010 rtx value = XEXP (addis_value, 0);
28011 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
28012 {
28013 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
28014 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
28015 if (TARGET_ELF)
28016 addis_str = "addis %0,%2,%1@toc@ha";
28017
28018 else if (TARGET_XCOFF)
28019 addis_str = "addis %0,%1@u(%2)";
28020
28021 else
28022 gcc_unreachable ();
28023 }
28024
28025 else if (GET_CODE (value) == PLUS)
28026 {
28027 rtx op0 = XEXP (value, 0);
28028 rtx op1 = XEXP (value, 1);
28029
28030 if (GET_CODE (op0) == UNSPEC
28031 && XINT (op0, 1) == UNSPEC_TOCREL
28032 && CONST_INT_P (op1))
28033 {
28034 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
28035 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
28036 fuse_ops[3] = op1;
28037 if (TARGET_ELF)
28038 addis_str = "addis %0,%2,%1+%3@toc@ha";
28039
28040 else if (TARGET_XCOFF)
28041 addis_str = "addis %0,%1+%3@u(%2)";
28042
28043 else
28044 gcc_unreachable ();
28045 }
28046 }
28047
28048 else if (satisfies_constraint_L (value))
28049 {
28050 fuse_ops[1] = value;
28051 addis_str = "lis %0,%v1";
28052 }
28053
28054 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
28055 {
28056 fuse_ops[1] = value;
28057 addis_str = "lis %0,%1@ha";
28058 }
28059 }
28060
28061 if (!addis_str)
28062 fatal_insn ("Could not generate addis value for fusion", addis_value);
28063
28064 output_asm_insn (addis_str, fuse_ops);
28065 }
28066
28067 /* Emit a D-form load or store instruction that is the second instruction
28068 of a fusion sequence. */
28069
28070 static void
28071 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
28072 {
28073 rtx fuse_ops[10];
28074 char insn_template[80];
28075
28076 fuse_ops[0] = load_reg;
28077 fuse_ops[1] = addis_reg;
28078
28079 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
28080 {
28081 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
28082 fuse_ops[2] = offset;
28083 output_asm_insn (insn_template, fuse_ops);
28084 }
28085
28086 else if (GET_CODE (offset) == UNSPEC
28087 && XINT (offset, 1) == UNSPEC_TOCREL)
28088 {
28089 if (TARGET_ELF)
28090 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
28091
28092 else if (TARGET_XCOFF)
28093 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
28094
28095 else
28096 gcc_unreachable ();
28097
28098 fuse_ops[2] = XVECEXP (offset, 0, 0);
28099 output_asm_insn (insn_template, fuse_ops);
28100 }
28101
28102 else if (GET_CODE (offset) == PLUS
28103 && GET_CODE (XEXP (offset, 0)) == UNSPEC
28104 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
28105 && CONST_INT_P (XEXP (offset, 1)))
28106 {
28107 rtx tocrel_unspec = XEXP (offset, 0);
28108 if (TARGET_ELF)
28109 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
28110
28111 else if (TARGET_XCOFF)
28112 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
28113
28114 else
28115 gcc_unreachable ();
28116
28117 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
28118 fuse_ops[3] = XEXP (offset, 1);
28119 output_asm_insn (insn_template, fuse_ops);
28120 }
28121
28122 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
28123 {
28124 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
28125
28126 fuse_ops[2] = offset;
28127 output_asm_insn (insn_template, fuse_ops);
28128 }
28129
28130 else
28131 fatal_insn ("Unable to generate load/store offset for fusion", offset);
28132
28133 return;
28134 }
28135
28136 /* Given an address, convert it into the addis and load offset parts. Addresses
28137 created during the peephole2 process look like:
28138 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
28139 (unspec [(...)] UNSPEC_TOCREL)) */
28140
28141 static void
28142 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
28143 {
28144 rtx hi, lo;
28145
28146 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
28147 {
28148 hi = XEXP (addr, 0);
28149 lo = XEXP (addr, 1);
28150 }
28151 else
28152 gcc_unreachable ();
28153
28154 *p_hi = hi;
28155 *p_lo = lo;
28156 }
28157
28158 /* Return a string to fuse an addis instruction with a GPR load into the same
28159    register that the addis instruction set. The address that is used is the
28160    logical address that was formed during peephole2:
28161 (lo_sum (high) (low-part))
28162
28163 The code is complicated, so we call output_asm_insn directly, and just
28164 return "". */
28165
28166 const char *
28167 emit_fusion_gpr_load (rtx target, rtx mem)
28168 {
28169 rtx addis_value;
28170 rtx addr;
28171 rtx load_offset;
28172 const char *load_str = NULL;
28173 machine_mode mode;
28174
28175 if (GET_CODE (mem) == ZERO_EXTEND)
28176 mem = XEXP (mem, 0);
28177
28178 gcc_assert (REG_P (target) && MEM_P (mem));
28179
28180 addr = XEXP (mem, 0);
28181 fusion_split_address (addr, &addis_value, &load_offset);
28182
28183 /* Now emit the load instruction to the same register. */
28184 mode = GET_MODE (mem);
28185 switch (mode)
28186 {
28187 case E_QImode:
28188 load_str = "lbz";
28189 break;
28190
28191 case E_HImode:
28192 load_str = "lhz";
28193 break;
28194
28195 case E_SImode:
28196 case E_SFmode:
28197 load_str = "lwz";
28198 break;
28199
28200 case E_DImode:
28201 case E_DFmode:
28202 gcc_assert (TARGET_POWERPC64);
28203 load_str = "ld";
28204 break;
28205
28206 default:
28207 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
28208 }
28209
28210 /* Emit the addis instruction. */
28211 emit_fusion_addis (target, addis_value);
28212
28213 /* Emit the D-form load instruction. */
28214 emit_fusion_load (target, target, load_offset, load_str);
28215
28216 return "";
28217 }
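
/* For illustration (a sketch, not part of the original source): for a simple
   TOC-relative SImode load the two emit calls above produce the
   back-to-back pair:

	addis 9,2,var@toc@ha
	lwz   9,var@toc@l(9)

   which the power8 hardware can fuse, since both instructions target the
   same register.  */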
28218 \f
28219 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
28220 ignores it then. */
28221 static GTY(()) tree atomic_hold_decl;
28222 static GTY(()) tree atomic_clear_decl;
28223 static GTY(()) tree atomic_update_decl;
28224
28225 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
28226 static void
28227 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
28228 {
28229 if (!TARGET_HARD_FLOAT)
28230 {
28231 #ifdef RS6000_GLIBC_ATOMIC_FENV
28232 if (atomic_hold_decl == NULL_TREE)
28233 {
28234 atomic_hold_decl
28235 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28236 get_identifier ("__atomic_feholdexcept"),
28237 build_function_type_list (void_type_node,
28238 double_ptr_type_node,
28239 NULL_TREE));
28240 TREE_PUBLIC (atomic_hold_decl) = 1;
28241 DECL_EXTERNAL (atomic_hold_decl) = 1;
28242 }
28243
28244 if (atomic_clear_decl == NULL_TREE)
28245 {
28246 atomic_clear_decl
28247 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28248 get_identifier ("__atomic_feclearexcept"),
28249 build_function_type_list (void_type_node,
28250 NULL_TREE));
28251 TREE_PUBLIC (atomic_clear_decl) = 1;
28252 DECL_EXTERNAL (atomic_clear_decl) = 1;
28253 }
28254
28255 tree const_double = build_qualified_type (double_type_node,
28256 TYPE_QUAL_CONST);
28257 tree const_double_ptr = build_pointer_type (const_double);
28258 if (atomic_update_decl == NULL_TREE)
28259 {
28260 atomic_update_decl
28261 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28262 get_identifier ("__atomic_feupdateenv"),
28263 build_function_type_list (void_type_node,
28264 const_double_ptr,
28265 NULL_TREE));
28266 TREE_PUBLIC (atomic_update_decl) = 1;
28267 DECL_EXTERNAL (atomic_update_decl) = 1;
28268 }
28269
28270 tree fenv_var = create_tmp_var_raw (double_type_node);
28271 TREE_ADDRESSABLE (fenv_var) = 1;
28272 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
28273 build4 (TARGET_EXPR, double_type_node, fenv_var,
28274 void_node, NULL_TREE, NULL_TREE));
28275
28276 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
28277 *clear = build_call_expr (atomic_clear_decl, 0);
28278 *update = build_call_expr (atomic_update_decl, 1,
28279 fold_convert (const_double_ptr, fenv_addr));
28280 #endif
28281 return;
28282 }
28283
28284 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
28285 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
28286 tree call_mffs = build_call_expr (mffs, 0);
28287
28288 /* Generates the equivalent of feholdexcept (&fenv_var)
28289
28290 *fenv_var = __builtin_mffs ();
28291 double fenv_hold;
28292 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
28293 __builtin_mtfsf (0xff, fenv_hold); */
28294
28295 /* Mask to clear everything except for the rounding modes and non-IEEE
28296 arithmetic flag. */
28297 const unsigned HOST_WIDE_INT hold_exception_mask
28298 = HOST_WIDE_INT_C (0xffffffff00000007);
28299
28300 tree fenv_var = create_tmp_var_raw (double_type_node);
28301
28302 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
28303 NULL_TREE, NULL_TREE);
28304
28305 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
28306 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28307 build_int_cst (uint64_type_node,
28308 hold_exception_mask));
28309
28310 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28311 fenv_llu_and);
28312
28313 tree hold_mtfsf = build_call_expr (mtfsf, 2,
28314 build_int_cst (unsigned_type_node, 0xff),
28315 fenv_hold_mtfsf);
28316
28317 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
28318
28319 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28320
28321 double fenv_clear = __builtin_mffs ();
28322      *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
28323 __builtin_mtfsf (0xff, fenv_clear); */
28324
28325   /* Mask to clear everything in the lower 32 bits of the FPSCR image
28326      (the exception flags and enables as well as the rounding modes). */
28327 const unsigned HOST_WIDE_INT clear_exception_mask
28328 = HOST_WIDE_INT_C (0xffffffff00000000);
28329
28330 tree fenv_clear = create_tmp_var_raw (double_type_node);
28331
28332 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
28333 call_mffs, NULL_TREE, NULL_TREE);
28334
28335 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
28336 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
28337 fenv_clean_llu,
28338 build_int_cst (uint64_type_node,
28339 clear_exception_mask));
28340
28341 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28342 fenv_clear_llu_and);
28343
28344 tree clear_mtfsf = build_call_expr (mtfsf, 2,
28345 build_int_cst (unsigned_type_node, 0xff),
28346 fenv_clear_mtfsf);
28347
28348 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
28349
28350 /* Generates the equivalent of feupdateenv (&fenv_var)
28351
28352 double old_fenv = __builtin_mffs ();
28353 double fenv_update;
28354      *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
28355                                 (*(uint64_t*)fenv_var & 0x1ff80fff);
28356 __builtin_mtfsf (0xff, fenv_update); */
28357
28358 const unsigned HOST_WIDE_INT update_exception_mask
28359 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28360 const unsigned HOST_WIDE_INT new_exception_mask
28361 = HOST_WIDE_INT_C (0x1ff80fff);
28362
28363 tree old_fenv = create_tmp_var_raw (double_type_node);
28364 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
28365 call_mffs, NULL_TREE, NULL_TREE);
28366
28367 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
28368 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
28369 build_int_cst (uint64_type_node,
28370 update_exception_mask));
28371
28372 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28373 build_int_cst (uint64_type_node,
28374 new_exception_mask));
28375
28376 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
28377 old_llu_and, new_llu_and);
28378
28379 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28380 new_llu_mask);
28381
28382 tree update_mtfsf = build_call_expr (mtfsf, 2,
28383 build_int_cst (unsigned_type_node, 0xff),
28384 fenv_update_mtfsf);
28385
28386 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
28387 }
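
/* For illustration (not part of the original source): the hold/clear/update
   sequences built above bracket atomic floating-point compound assignments,
   e.g.:

     _Atomic double d;
     d += 1.0;   // roughly: hold; loop { compute; try compare-and-swap;
		 //		       on failure, clear and retry }; update

   so that exception flags raised by a discarded speculative iteration never
   become observable.  */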
28388
28389 void
28390 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
28391 {
28392 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28393
28394 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28395 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28396
28397   /* The destination layout of the vmrgew instruction is:
28398      rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28399      Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28400      vmrgew instruction will be correct. */
28401 if (BYTES_BIG_ENDIAN)
28402 {
28403 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
28404 GEN_INT (0)));
28405 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
28406 GEN_INT (3)));
28407 }
28408 else
28409 {
28410 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
28411 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
28412 }
28413
28414 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28415 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28416
28417 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
28418 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
28419
28420 if (BYTES_BIG_ENDIAN)
28421 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28422 else
28423 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28424 }
28425
28426 void
28427 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
28428 {
28429 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28430
28431 rtx_tmp0 = gen_reg_rtx (V2DImode);
28432 rtx_tmp1 = gen_reg_rtx (V2DImode);
28433
28434   /* The destination layout of the vmrgew instruction is:
28435      rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28436      Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28437      vmrgew instruction will be correct. */
28438 if (BYTES_BIG_ENDIAN)
28439 {
28440 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
28441 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
28442 }
28443 else
28444 {
28445 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
28446 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
28447 }
28448
28449 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28450 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28451
28452 if (signed_convert)
28453 {
28454 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
28455 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
28456 }
28457 else
28458 {
28459 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
28460 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
28461 }
28462
28463 if (BYTES_BIG_ENDIAN)
28464 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28465 else
28466 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28467 }
28468
28469 void
28470 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
28471 rtx src2)
28472 {
28473 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28474
28475 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28476 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28477
28478 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
28479 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
28480
28481 rtx_tmp2 = gen_reg_rtx (V4SImode);
28482 rtx_tmp3 = gen_reg_rtx (V4SImode);
28483
28484 if (signed_convert)
28485 {
28486 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28487 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28488 }
28489 else
28490 {
28491 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28492 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28493 }
28494
28495 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
28496 }
28497
28498 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28499
28500 static bool
28501 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28502 optimization_type opt_type)
28503 {
28504 switch (op)
28505 {
28506 case rsqrt_optab:
28507 return (opt_type == OPTIMIZE_FOR_SPEED
28508 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28509
28510 default:
28511 return true;
28512 }
28513 }
28514
28515 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28516
28517 static HOST_WIDE_INT
28518 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28519 {
28520 if (TREE_CODE (exp) == STRING_CST
28521 && (STRICT_ALIGNMENT || !optimize_size))
28522 return MAX (align, BITS_PER_WORD);
28523 return align;
28524 }
28525
28526 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28527
28528 static HOST_WIDE_INT
28529 rs6000_starting_frame_offset (void)
28530 {
28531 if (FRAME_GROWS_DOWNWARD)
28532 return 0;
28533 return RS6000_STARTING_FRAME_OFFSET;
28534 }
28535 \f
28536 /* Internal function to return the built-in function id for the complex
28537 multiply operation for a given mode. */
28538
28539 static inline built_in_function
28540 complex_multiply_builtin_code (machine_mode mode)
28541 {
28542 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28543 int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28544 return (built_in_function) func;
28545 }
28546
28547 /* Internal function to return the built-in function id for the complex divide
28548 operation for a given mode. */
28549
28550 static inline built_in_function
28551 complex_divide_builtin_code (machine_mode mode)
28552 {
28553 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28554 int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28555 return (built_in_function) func;
28556 }
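
/* Worked example for the two helpers above (disabled sketch, not part of
   the build): each complex float mode gets a distinct built-in function
   code by adding its offset from MIN_MODE_COMPLEX_FLOAT to the
   BUILT_IN_COMPLEX_{MUL,DIV}_MIN base code.  */
#if 0
static void
complex_builtin_code_sketch (void)
{
  built_in_function mul_kc = complex_multiply_builtin_code (KCmode);
  built_in_function div_kc = complex_divide_builtin_code (KCmode);

  /* The computed codes land inside the complex multiply/divide ranges.  */
  gcc_assert (IN_RANGE (mul_kc, BUILT_IN_COMPLEX_MUL_MIN,
                        BUILT_IN_COMPLEX_MUL_MAX));
  gcc_assert (IN_RANGE (div_kc, BUILT_IN_COMPLEX_DIV_MIN,
                        BUILT_IN_COMPLEX_DIV_MAX));
}
#endif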
28557
28558 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
28559 function names from <foo>l to <foo>f128 if the default long double type is
28560 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28561 include file switches the names on systems that support long double as IEEE
28562 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28563 In the future, glibc will export names like __ieee128_sinf128 and we can
28564 switch to using those instead of using sinf128, which pollutes the user's
28565 namespace.
28566
28567 This will switch the names for the Fortran math functions as well (Fortran
28568 does not use math.h). However, Fortran needs other changes to the compiler
28569 and library before the real*16 type can be switched at compile time.
28570
28571 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28572 only do this transformation if the __float128 type is enabled. This
28573 prevents us from doing the transformation on older 32-bit ports that might
28574 have enabled using IEEE 128-bit floating point as the default long double
28575 type.
28576
28577 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28578 function names used for complex multiply and divide to the appropriate
28579 names. */
28580
28581 static tree
28582 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28583 {
28584 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28585 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28586 if (TARGET_FLOAT128_TYPE
28587 && TREE_CODE (decl) == FUNCTION_DECL
28588 && DECL_IS_UNDECLARED_BUILTIN (decl)
28589 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28590 {
28591 built_in_function fn_code = DECL_FUNCTION_CODE (decl);
28592 const char *newname = NULL;
28593
28594 if (fn_code == complex_multiply_builtin_code (KCmode))
28595 newname = "__mulkc3";
28596
28597 else if (fn_code == complex_multiply_builtin_code (ICmode))
28598 newname = "__multc3";
28599
28600 else if (fn_code == complex_multiply_builtin_code (TCmode))
28601 newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";
28602
28603 else if (fn_code == complex_divide_builtin_code (KCmode))
28604 newname = "__divkc3";
28605
28606 else if (fn_code == complex_divide_builtin_code (ICmode))
28607 newname = "__divtc3";
28608
28609 else if (fn_code == complex_divide_builtin_code (TCmode))
28610 newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";
28611
28612 if (newname)
28613 {
28614 if (TARGET_DEBUG_BUILTIN)
28615 fprintf (stderr, "Map complex mul/div => %s\n", newname);
28616
28617 return get_identifier (newname);
28618 }
28619 }
28620
28621 /* Map long double built-in functions if long double is IEEE 128-bit. */
28622 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28623 && TREE_CODE (decl) == FUNCTION_DECL
28624 && DECL_IS_UNDECLARED_BUILTIN (decl)
28625 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28626 {
28627 size_t len = IDENTIFIER_LENGTH (id);
28628 const char *name = IDENTIFIER_POINTER (id);
28629 char *newname = NULL;
28630
28631 /* See if it is one of the built-in functions with an unusual name. */
28632 switch (DECL_FUNCTION_CODE (decl))
28633 {
28634 case BUILT_IN_DREML:
28635 newname = xstrdup ("__remainderieee128");
28636 break;
28637
28638 case BUILT_IN_GAMMAL:
28639 newname = xstrdup ("__lgammaieee128");
28640 break;
28641
28642 case BUILT_IN_GAMMAL_R:
28643 case BUILT_IN_LGAMMAL_R:
28644 newname = xstrdup ("__lgammaieee128_r");
28645 break;
28646
28647 case BUILT_IN_NEXTTOWARD:
28648 newname = xstrdup ("__nexttoward_to_ieee128");
28649 break;
28650
28651 case BUILT_IN_NEXTTOWARDF:
28652 newname = xstrdup ("__nexttowardf_to_ieee128");
28653 break;
28654
28655 case BUILT_IN_NEXTTOWARDL:
28656 newname = xstrdup ("__nexttowardieee128");
28657 break;
28658
28659 case BUILT_IN_POW10L:
28660 newname = xstrdup ("__exp10ieee128");
28661 break;
28662
28663 case BUILT_IN_SCALBL:
28664 newname = xstrdup ("__scalbieee128");
28665 break;
28666
28667 case BUILT_IN_SIGNIFICANDL:
28668 newname = xstrdup ("__significandieee128");
28669 break;
28670
28671 case BUILT_IN_SINCOSL:
28672 newname = xstrdup ("__sincosieee128");
28673 break;
28674
28675 default:
28676 break;
28677 }
28678
28679 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28680 if (!newname)
28681 {
28682 size_t printf_len = strlen ("printf");
28683 size_t scanf_len = strlen ("scanf");
28684 size_t printf_chk_len = strlen ("printf_chk");
28685
28686 if (len >= printf_len
28687 && strcmp (name + len - printf_len, "printf") == 0)
28688 newname = xasprintf ("__%sieee128", name);
28689
28690 else if (len >= scanf_len
28691 && strcmp (name + len - scanf_len, "scanf") == 0)
28692 newname = xasprintf ("__isoc99_%sieee128", name);
28693
28694 else if (len >= printf_chk_len
28695 && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28696 newname = xasprintf ("%sieee128", name);
28697
28698 else if (name[len - 1] == 'l')
28699 {
28700 bool uses_ieee128_p = false;
28701 tree type = TREE_TYPE (decl);
28702 machine_mode ret_mode = TYPE_MODE (type);
28703
28704 /* See if the function returns an IEEE 128-bit floating point type or
28705 complex type. */
28706 if (ret_mode == TFmode || ret_mode == TCmode)
28707 uses_ieee128_p = true;
28708 else
28709 {
28710 function_args_iterator args_iter;
28711 tree arg;
28712
28713 /* See if the function passes an IEEE 128-bit floating point type
28714 or complex type. */
28715 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28716 {
28717 machine_mode arg_mode = TYPE_MODE (arg);
28718 if (arg_mode == TFmode || arg_mode == TCmode)
28719 {
28720 uses_ieee128_p = true;
28721 break;
28722 }
28723 }
28724 }
28725
28726 /* If we passed or returned an IEEE 128-bit floating point type,
28727 change the name. Use __<name>ieee128 instead of <name>l. */
28728 if (uses_ieee128_p)
28729 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28730 }
28731 }
28732
28733 if (newname)
28734 {
28735 if (TARGET_DEBUG_BUILTIN)
28736 fprintf (stderr, "Map %s => %s\n", name, newname);
28737
28738 id = get_identifier (newname);
28739 free (newname);
28740 }
28741 }
28742
28743 return id;
28744 }
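
/* Illustrative mappings performed above when long double is IEEE 128-bit
   (examples only, not an exhaustive list):

     sinl         -> __sinieee128            (generic <foo>l rule)
     printf       -> __printfieee128         ("__%sieee128")
     scanf        -> __isoc99_scanfieee128   ("__isoc99_%sieee128")
     __printf_chk -> __printf_chkieee128     ("%sieee128")
     dreml        -> __remainderieee128      (unusual-name table).  */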
28745
28746 /* Predict whether the given loop in gimple will be transformed in the RTL
28747 doloop_optimize pass. */
28748
28749 static bool
28750 rs6000_predict_doloop_p (struct loop *loop)
28751 {
28752 gcc_assert (loop);
28753
28754 /* On rs6000, targetm.can_use_doloop_p is actually
28755 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28756 if (loop->inner != NULL)
28757 {
28758 if (dump_file && (dump_flags & TDF_DETAILS))
28759 fprintf (dump_file, "Predict doloop failure due to"
28760 " loop nesting.\n");
28761 return false;
28762 }
28763
28764 return true;
28765 }
28766
28767 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28768
28769 static machine_mode
28770 rs6000_preferred_doloop_mode (machine_mode)
28771 {
28772 return word_mode;
28773 }
28774
28775 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28776
28777 static bool
28778 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28779 {
28780 gcc_assert (MEM_P (mem));
28781
28782 /* curr_insn_transform()'s handling of subregs cannot handle Altivec
28783 AND-style addresses, so don't allow MEMs with those address types to be
28784 substituted as an equivalent expression. See PR93974 for details. */
28785 if (GET_CODE (XEXP (mem, 0)) == AND)
28786 return true;
28787
28788 return false;
28789 }
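
/* Disabled sketch of the rejected address form: a MEM whose address is an
   Altivec-style AND that masks off the low four address bits, i.e.
   (mem (and (reg) (const_int -16))).  */
#if 0
static void
mem_equiv_sketch (void)
{
  rtx base = gen_reg_rtx (Pmode);
  rtx addr = gen_rtx_AND (Pmode, base, GEN_INT (-16));
  rtx mem = gen_rtx_MEM (V16QImode, addr);

  /* Such a MEM must not be substituted as an equivalent expression.  */
  gcc_assert (rs6000_cannot_substitute_mem_equiv_p (mem));
}
#endif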
28790
28791 /* Implement TARGET_INVALID_CONVERSION. */
28792
28793 static const char *
28794 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28795 {
28796 /* Make sure we're working with the canonical types. */
28797 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28798 fromtype = TYPE_CANONICAL (fromtype);
28799 if (TYPE_CANONICAL (totype) != NULL_TREE)
28800 totype = TYPE_CANONICAL (totype);
28801
28802 machine_mode frommode = TYPE_MODE (fromtype);
28803 machine_mode tomode = TYPE_MODE (totype);
28804
28805 if (frommode != tomode)
28806 {
28807 /* Do not allow conversions to/from XOmode and OOmode types. */
28808 if (frommode == XOmode)
28809 return N_("invalid conversion from type %<__vector_quad%>");
28810 if (tomode == XOmode)
28811 return N_("invalid conversion to type %<__vector_quad%>");
28812 if (frommode == OOmode)
28813 return N_("invalid conversion from type %<__vector_pair%>");
28814 if (tomode == OOmode)
28815 return N_("invalid conversion to type %<__vector_pair%>");
28816 }
28817
28818 /* Conversion allowed. */
28819 return NULL;
28820 }
28821
28822 /* Convert a SFmode constant to the integer bit pattern. */
28823
28824 long
28825 rs6000_const_f32_to_i32 (rtx operand)
28826 {
28827 long value;
28828 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28829
28830 gcc_assert (GET_MODE (operand) == SFmode);
28831 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28832 return value;
28833 }
28834
28835 void
28836 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28837 {
28838 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28839 inform (input_location,
28840 "the result for the xxspltidp instruction "
28841 "is undefined for subnormal input values");
28842 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28843 }
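
/* Worked example for the two routines above: the SFmode constant 1.0 has
   the bit pattern 0x3f800000 (sign 0, biased exponent 127, mantissa 0);
   passing that value to rs6000_emit_xxspltidp_v2df emits an XXSPLTIDP
   that splats 1.0, widened to DFmode, into both V2DF lanes.  A pattern
   such as 0x00000001 (exponent 0, mantissa non-zero) is an SFmode
   subnormal and draws the diagnostic above, since the instruction's
   result is undefined for it.  */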
28844
28845 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28846
28847 static bool
28848 rs6000_gen_pic_addr_diff_vec (void)
28849 {
28850 return rs6000_relative_jumptables;
28851 }
28852
28853 void
28854 rs6000_output_addr_vec_elt (FILE *file, int value)
28855 {
28856 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28857 char buf[100];
28858
28859 fprintf (file, "%s", directive);
28860 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28861 assemble_name (file, buf);
28862 fprintf (file, "\n");
28863 }
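
/* Example output for VALUE == 42 on a 32-bit ELF target (a sketch; the
   exact label spelling comes from ASM_GENERATE_INTERNAL_LABEL and the
   64-bit directive from DOUBLE_INT_ASM_OP):

	.long	.L42  */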
28864
28865 \f
28866 /* Copy an integer constant to the vector constant structure. */
28867
28868 static void
28869 constant_int_to_128bit_vector (rtx op,
28870 machine_mode mode,
28871 size_t byte_num,
28872 vec_const_128bit_type *info)
28873 {
28874 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28875 unsigned bitsize = GET_MODE_BITSIZE (mode);
28876
28877 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28878 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28879 }
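
/* Worked example: for op == 0x0102030405060708 in DImode with
   byte_num == 0, the loop above stores 0x01, 0x02, ..., 0x08 into
   info->bytes[0] through info->bytes[7], most significant byte first.  */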
28880
28881 /* Copy a floating point constant to the vector constant structure. */
28882
28883 static void
28884 constant_fp_to_128bit_vector (rtx op,
28885 machine_mode mode,
28886 size_t byte_num,
28887 vec_const_128bit_type *info)
28888 {
28889 unsigned bitsize = GET_MODE_BITSIZE (mode);
28890 unsigned num_words = bitsize / 32;
28891 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28892 long real_words[VECTOR_128BIT_WORDS];
28893
28894 /* Make sure we don't overflow the real_words array and that it is
28895 filled completely. */
28896 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28897
28898 real_to_target (real_words, rtype, mode);
28899
28900 /* Iterate over each 32-bit word in the floating point constant. The
28901 real_to_target function writes out words in target-endian fashion. We need
28902 to arrange the order so that the bytes are written in big-endian order. */
28903 for (unsigned num = 0; num < num_words; num++)
28904 {
28905 unsigned endian_num = (BYTES_BIG_ENDIAN
28906 ? num
28907 : num_words - 1 - num);
28908
28909 unsigned uvalue = real_words[endian_num];
28910 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28911 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28912 }
28913
28914 /* Mark that this constant involves floating point. */
28915 info->fp_constant_p = true;
28916 }
28917
28918 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28919 structure INFO.
28920
28921 Break the constant out into bytes, half words, words, and double words.
28922 Return true if we have successfully converted the constant.
28923
28924 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28925 constants. Integer and floating point scalar constants are splatted to fill
28926 out the vector. */
28927
28928 bool
28929 vec_const_128bit_to_bytes (rtx op,
28930 machine_mode mode,
28931 vec_const_128bit_type *info)
28932 {
28933 /* Initialize the constant structure. */
28934 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28935
28936 /* Assume CONST_INTs are DImode. */
28937 if (mode == VOIDmode)
28938 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28939
28940 if (mode == VOIDmode)
28941 return false;
28942
28943 unsigned size = GET_MODE_SIZE (mode);
28944 bool splat_p = false;
28945
28946 if (size > VECTOR_128BIT_BYTES)
28947 return false;
28948
28949 /* Set up the bits. */
28950 switch (GET_CODE (op))
28951 {
28952 /* Integer constants, default to double word. */
28953 case CONST_INT:
28954 {
28955 constant_int_to_128bit_vector (op, mode, 0, info);
28956 splat_p = true;
28957 break;
28958 }
28959
28960 /* Floating point constants. */
28961 case CONST_DOUBLE:
28962 {
28963 /* Fail if the floating point constant is the wrong mode. */
28964 if (GET_MODE (op) != mode)
28965 return false;
28966
28967 /* SFmode scalar constants are stored in DFmode format. */
28968 if (mode == SFmode)
28969 {
28970 mode = DFmode;
28971 size = GET_MODE_SIZE (DFmode);
28972 }
28973
28974 constant_fp_to_128bit_vector (op, mode, 0, info);
28975 splat_p = true;
28976 break;
28977 }
28978
28979 /* Vector constants, iterate over each element. On little endian
28980 systems, we have to reverse the element numbers. */
28981 case CONST_VECTOR:
28982 {
28983 /* Fail if the vector constant is the wrong mode or size. */
28984 if (GET_MODE (op) != mode
28985 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28986 return false;
28987
28988 machine_mode ele_mode = GET_MODE_INNER (mode);
28989 size_t ele_size = GET_MODE_SIZE (ele_mode);
28990 size_t nunits = GET_MODE_NUNITS (mode);
28991
28992 for (size_t num = 0; num < nunits; num++)
28993 {
28994 rtx ele = CONST_VECTOR_ELT (op, num);
28995 size_t byte_num = (BYTES_BIG_ENDIAN
28996 ? num
28997 : nunits - 1 - num) * ele_size;
28998
28999 if (CONST_INT_P (ele))
29000 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
29001 else if (CONST_DOUBLE_P (ele))
29002 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
29003 else
29004 return false;
29005 }
29006
29007 break;
29008 }
29009
29010 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
29011 Since we are duplicating the element, we don't have to worry about
29012 endian issues. */
29013 case VEC_DUPLICATE:
29014 {
29015 /* Fail if the vector duplicate is the wrong mode or size. */
29016 if (GET_MODE (op) != mode
29017 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
29018 return false;
29019
29020 machine_mode ele_mode = GET_MODE_INNER (mode);
29021 size_t ele_size = GET_MODE_SIZE (ele_mode);
29022 rtx ele = XEXP (op, 0);
29023 size_t nunits = GET_MODE_NUNITS (mode);
29024
29025 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
29026 return false;
29027
29028 for (size_t num = 0; num < nunits; num++)
29029 {
29030 size_t byte_num = num * ele_size;
29031
29032 if (CONST_INT_P (ele))
29033 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
29034 else
29035 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
29036 }
29037
29038 break;
29039 }
29040
29041 /* Anything else, just return failure. */
29042 default:
29043 return false;
29044 }
29045
29046 /* Splat the constant to fill 128 bits if desired. */
29047 if (splat_p && size < VECTOR_128BIT_BYTES)
29048 {
29049 if ((VECTOR_128BIT_BYTES % size) != 0)
29050 return false;
29051
29052 for (size_t offset = size;
29053 offset < VECTOR_128BIT_BYTES;
29054 offset += size)
29055 memcpy ((void *) &info->bytes[offset],
29056 (void *) &info->bytes[0],
29057 size);
29058 }
29059
29060 /* Remember original size. */
29061 info->original_size = size;
29062
29063 /* Determine if the bytes are all the same. */
29064 unsigned char first_byte = info->bytes[0];
29065 info->all_bytes_same = true;
29066 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
29067 if (first_byte != info->bytes[i])
29068 {
29069 info->all_bytes_same = false;
29070 break;
29071 }
29072
29073 /* Pack half words together & determine if all of the half words are the
29074 same. */
29075 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
29076 info->half_words[i] = ((info->bytes[i * 2] << 8)
29077 | info->bytes[(i * 2) + 1]);
29078
29079 unsigned short first_hword = info->half_words[0];
29080 info->all_half_words_same = true;
29081 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
29082 if (first_hword != info->half_words[i])
29083 {
29084 info->all_half_words_same = false;
29085 break;
29086 }
29087
29088 /* Pack words together & determine if all of the words are the same. */
29089 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
29090 info->words[i] = ((info->bytes[i * 4] << 24)
29091 | (info->bytes[(i * 4) + 1] << 16)
29092 | (info->bytes[(i * 4) + 2] << 8)
29093 | info->bytes[(i * 4) + 3]);
29094
29095 info->all_words_same
29096 = (info->words[0] == info->words[1]
29097 && info->words[0] == info->words[2]
29098 && info->words[0] == info->words[3]);
29099
29100 /* Pack double words together & determine if all of the double words are the
29101 same. */
29102 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
29103 {
29104 unsigned HOST_WIDE_INT d_word = 0;
29105 for (size_t j = 0; j < 8; j++)
29106 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
29107
29108 info->double_words[i] = d_word;
29109 }
29110
29111 info->all_double_words_same
29112 = (info->double_words[0] == info->double_words[1]);
29113
29114 return true;
29115 }
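
/* Disabled usage sketch for vec_const_128bit_to_bytes: decode a DImode
   integer constant and check the resulting groupings.  */
#if 0
static void
vec_const_decode_sketch (void)
{
  vec_const_128bit_type info;

  /* The 8-byte constant is splatted to fill all 128 bits, so the two
     double words match even though the individual bytes differ.  */
  if (vec_const_128bit_to_bytes (GEN_INT (0x12345678), DImode, &info))
    gcc_assert (info.all_double_words_same && !info.all_bytes_same);
}
#endif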
29116
29117 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
29118 if the LXVKQ instruction cannot be used. Otherwise return the immediate
29119 value to be used with the LXVKQ instruction. */
29120
29121 unsigned
29122 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
29123 {
29124 /* The instruction is only supported when generating power10 code with IEEE
29125 128-bit floating point hardware and VSX registers available. */
29126 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
29127 || !TARGET_VSX)
29128 return 0;
29129
29130 /* All of the constants that can be generated by LXVKQ have the bottom 3
29131 words set to 0. */
29132 if (vsx_const->words[1] != 0
29133 || vsx_const->words[2] != 0
29134 || vsx_const->words[3] != 0)
29135 return 0;
29136
29137 /* See if we have a match for the first word. */
29138 switch (vsx_const->words[0])
29139 {
29140 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
29141 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
29142 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
29143 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
29144 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
29145 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
29146 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
29147 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
29148 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
29149 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
29150 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
29151 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
29152 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
29153 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
29154 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
29155 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
29156 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
29157 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
29158
29159 /* Anything else cannot be loaded. */
29160 default:
29161 break;
29162 }
29163
29164 return 0;
29165 }
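
/* Worked example: IEEE 128-bit +1.0 is the word 0x3FFF0000 followed by
   three zero words, so this returns 1 and the constant can be loaded
   with "lxvkq vsN,1".  */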
29166
29167 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
29168 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
29169 value to be used with the XXSPLTIW instruction. */
29170
29171 unsigned
29172 constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
29173 {
29174 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29175 return 0;
29176
29177 if (!vsx_const->all_words_same)
29178 return 0;
29179
29180 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
29181 if (vsx_const->all_bytes_same)
29182 return 0;
29183
29184 /* See if we can use VSPLTISH or VSPLTISW. */
29185 if (vsx_const->all_half_words_same)
29186 {
29187 short sign_h_word = vsx_const->half_words[0];
29188 if (EASY_VECTOR_15 (sign_h_word))
29189 return 0;
29190 }
29191
29192 int sign_word = vsx_const->words[0];
29193 if (EASY_VECTOR_15 (sign_word))
29194 return 0;
29195
29196 return vsx_const->words[0];
29197 }
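
/* Worked example: a V4SI constant with all four words equal to
   0x12345678 has differing bytes and half words, and the word is outside
   the VSPLTISW range, so this returns 0x12345678 as the XXSPLTIW
   immediate.  */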
29198
29199 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
29200 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
29201 value to be used with the XXSPLTIDP instruction. */
29202
29203 unsigned
29204 constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
29205 {
29206 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29207 return 0;
29208
29209 /* Reject if the two 64-bit segments are not the same. */
29210 if (!vsx_const->all_double_words_same)
29211 return 0;
29212
29213 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
29214 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
29215 if (vsx_const->all_bytes_same
29216 || vsx_const->all_half_words_same
29217 || vsx_const->all_words_same)
29218 return 0;
29219
29220 unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
29221
29222 /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
29223 pattern and the signaling NaN bit pattern. Recognize infinity and
29224 negative infinity. */
29225
29226 /* Bit representation of DFmode normal quiet NaN. */
29227 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
29228
29229 /* Bit representation of DFmode normal signaling NaN. */
29230 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
29231
29232 /* Bit representation of DFmode positive infinity. */
29233 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
29234
29235 /* Bit representation of DFmode negative infinity. */
29236 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
29237
29238 if (value != RS6000_CONST_DF_NAN
29239 && value != RS6000_CONST_DF_NANS
29240 && value != RS6000_CONST_DF_INF
29241 && value != RS6000_CONST_DF_NEG_INF)
29242 {
29243 /* The IEEE 754 64-bit floating point format has 1 bit for the sign, 11 bits
29244 for the exponent, and 52 bits for the mantissa (not counting the hidden
29245 bit used for normal numbers). NaN values have the exponent set to all
29246 1 bits and the mantissa non-zero (mantissa == 0 means infinity). */
29247
29248 int df_exponent = (value >> 52) & 0x7ff;
29249 unsigned HOST_WIDE_INT
29250 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
29251
29252 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */
29253 return 0;
29254
29255 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
29256 the exponent all 0 bits, and the mantissa non-zero. If the value is
29257 subnormal, then the hidden bit in the mantissa is not set. */
29258 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */
29259 return 0;
29260 }
29261
29262 /* Change the representation to DFmode constant. */
29263 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
29264
29265 /* real_from_target takes the target words in target order. */
29266 if (!BYTES_BIG_ENDIAN)
29267 std::swap (df_words[0], df_words[1]);
29268
29269 REAL_VALUE_TYPE rv_type;
29270 real_from_target (&rv_type, df_words, DFmode);
29271
29272 const REAL_VALUE_TYPE *rv = &rv_type;
29273
29274 /* Validate that the number can be stored as an SFmode value. */
29275 if (!exact_real_truncate (SFmode, rv))
29276 return 0;
29277
29278 /* Validate that the number is not an SFmode subnormal value (exponent is 0,
29279 mantissa field is non-zero), which is undefined for the XXSPLTIDP
29280 instruction. */
29281 long sf_value;
29282 real_to_target (&sf_value, rv, SFmode);
29283
29284 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
29285 and 23 bits for the mantissa. Subnormal numbers have the exponent all
29286 0 bits, and the mantissa non-zero. */
29287 long sf_exponent = (sf_value >> 23) & 0xFF;
29288 long sf_mantissa = sf_value & 0x7FFFFF;
29289
29290 if (sf_exponent == 0 && sf_mantissa != 0)
29291 return 0;
29292
29293 /* Return the immediate to be used. */
29294 return sf_value;
29295 }
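
/* Worked example: the V2DF constant {1.0, 1.0} has both double words
   equal to 0x3ff0000000000000, is neither a NaN, an infinity, nor a
   subnormal, and truncates exactly to SFmode, so this returns the
   XXSPLTIDP immediate 0x3f800000.  */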
29296
29297 /* Now we have only two opaque types, the __vector_quad and
29298 __vector_pair built-in types. They are target specific and
29299 only available when MMA is supported. With MMA supported, this
29300 simply returns false. Otherwise it checks whether the given gimple
29301 STMT is an assignment, asm or call stmt that uses either of
29302 these two opaque types unexpectedly; if so, it raises
29303 an error message and returns true, otherwise it returns false. */
29304
29305 bool
29306 rs6000_opaque_type_invalid_use_p (gimple *stmt)
29307 {
29308 if (TARGET_MMA)
29309 return false;
29310
29311 /* If the given TYPE is an MMA opaque type, emit the corresponding
29312 error message and return true, otherwise return false. */
29313 auto check_and_error_invalid_use = [](tree type)
29314 {
29315 tree mv = TYPE_MAIN_VARIANT (type);
29316 if (mv == vector_quad_type_node)
29317 {
29318 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29319 return true;
29320 }
29321 else if (mv == vector_pair_type_node)
29322 {
29323 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29324 return true;
29325 }
29326 return false;
29327 };
29328
29329 if (stmt)
29330 {
29331 /* The usage of MMA opaque types is very limited for now;
29332 checking gassign, gasm and gcall stmts is enough so far. */
29333 if (gassign *ga = dyn_cast<gassign *> (stmt))
29334 {
29335 tree lhs = gimple_assign_lhs (ga);
29336 tree type = TREE_TYPE (lhs);
29337 if (check_and_error_invalid_use (type))
29338 return true;
29339 }
29340 else if (gasm *gs = dyn_cast<gasm *> (stmt))
29341 {
29342 unsigned ninputs = gimple_asm_ninputs (gs);
29343 for (unsigned i = 0; i < ninputs; i++)
29344 {
29345 tree op = gimple_asm_input_op (gs, i);
29346 tree val = TREE_VALUE (op);
29347 tree type = TREE_TYPE (val);
29348 if (check_and_error_invalid_use (type))
29349 return true;
29350 }
29351 unsigned noutputs = gimple_asm_noutputs (gs);
29352 for (unsigned i = 0; i < noutputs; i++)
29353 {
29354 tree op = gimple_asm_output_op (gs, i);
29355 tree val = TREE_VALUE (op);
29356 tree type = TREE_TYPE (val);
29357 if (check_and_error_invalid_use (type))
29358 return true;
29359 }
29360 }
29361 else if (gcall *gc = dyn_cast<gcall *> (stmt))
29362 {
29363 unsigned nargs = gimple_call_num_args (gc);
29364 for (unsigned i = 0; i < nargs; i++)
29365 {
29366 tree arg = gimple_call_arg (gc, i);
29367 tree type = TREE_TYPE (arg);
29368 if (check_and_error_invalid_use (type))
29369 return true;
29370 }
29371 }
29372 }
29373
29374 return false;
29375 }
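
/* Example of a use diagnosed above (hypothetical user source compiled
   without -mmma):

     __vector_quad acc;
     void f (void) { acc = acc; }

   The assignment is a gassign whose LHS type is __vector_quad, so the
   check fires, emits "type '__vector_quad' requires the '-mmma' option"
   and returns true.  */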
29376
29377 struct gcc_target targetm = TARGET_INITIALIZER;
29378
29379 #include "gt-rs6000.h"