1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "ira.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
45 #include "flags.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "attribs.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "print-tree.h"
52 #include "varasm.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "output.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "gimple-iterator.h"
62 #include "gimple-fold.h"
63 #include "gimple-walk.h"
64 #include "ssa.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
67 #include "intl.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
70 #include "builtins.h"
71 #include "tree-vector-builder.h"
72 #include "context.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
75 #include "ipa-prop.h"
76 #include "ipa-fnsummary.h"
77 #include "except.h"
78 #include "case-cfn-macros.h"
79 #include "ppc-auxv.h"
80 #include "rs6000-internal.h"
81 #include "opts.h"
82
83 /* This file should be included last. */
84 #include "target-def.h"
85
86 extern tree rs6000_builtin_mask_for_load (void);
87 extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
88 extern tree rs6000_builtin_reciprocal (tree);
89
90 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
91 systems will also set long double to be IEEE 128-bit. AIX and Darwin
92 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
93 those systems will not pick up this default. This needs to be after all
94 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
95 properly defined. */
96 #ifndef TARGET_IEEEQUAD_DEFAULT
97 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
98 #define TARGET_IEEEQUAD_DEFAULT 1
99 #else
100 #define TARGET_IEEEQUAD_DEFAULT 0
101 #endif
102 #endif
103
104 /* Don't enable PC-relative addressing if the target does not support it. */
105 #ifndef PCREL_SUPPORTED_BY_OS
106 #define PCREL_SUPPORTED_BY_OS 0
107 #endif
108
109 #ifdef USING_ELFOS_H
110 /* Counter for labels which are to be placed in .fixup. */
111 int fixuplabelno = 0;
112 #endif
113
114 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
115 int dot_symbols;
116
117 /* Specify the machine mode that pointers have. After generation of rtl, the
118 compiler makes no further distinction between pointers and any other objects
119 of this machine mode. */
120 scalar_int_mode rs6000_pmode;
121
122 /* Track use of r13 in 64-bit AIX TLS. */
123 static bool xcoff_tls_exec_model_detected = false;
124
125 /* Width in bits of a pointer. */
126 unsigned rs6000_pointer_size;
127
128 #ifdef HAVE_AS_GNU_ATTRIBUTE
129 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
130 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
131 # endif
132 /* Flag whether floating point values have been passed/returned.
133 Note that this doesn't say whether fprs are used, since the
134 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
135 should be set for soft-float values passed in gprs and ieee128
136 values passed in vsx registers. */
137 bool rs6000_passes_float = false;
138 bool rs6000_passes_long_double = false;
139 /* Flag whether vector values have been passed/returned. */
140 bool rs6000_passes_vector = false;
141 /* Flag whether small (<= 8 byte) structures have been returned. */
142 bool rs6000_returns_struct = false;
143 #endif
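/* Editorial sketch (not from this file): when these flags are set on an ELF
   target, the compiler emits .gnu.attributes entries so the linker can warn
   about ABI mismatches, e.g. for hard-float double:

       .gnu_attribute 4,1	(Tag_GNU_Power_ABI_FP)

   The exact tag values are computed where the flags are consumed.  */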
144
145 /* Value is TRUE if register/mode pair is acceptable. */
146 static bool rs6000_hard_regno_mode_ok_p
147 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
148
149 /* Maximum number of registers needed for a given register class and mode. */
150 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
151
152 /* How many registers are needed for a given register and mode. */
153 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
154
155 /* Map register number to register class. */
156 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
157
158 static int dbg_cost_ctrl;
159
160 /* Flag to say the TOC is initialized. */
161 int toc_initialized, need_toc_init;
162 char toc_label_name[10];
163
164 /* Cached value of rs6000_variable_issue. This is cached in
165 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
166 static short cached_can_issue_more;
167
168 static GTY(()) section *read_only_data_section;
169 static GTY(()) section *private_data_section;
170 static GTY(()) section *tls_data_section;
171 static GTY(()) section *tls_private_data_section;
172 static GTY(()) section *read_only_private_data_section;
173 static GTY(()) section *sdata2_section;
174
175 section *toc_section = 0;
176
177 /* Describe the vector unit used for modes. */
178 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
179 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
180
181 /* Register classes for various constraints that are based on the target
182 switches. */
183 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
184
185 /* Describe the alignment of a vector. */
186 int rs6000_vector_align[NUM_MACHINE_MODES];
187
188 /* What modes to automatically generate reciprocal divide estimate (fre) and
189 reciprocal sqrt (frsqrte) for. */
190 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
191
192 /* Masks to determine which reciprocal estimate instructions to generate
193 automatically. */
194 enum rs6000_recip_mask {
195 RECIP_SF_DIV = 0x001, /* Use divide estimate */
196 RECIP_DF_DIV = 0x002,
197 RECIP_V4SF_DIV = 0x004,
198 RECIP_V2DF_DIV = 0x008,
199
200 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
201 RECIP_DF_RSQRT = 0x020,
202 RECIP_V4SF_RSQRT = 0x040,
203 RECIP_V2DF_RSQRT = 0x080,
204
205 /* Various combinations of flags for -mrecip=xxx. */
206 RECIP_NONE = 0,
207 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
208 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
209 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
210
211 RECIP_HIGH_PRECISION = RECIP_ALL,
212
213 /* On low precision machines like the power5, don't enable double precision
214 reciprocal square root estimate, since it isn't accurate enough. */
215 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
216 };
217
218 /* -mrecip options. */
219 static struct
220 {
221 const char *string; /* option name */
222 unsigned int mask; /* mask bits to set */
223 } recip_options[] = {
224 { "all", RECIP_ALL },
225 { "none", RECIP_NONE },
226 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
227 | RECIP_V2DF_DIV) },
228 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
229 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
230 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
231 | RECIP_V2DF_RSQRT) },
232 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
233 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
234 };
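/* A minimal sketch (assumed logic; the real parsing lives in the option
   override code) of how a -mrecip=<name> substring is mapped to mask bits
   via the table above.  The helper name is hypothetical.  */
#if 0
static unsigned int
example_parse_recip_option (const char *name)
{
  for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
    if (strcmp (name, recip_options[i].string) == 0)
      return recip_options[i].mask;	/* e.g. "divf" -> SF/V4SF divides.  */
  return 0;				/* Unrecognized option name.  */
}
#endif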
235
236 /* On PowerPC, we have a limited number of target clones that we care about
237 which means we can use an array to hold the options, rather than having more
238 elaborate data structures to identify each possible variation. Order the
239 clones from the default to the highest ISA. */
240 enum {
241 CLONE_DEFAULT = 0, /* default clone. */
242 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
243 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
244 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
245 CLONE_ISA_3_00, /* ISA 3.0 (power9). */
246 CLONE_ISA_3_1, /* ISA 3.1 (power10). */
247 CLONE_MAX
248 };
249
250 /* Map compiler ISA bits into HWCAP names. */
251 struct clone_map {
252 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
253 const char *name; /* name to use in __builtin_cpu_supports. */
254 };
255
256 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
257 { 0, "" }, /* Default options. */
258 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
259 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
260 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
261 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
262 { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
263 };
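/* User-level example (illustrative): a function declared with
     __attribute__ ((target_clones ("default", "cpu=power9", "cpu=power10")))
   gets one clone per entry, and the generated resolver selects the newest
   clone whose rs6000_clone_map entry passes the runtime test, e.g.
   __builtin_cpu_supports ("arch_3_1") for the power10 clone.  */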
264
265
266 /* Newer LIBCs explicitly export this symbol to declare that they provide
267 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
268 reference to this symbol whenever we expand a CPU builtin, so that
269 we never link against an old LIBC. */
270 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
271
272 /* True if we have expanded a CPU builtin. */
273 bool cpu_builtin_p = false;
274
275 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
276 macros that have changed. Languages that don't support the preprocessor
277 don't link in rs6000-c.cc, so we can't call it directly. */
278 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);
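/* Assumed call pattern (a sketch; define_p is a hypothetical caller-provided
   flag choosing between defining and undefining macros):

     if (rs6000_target_modify_macros_ptr)
       rs6000_target_modify_macros_ptr (define_p, rs6000_isa_flags);

   rs6000-c.cc installs the real function for C-family front ends.  */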
279
280 /* Simplify register classes into simpler classifications. We assume
281 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
282 check for standard register classes (gpr/floating/altivec/vsx) and
283 floating/vector classes (float/altivec/vsx). */
284
285 enum rs6000_reg_type {
286 NO_REG_TYPE,
287 PSEUDO_REG_TYPE,
288 GPR_REG_TYPE,
289 VSX_REG_TYPE,
290 ALTIVEC_REG_TYPE,
291 FPR_REG_TYPE,
292 SPR_REG_TYPE,
293 CR_REG_TYPE,
294 DMR_REG_TYPE
295 };
296
297 /* Map register class to register type. */
298 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
299
300 /* First/last register type for the 'normal' register types (i.e. general
301 purpose, floating point, altivec, and VSX registers). */
302 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
303
304 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
305
306
307 /* Register classes we care about in secondary reload or in legitimate
308 address checks. We only need to worry about GPR, FPR, Altivec, and DMR
309 registers here, along with an ANY field that is the OR of the 4 classes. */
310
311 enum rs6000_reload_reg_type {
312 RELOAD_REG_GPR, /* General purpose registers. */
313 RELOAD_REG_FPR, /* Traditional floating point regs. */
314 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
315 RELOAD_REG_DMR, /* DMR registers. */
316 RELOAD_REG_ANY, /* OR of GPR/FPR/VMX/DMR masks. */
317 N_RELOAD_REG
318 };
319
320 /* For setting up register classes, loop through the 4 register classes mapping
321 into real registers, and skip the ANY class, which is just an OR of the
322 bits. */
323 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
324 #define LAST_RELOAD_REG_CLASS RELOAD_REG_DMR
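/* Illustrative idiom (assumed, with a hypothetical callback) for walking the
   real reload classes while skipping the RELOAD_REG_ANY summary entry:  */
#if 0
static void
example_visit_reload_classes (void (*visit) (enum rs6000_reload_reg_type))
{
  for (int rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
    visit ((enum rs6000_reload_reg_type) rc);	/* Never RELOAD_REG_ANY.  */
}
#endif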
325
326 /* Map reload register type to a register in the register class. */
327 struct reload_reg_map_type {
328 const char *name; /* Register class name. */
329 int reg; /* Register in the register class. */
330 };
331
332 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
333 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
334 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
335 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
336 { "DMR", FIRST_DMR_REGNO }, /* RELOAD_REG_DMR. */
337 { "Any", -1 }, /* RELOAD_REG_ANY. */
338 };
339
340 /* Mask bits for each register class, indexed per mode. Historically the
341 compiler has been more restrictive about which types can do PRE_MODIFY
342 instead of PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
343 typedef unsigned char addr_mask_type;
344
345 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
346 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
347 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
348 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
349 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
350 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
351 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
352 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
353
354 /* Reload insns and masks of valid addressing modes, per register type. */
355 struct rs6000_reg_addr {
356 enum insn_code reload_load; /* INSN to reload for loading. */
357 enum insn_code reload_store; /* INSN to reload for storing. */
358 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
359 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
360 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
361 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
362 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
363 };
364
365 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
366
367 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
368 static inline bool
369 mode_supports_pre_incdec_p (machine_mode mode)
370 {
371 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
372 != 0);
373 }
374
375 /* Helper function to say whether a mode supports PRE_MODIFY. */
376 static inline bool
377 mode_supports_pre_modify_p (machine_mode mode)
378 {
379 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
380 != 0);
381 }
382
383 /* Return true if we have D-form addressing in altivec registers. */
384 static inline bool
385 mode_supports_vmx_dform (machine_mode mode)
386 {
387 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
388 }
389
390 /* Return true if we have D-form addressing in VSX registers. This addressing
391 is more limited than normal d-form addressing in that the offset must be
392 aligned on a 16-byte boundary. */
393 static inline bool
394 mode_supports_dq_form (machine_mode mode)
395 {
396 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
397 != 0);
398 }
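/* Worked example (illustrative): for a mode where mode_supports_dq_form is
   true, "lxv vs0,16(r3)" is a legitimate DQ-form access since the offset is
   a multiple of 16, while an offset of 20 is not quad-aligned and would have
   to be moved into a register and used with reg+reg addressing instead.  */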
399
400 /* Given that there exists at least one variable that is set (produced)
401 by OUT_INSN and read (consumed) by IN_INSN, return true iff
402 IN_INSN represents one or more memory store operations and none of
403 the variables set by OUT_INSN is used by IN_INSN as the address of a
404 store operation. If either IN_INSN or OUT_INSN does not represent
405 a "single" RTL SET expression (as loosely defined by the
406 implementation of the single_set function) or a PARALLEL with only
407 SETs, CLOBBERs, and USEs inside, this function returns false.
408
409 This rs6000-specific version of store_data_bypass_p checks for
410 certain conditions that result in assertion failures (and internal
411 compiler errors) in the generic store_data_bypass_p function and
412 returns false rather than calling store_data_bypass_p if one of the
413 problematic conditions is detected. */
414
415 int
416 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
417 {
418 rtx out_set, in_set;
419 rtx out_pat, in_pat;
420 rtx out_exp, in_exp;
421 int i, j;
422
423 in_set = single_set (in_insn);
424 if (in_set)
425 {
426 if (MEM_P (SET_DEST (in_set)))
427 {
428 out_set = single_set (out_insn);
429 if (!out_set)
430 {
431 out_pat = PATTERN (out_insn);
432 if (GET_CODE (out_pat) == PARALLEL)
433 {
434 for (i = 0; i < XVECLEN (out_pat, 0); i++)
435 {
436 out_exp = XVECEXP (out_pat, 0, i);
437 if ((GET_CODE (out_exp) == CLOBBER)
438 || (GET_CODE (out_exp) == USE))
439 continue;
440 else if (GET_CODE (out_exp) != SET)
441 return false;
442 }
443 }
444 }
445 }
446 }
447 else
448 {
449 in_pat = PATTERN (in_insn);
450 if (GET_CODE (in_pat) != PARALLEL)
451 return false;
452
453 for (i = 0; i < XVECLEN (in_pat, 0); i++)
454 {
455 in_exp = XVECEXP (in_pat, 0, i);
456 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
457 continue;
458 else if (GET_CODE (in_exp) != SET)
459 return false;
460
461 if (MEM_P (SET_DEST (in_exp)))
462 {
463 out_set = single_set (out_insn);
464 if (!out_set)
465 {
466 out_pat = PATTERN (out_insn);
467 if (GET_CODE (out_pat) != PARALLEL)
468 return false;
469 for (j = 0; j < XVECLEN (out_pat, 0); j++)
470 {
471 out_exp = XVECEXP (out_pat, 0, j);
472 if ((GET_CODE (out_exp) == CLOBBER)
473 || (GET_CODE (out_exp) == USE))
474 continue;
475 else if (GET_CODE (out_exp) != SET)
476 return false;
477 }
478 }
479 }
480 }
481 }
482 return store_data_bypass_p (out_insn, in_insn);
483 }
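/* Usage sketch: the scheduling descriptions reference this predicate as a
   define_bypass guard, along these lines (the latency and unit names here
   are hypothetical):

     (define_bypass 2 "some-fx-unit" "some-store-unit"
		    "rs6000_store_data_bypass_p")

   so the shortened latency applies only when the producer feeds the stored
   data rather than the store address.  */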
484
485 \f
486 /* Processor costs (relative to an add) */
487
488 const struct processor_costs *rs6000_cost;
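/* Illustrative consumer (assumed shape; see rs6000_rtx_costs for the real
   logic):

     case MULT:
       *total = rs6000_cost->mulsi;	/* 32-bit multiply, in add units.  */

   rs6000_cost is pointed at one of the tables below according to the tuning
   target, or at size32_cost/size64_cost when optimizing for size.  */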
489
490 /* Instruction size costs on 32-bit processors. */
491 static const
492 struct processor_costs size32_cost = {
493 COSTS_N_INSNS (1), /* mulsi */
494 COSTS_N_INSNS (1), /* mulsi_const */
495 COSTS_N_INSNS (1), /* mulsi_const9 */
496 COSTS_N_INSNS (1), /* muldi */
497 COSTS_N_INSNS (1), /* divsi */
498 COSTS_N_INSNS (1), /* divdi */
499 COSTS_N_INSNS (1), /* fp */
500 COSTS_N_INSNS (1), /* dmul */
501 COSTS_N_INSNS (1), /* sdiv */
502 COSTS_N_INSNS (1), /* ddiv */
503 32, /* cache line size */
504 0, /* l1 cache */
505 0, /* l2 cache */
506 0, /* streams */
507 0, /* SF->DF convert */
508 };
509
510 /* Instruction size costs on 64-bit processors. */
511 static const
512 struct processor_costs size64_cost = {
513 COSTS_N_INSNS (1), /* mulsi */
514 COSTS_N_INSNS (1), /* mulsi_const */
515 COSTS_N_INSNS (1), /* mulsi_const9 */
516 COSTS_N_INSNS (1), /* muldi */
517 COSTS_N_INSNS (1), /* divsi */
518 COSTS_N_INSNS (1), /* divdi */
519 COSTS_N_INSNS (1), /* fp */
520 COSTS_N_INSNS (1), /* dmul */
521 COSTS_N_INSNS (1), /* sdiv */
522 COSTS_N_INSNS (1), /* ddiv */
523 128, /* cache line size */
524 0, /* l1 cache */
525 0, /* l2 cache */
526 0, /* streams */
527 0, /* SF->DF convert */
528 };
529
530 /* Instruction costs on RS64A processors. */
531 static const
532 struct processor_costs rs64a_cost = {
533 COSTS_N_INSNS (20), /* mulsi */
534 COSTS_N_INSNS (12), /* mulsi_const */
535 COSTS_N_INSNS (8), /* mulsi_const9 */
536 COSTS_N_INSNS (34), /* muldi */
537 COSTS_N_INSNS (65), /* divsi */
538 COSTS_N_INSNS (67), /* divdi */
539 COSTS_N_INSNS (4), /* fp */
540 COSTS_N_INSNS (4), /* dmul */
541 COSTS_N_INSNS (31), /* sdiv */
542 COSTS_N_INSNS (31), /* ddiv */
543 128, /* cache line size */
544 128, /* l1 cache */
545 2048, /* l2 cache */
546 1, /* streams */
547 0, /* SF->DF convert */
548 };
549
550 /* Instruction costs on MPCCORE processors. */
551 static const
552 struct processor_costs mpccore_cost = {
553 COSTS_N_INSNS (2), /* mulsi */
554 COSTS_N_INSNS (2), /* mulsi_const */
555 COSTS_N_INSNS (2), /* mulsi_const9 */
556 COSTS_N_INSNS (2), /* muldi */
557 COSTS_N_INSNS (6), /* divsi */
558 COSTS_N_INSNS (6), /* divdi */
559 COSTS_N_INSNS (4), /* fp */
560 COSTS_N_INSNS (5), /* dmul */
561 COSTS_N_INSNS (10), /* sdiv */
562 COSTS_N_INSNS (17), /* ddiv */
563 32, /* cache line size */
564 4, /* l1 cache */
565 16, /* l2 cache */
566 1, /* streams */
567 0, /* SF->DF convert */
568 };
569
570 /* Instruction costs on PPC403 processors. */
571 static const
572 struct processor_costs ppc403_cost = {
573 COSTS_N_INSNS (4), /* mulsi */
574 COSTS_N_INSNS (4), /* mulsi_const */
575 COSTS_N_INSNS (4), /* mulsi_const9 */
576 COSTS_N_INSNS (4), /* muldi */
577 COSTS_N_INSNS (33), /* divsi */
578 COSTS_N_INSNS (33), /* divdi */
579 COSTS_N_INSNS (11), /* fp */
580 COSTS_N_INSNS (11), /* dmul */
581 COSTS_N_INSNS (11), /* sdiv */
582 COSTS_N_INSNS (11), /* ddiv */
583 32, /* cache line size */
584 4, /* l1 cache */
585 16, /* l2 cache */
586 1, /* streams */
587 0, /* SF->DF convert */
588 };
589
590 /* Instruction costs on PPC405 processors. */
591 static const
592 struct processor_costs ppc405_cost = {
593 COSTS_N_INSNS (5), /* mulsi */
594 COSTS_N_INSNS (4), /* mulsi_const */
595 COSTS_N_INSNS (3), /* mulsi_const9 */
596 COSTS_N_INSNS (5), /* muldi */
597 COSTS_N_INSNS (35), /* divsi */
598 COSTS_N_INSNS (35), /* divdi */
599 COSTS_N_INSNS (11), /* fp */
600 COSTS_N_INSNS (11), /* dmul */
601 COSTS_N_INSNS (11), /* sdiv */
602 COSTS_N_INSNS (11), /* ddiv */
603 32, /* cache line size */
604 16, /* l1 cache */
605 128, /* l2 cache */
606 1, /* streams */
607 0, /* SF->DF convert */
608 };
609
610 /* Instruction costs on PPC440 processors. */
611 static const
612 struct processor_costs ppc440_cost = {
613 COSTS_N_INSNS (3), /* mulsi */
614 COSTS_N_INSNS (2), /* mulsi_const */
615 COSTS_N_INSNS (2), /* mulsi_const9 */
616 COSTS_N_INSNS (3), /* muldi */
617 COSTS_N_INSNS (34), /* divsi */
618 COSTS_N_INSNS (34), /* divdi */
619 COSTS_N_INSNS (5), /* fp */
620 COSTS_N_INSNS (5), /* dmul */
621 COSTS_N_INSNS (19), /* sdiv */
622 COSTS_N_INSNS (33), /* ddiv */
623 32, /* cache line size */
624 32, /* l1 cache */
625 256, /* l2 cache */
626 1, /* streams */
627 0, /* SF->DF convert */
628 };
629
630 /* Instruction costs on PPC476 processors. */
631 static const
632 struct processor_costs ppc476_cost = {
633 COSTS_N_INSNS (4), /* mulsi */
634 COSTS_N_INSNS (4), /* mulsi_const */
635 COSTS_N_INSNS (4), /* mulsi_const9 */
636 COSTS_N_INSNS (4), /* muldi */
637 COSTS_N_INSNS (11), /* divsi */
638 COSTS_N_INSNS (11), /* divdi */
639 COSTS_N_INSNS (6), /* fp */
640 COSTS_N_INSNS (6), /* dmul */
641 COSTS_N_INSNS (19), /* sdiv */
642 COSTS_N_INSNS (33), /* ddiv */
643 32, /* l1 cache line size */
644 32, /* l1 cache */
645 512, /* l2 cache */
646 1, /* streams */
647 0, /* SF->DF convert */
648 };
649
650 /* Instruction costs on PPC601 processors. */
651 static const
652 struct processor_costs ppc601_cost = {
653 COSTS_N_INSNS (5), /* mulsi */
654 COSTS_N_INSNS (5), /* mulsi_const */
655 COSTS_N_INSNS (5), /* mulsi_const9 */
656 COSTS_N_INSNS (5), /* muldi */
657 COSTS_N_INSNS (36), /* divsi */
658 COSTS_N_INSNS (36), /* divdi */
659 COSTS_N_INSNS (4), /* fp */
660 COSTS_N_INSNS (5), /* dmul */
661 COSTS_N_INSNS (17), /* sdiv */
662 COSTS_N_INSNS (31), /* ddiv */
663 32, /* cache line size */
664 32, /* l1 cache */
665 256, /* l2 cache */
666 1, /* streams */
667 0, /* SF->DF convert */
668 };
669
670 /* Instruction costs on PPC603 processors. */
671 static const
672 struct processor_costs ppc603_cost = {
673 COSTS_N_INSNS (5), /* mulsi */
674 COSTS_N_INSNS (3), /* mulsi_const */
675 COSTS_N_INSNS (2), /* mulsi_const9 */
676 COSTS_N_INSNS (5), /* muldi */
677 COSTS_N_INSNS (37), /* divsi */
678 COSTS_N_INSNS (37), /* divdi */
679 COSTS_N_INSNS (3), /* fp */
680 COSTS_N_INSNS (4), /* dmul */
681 COSTS_N_INSNS (18), /* sdiv */
682 COSTS_N_INSNS (33), /* ddiv */
683 32, /* cache line size */
684 8, /* l1 cache */
685 64, /* l2 cache */
686 1, /* streams */
687 0, /* SF->DF convert */
688 };
689
690 /* Instruction costs on PPC604 processors. */
691 static const
692 struct processor_costs ppc604_cost = {
693 COSTS_N_INSNS (4), /* mulsi */
694 COSTS_N_INSNS (4), /* mulsi_const */
695 COSTS_N_INSNS (4), /* mulsi_const9 */
696 COSTS_N_INSNS (4), /* muldi */
697 COSTS_N_INSNS (20), /* divsi */
698 COSTS_N_INSNS (20), /* divdi */
699 COSTS_N_INSNS (3), /* fp */
700 COSTS_N_INSNS (3), /* dmul */
701 COSTS_N_INSNS (18), /* sdiv */
702 COSTS_N_INSNS (32), /* ddiv */
703 32, /* cache line size */
704 16, /* l1 cache */
705 512, /* l2 cache */
706 1, /* streams */
707 0, /* SF->DF convert */
708 };
709
710 /* Instruction costs on PPC604e processors. */
711 static const
712 struct processor_costs ppc604e_cost = {
713 COSTS_N_INSNS (2), /* mulsi */
714 COSTS_N_INSNS (2), /* mulsi_const */
715 COSTS_N_INSNS (2), /* mulsi_const9 */
716 COSTS_N_INSNS (2), /* muldi */
717 COSTS_N_INSNS (20), /* divsi */
718 COSTS_N_INSNS (20), /* divdi */
719 COSTS_N_INSNS (3), /* fp */
720 COSTS_N_INSNS (3), /* dmul */
721 COSTS_N_INSNS (18), /* sdiv */
722 COSTS_N_INSNS (32), /* ddiv */
723 32, /* cache line size */
724 32, /* l1 cache */
725 1024, /* l2 cache */
726 1, /* streams */
727 0, /* SF->DF convert */
728 };
729
730 /* Instruction costs on PPC620 processors. */
731 static const
732 struct processor_costs ppc620_cost = {
733 COSTS_N_INSNS (5), /* mulsi */
734 COSTS_N_INSNS (4), /* mulsi_const */
735 COSTS_N_INSNS (3), /* mulsi_const9 */
736 COSTS_N_INSNS (7), /* muldi */
737 COSTS_N_INSNS (21), /* divsi */
738 COSTS_N_INSNS (37), /* divdi */
739 COSTS_N_INSNS (3), /* fp */
740 COSTS_N_INSNS (3), /* dmul */
741 COSTS_N_INSNS (18), /* sdiv */
742 COSTS_N_INSNS (32), /* ddiv */
743 128, /* cache line size */
744 32, /* l1 cache */
745 1024, /* l2 cache */
746 1, /* streams */
747 0, /* SF->DF convert */
748 };
749
750 /* Instruction costs on PPC630 processors. */
751 static const
752 struct processor_costs ppc630_cost = {
753 COSTS_N_INSNS (5), /* mulsi */
754 COSTS_N_INSNS (4), /* mulsi_const */
755 COSTS_N_INSNS (3), /* mulsi_const9 */
756 COSTS_N_INSNS (7), /* muldi */
757 COSTS_N_INSNS (21), /* divsi */
758 COSTS_N_INSNS (37), /* divdi */
759 COSTS_N_INSNS (3), /* fp */
760 COSTS_N_INSNS (3), /* dmul */
761 COSTS_N_INSNS (17), /* sdiv */
762 COSTS_N_INSNS (21), /* ddiv */
763 128, /* cache line size */
764 64, /* l1 cache */
765 1024, /* l2 cache */
766 1, /* streams */
767 0, /* SF->DF convert */
768 };
769
770 /* Instruction costs on Cell processor. */
771 /* COSTS_N_INSNS (1) ~ one add. */
772 static const
773 struct processor_costs ppccell_cost = {
774 COSTS_N_INSNS (9/2)+2, /* mulsi */
775 COSTS_N_INSNS (6/2), /* mulsi_const */
776 COSTS_N_INSNS (6/2), /* mulsi_const9 */
777 COSTS_N_INSNS (15/2)+2, /* muldi */
778 COSTS_N_INSNS (38/2), /* divsi */
779 COSTS_N_INSNS (70/2), /* divdi */
780 COSTS_N_INSNS (10/2), /* fp */
781 COSTS_N_INSNS (10/2), /* dmul */
782 COSTS_N_INSNS (74/2), /* sdiv */
783 COSTS_N_INSNS (74/2), /* ddiv */
784 128, /* cache line size */
785 32, /* l1 cache */
786 512, /* l2 cache */
787 6, /* streams */
788 0, /* SF->DF convert */
789 };
790
791 /* Instruction costs on PPC750 and PPC7400 processors. */
792 static const
793 struct processor_costs ppc750_cost = {
794 COSTS_N_INSNS (5), /* mulsi */
795 COSTS_N_INSNS (3), /* mulsi_const */
796 COSTS_N_INSNS (2), /* mulsi_const9 */
797 COSTS_N_INSNS (5), /* muldi */
798 COSTS_N_INSNS (17), /* divsi */
799 COSTS_N_INSNS (17), /* divdi */
800 COSTS_N_INSNS (3), /* fp */
801 COSTS_N_INSNS (3), /* dmul */
802 COSTS_N_INSNS (17), /* sdiv */
803 COSTS_N_INSNS (31), /* ddiv */
804 32, /* cache line size */
805 32, /* l1 cache */
806 512, /* l2 cache */
807 1, /* streams */
808 0, /* SF->DF convert */
809 };
810
811 /* Instruction costs on PPC7450 processors. */
812 static const
813 struct processor_costs ppc7450_cost = {
814 COSTS_N_INSNS (4), /* mulsi */
815 COSTS_N_INSNS (3), /* mulsi_const */
816 COSTS_N_INSNS (3), /* mulsi_const9 */
817 COSTS_N_INSNS (4), /* muldi */
818 COSTS_N_INSNS (23), /* divsi */
819 COSTS_N_INSNS (23), /* divdi */
820 COSTS_N_INSNS (5), /* fp */
821 COSTS_N_INSNS (5), /* dmul */
822 COSTS_N_INSNS (21), /* sdiv */
823 COSTS_N_INSNS (35), /* ddiv */
824 32, /* cache line size */
825 32, /* l1 cache */
826 1024, /* l2 cache */
827 1, /* streams */
828 0, /* SF->DF convert */
829 };
830
831 /* Instruction costs on PPC8540 processors. */
832 static const
833 struct processor_costs ppc8540_cost = {
834 COSTS_N_INSNS (4), /* mulsi */
835 COSTS_N_INSNS (4), /* mulsi_const */
836 COSTS_N_INSNS (4), /* mulsi_const9 */
837 COSTS_N_INSNS (4), /* muldi */
838 COSTS_N_INSNS (19), /* divsi */
839 COSTS_N_INSNS (19), /* divdi */
840 COSTS_N_INSNS (4), /* fp */
841 COSTS_N_INSNS (4), /* dmul */
842 COSTS_N_INSNS (29), /* sdiv */
843 COSTS_N_INSNS (29), /* ddiv */
844 32, /* cache line size */
845 32, /* l1 cache */
846 256, /* l2 cache */
847 1, /* prefetch streams */
848 0, /* SF->DF convert */
849 };
850
851 /* Instruction costs on E300C2 and E300C3 cores. */
852 static const
853 struct processor_costs ppce300c2c3_cost = {
854 COSTS_N_INSNS (4), /* mulsi */
855 COSTS_N_INSNS (4), /* mulsi_const */
856 COSTS_N_INSNS (4), /* mulsi_const9 */
857 COSTS_N_INSNS (4), /* muldi */
858 COSTS_N_INSNS (19), /* divsi */
859 COSTS_N_INSNS (19), /* divdi */
860 COSTS_N_INSNS (3), /* fp */
861 COSTS_N_INSNS (4), /* dmul */
862 COSTS_N_INSNS (18), /* sdiv */
863 COSTS_N_INSNS (33), /* ddiv */
864 32, /* cache line size */
865 16, /* l1 cache */
866 16, /* l2 cache */
867 1, /* prefetch streams */
868 0, /* SF->DF convert */
869 };
870
871 /* Instruction costs on PPCE500MC processors. */
872 static const
873 struct processor_costs ppce500mc_cost = {
874 COSTS_N_INSNS (4), /* mulsi */
875 COSTS_N_INSNS (4), /* mulsi_const */
876 COSTS_N_INSNS (4), /* mulsi_const9 */
877 COSTS_N_INSNS (4), /* muldi */
878 COSTS_N_INSNS (14), /* divsi */
879 COSTS_N_INSNS (14), /* divdi */
880 COSTS_N_INSNS (8), /* fp */
881 COSTS_N_INSNS (10), /* dmul */
882 COSTS_N_INSNS (36), /* sdiv */
883 COSTS_N_INSNS (66), /* ddiv */
884 64, /* cache line size */
885 32, /* l1 cache */
886 128, /* l2 cache */
887 1, /* prefetch streams */
888 0, /* SF->DF convert */
889 };
890
891 /* Instruction costs on PPCE500MC64 processors. */
892 static const
893 struct processor_costs ppce500mc64_cost = {
894 COSTS_N_INSNS (4), /* mulsi */
895 COSTS_N_INSNS (4), /* mulsi_const */
896 COSTS_N_INSNS (4), /* mulsi_const9 */
897 COSTS_N_INSNS (4), /* muldi */
898 COSTS_N_INSNS (14), /* divsi */
899 COSTS_N_INSNS (14), /* divdi */
900 COSTS_N_INSNS (4), /* fp */
901 COSTS_N_INSNS (10), /* dmul */
902 COSTS_N_INSNS (36), /* sdiv */
903 COSTS_N_INSNS (66), /* ddiv */
904 64, /* cache line size */
905 32, /* l1 cache */
906 128, /* l2 cache */
907 1, /* prefetch streams */
908 0, /* SF->DF convert */
909 };
910
911 /* Instruction costs on PPCE5500 processors. */
912 static const
913 struct processor_costs ppce5500_cost = {
914 COSTS_N_INSNS (5), /* mulsi */
915 COSTS_N_INSNS (5), /* mulsi_const */
916 COSTS_N_INSNS (4), /* mulsi_const9 */
917 COSTS_N_INSNS (5), /* muldi */
918 COSTS_N_INSNS (14), /* divsi */
919 COSTS_N_INSNS (14), /* divdi */
920 COSTS_N_INSNS (7), /* fp */
921 COSTS_N_INSNS (10), /* dmul */
922 COSTS_N_INSNS (36), /* sdiv */
923 COSTS_N_INSNS (66), /* ddiv */
924 64, /* cache line size */
925 32, /* l1 cache */
926 128, /* l2 cache */
927 1, /* prefetch streams */
928 0, /* SF->DF convert */
929 };
930
931 /* Instruction costs on PPCE6500 processors. */
932 static const
933 struct processor_costs ppce6500_cost = {
934 COSTS_N_INSNS (5), /* mulsi */
935 COSTS_N_INSNS (5), /* mulsi_const */
936 COSTS_N_INSNS (4), /* mulsi_const9 */
937 COSTS_N_INSNS (5), /* muldi */
938 COSTS_N_INSNS (14), /* divsi */
939 COSTS_N_INSNS (14), /* divdi */
940 COSTS_N_INSNS (7), /* fp */
941 COSTS_N_INSNS (10), /* dmul */
942 COSTS_N_INSNS (36), /* sdiv */
943 COSTS_N_INSNS (66), /* ddiv */
944 64, /* cache line size */
945 32, /* l1 cache */
946 128, /* l2 cache */
947 1, /* prefetch streams */
948 0, /* SF->DF convert */
949 };
950
951 /* Instruction costs on AppliedMicro Titan processors. */
952 static const
953 struct processor_costs titan_cost = {
954 COSTS_N_INSNS (5), /* mulsi */
955 COSTS_N_INSNS (5), /* mulsi_const */
956 COSTS_N_INSNS (5), /* mulsi_const9 */
957 COSTS_N_INSNS (5), /* muldi */
958 COSTS_N_INSNS (18), /* divsi */
959 COSTS_N_INSNS (18), /* divdi */
960 COSTS_N_INSNS (10), /* fp */
961 COSTS_N_INSNS (10), /* dmul */
962 COSTS_N_INSNS (46), /* sdiv */
963 COSTS_N_INSNS (72), /* ddiv */
964 32, /* cache line size */
965 32, /* l1 cache */
966 512, /* l2 cache */
967 1, /* prefetch streams */
968 0, /* SF->DF convert */
969 };
970
971 /* Instruction costs on POWER4 and POWER5 processors. */
972 static const
973 struct processor_costs power4_cost = {
974 COSTS_N_INSNS (3), /* mulsi */
975 COSTS_N_INSNS (2), /* mulsi_const */
976 COSTS_N_INSNS (2), /* mulsi_const9 */
977 COSTS_N_INSNS (4), /* muldi */
978 COSTS_N_INSNS (18), /* divsi */
979 COSTS_N_INSNS (34), /* divdi */
980 COSTS_N_INSNS (3), /* fp */
981 COSTS_N_INSNS (3), /* dmul */
982 COSTS_N_INSNS (17), /* sdiv */
983 COSTS_N_INSNS (17), /* ddiv */
984 128, /* cache line size */
985 32, /* l1 cache */
986 1024, /* l2 cache */
987 8, /* prefetch streams */
988 0, /* SF->DF convert */
989 };
990
991 /* Instruction costs on POWER6 processors. */
992 static const
993 struct processor_costs power6_cost = {
994 COSTS_N_INSNS (8), /* mulsi */
995 COSTS_N_INSNS (8), /* mulsi_const */
996 COSTS_N_INSNS (8), /* mulsi_const9 */
997 COSTS_N_INSNS (8), /* muldi */
998 COSTS_N_INSNS (22), /* divsi */
999 COSTS_N_INSNS (28), /* divdi */
1000 COSTS_N_INSNS (3), /* fp */
1001 COSTS_N_INSNS (3), /* dmul */
1002 COSTS_N_INSNS (13), /* sdiv */
1003 COSTS_N_INSNS (16), /* ddiv */
1004 128, /* cache line size */
1005 64, /* l1 cache */
1006 2048, /* l2 cache */
1007 16, /* prefetch streams */
1008 0, /* SF->DF convert */
1009 };
1010
1011 /* Instruction costs on POWER7 processors. */
1012 static const
1013 struct processor_costs power7_cost = {
1014 COSTS_N_INSNS (2), /* mulsi */
1015 COSTS_N_INSNS (2), /* mulsi_const */
1016 COSTS_N_INSNS (2), /* mulsi_const9 */
1017 COSTS_N_INSNS (2), /* muldi */
1018 COSTS_N_INSNS (18), /* divsi */
1019 COSTS_N_INSNS (34), /* divdi */
1020 COSTS_N_INSNS (3), /* fp */
1021 COSTS_N_INSNS (3), /* dmul */
1022 COSTS_N_INSNS (13), /* sdiv */
1023 COSTS_N_INSNS (16), /* ddiv */
1024 128, /* cache line size */
1025 32, /* l1 cache */
1026 256, /* l2 cache */
1027 12, /* prefetch streams */
1028 COSTS_N_INSNS (3), /* SF->DF convert */
1029 };
1030
1031 /* Instruction costs on POWER8 processors. */
1032 static const
1033 struct processor_costs power8_cost = {
1034 COSTS_N_INSNS (3), /* mulsi */
1035 COSTS_N_INSNS (3), /* mulsi_const */
1036 COSTS_N_INSNS (3), /* mulsi_const9 */
1037 COSTS_N_INSNS (3), /* muldi */
1038 COSTS_N_INSNS (19), /* divsi */
1039 COSTS_N_INSNS (35), /* divdi */
1040 COSTS_N_INSNS (3), /* fp */
1041 COSTS_N_INSNS (3), /* dmul */
1042 COSTS_N_INSNS (14), /* sdiv */
1043 COSTS_N_INSNS (17), /* ddiv */
1044 128, /* cache line size */
1045 32, /* l1 cache */
1046 512, /* l2 cache */
1047 12, /* prefetch streams */
1048 COSTS_N_INSNS (3), /* SF->DF convert */
1049 };
1050
1051 /* Instruction costs on POWER9 processors. */
1052 static const
1053 struct processor_costs power9_cost = {
1054 COSTS_N_INSNS (3), /* mulsi */
1055 COSTS_N_INSNS (3), /* mulsi_const */
1056 COSTS_N_INSNS (3), /* mulsi_const9 */
1057 COSTS_N_INSNS (3), /* muldi */
1058 COSTS_N_INSNS (8), /* divsi */
1059 COSTS_N_INSNS (12), /* divdi */
1060 COSTS_N_INSNS (3), /* fp */
1061 COSTS_N_INSNS (3), /* dmul */
1062 COSTS_N_INSNS (13), /* sdiv */
1063 COSTS_N_INSNS (18), /* ddiv */
1064 128, /* cache line size */
1065 32, /* l1 cache */
1066 512, /* l2 cache */
1067 8, /* prefetch streams */
1068 COSTS_N_INSNS (3), /* SF->DF convert */
1069 };
1070
1071 /* Instruction costs on POWER10 processors. */
1072 static const
1073 struct processor_costs power10_cost = {
1074 COSTS_N_INSNS (2), /* mulsi */
1075 COSTS_N_INSNS (2), /* mulsi_const */
1076 COSTS_N_INSNS (2), /* mulsi_const9 */
1077 COSTS_N_INSNS (2), /* muldi */
1078 COSTS_N_INSNS (6), /* divsi */
1079 COSTS_N_INSNS (6), /* divdi */
1080 COSTS_N_INSNS (2), /* fp */
1081 COSTS_N_INSNS (2), /* dmul */
1082 COSTS_N_INSNS (11), /* sdiv */
1083 COSTS_N_INSNS (13), /* ddiv */
1084 128, /* cache line size */
1085 32, /* l1 cache */
1086 512, /* l2 cache */
1087 16, /* prefetch streams */
1088 COSTS_N_INSNS (2), /* SF->DF convert */
1089 };
1090
1091 /* Instruction costs on Future processors. At the moment, this is a copy of
1092 the power10 costs, but it is expected to change over time. */
1093 static const
1094 struct processor_costs future_cost = {
1095 COSTS_N_INSNS (2), /* mulsi */
1096 COSTS_N_INSNS (2), /* mulsi_const */
1097 COSTS_N_INSNS (2), /* mulsi_const9 */
1098 COSTS_N_INSNS (2), /* muldi */
1099 COSTS_N_INSNS (6), /* divsi */
1100 COSTS_N_INSNS (6), /* divdi */
1101 COSTS_N_INSNS (2), /* fp */
1102 COSTS_N_INSNS (2), /* dmul */
1103 COSTS_N_INSNS (11), /* sdiv */
1104 COSTS_N_INSNS (13), /* ddiv */
1105 128, /* cache line size */
1106 32, /* l1 cache */
1107 512, /* l2 cache */
1108 16, /* prefetch streams */
1109 COSTS_N_INSNS (2), /* SF->DF convert */
1110 };
1111
1112 /* Instruction costs on POWER A2 processors. */
1113 static const
1114 struct processor_costs ppca2_cost = {
1115 COSTS_N_INSNS (16), /* mulsi */
1116 COSTS_N_INSNS (16), /* mulsi_const */
1117 COSTS_N_INSNS (16), /* mulsi_const9 */
1118 COSTS_N_INSNS (16), /* muldi */
1119 COSTS_N_INSNS (22), /* divsi */
1120 COSTS_N_INSNS (28), /* divdi */
1121 COSTS_N_INSNS (3), /* fp */
1122 COSTS_N_INSNS (3), /* dmul */
1123 COSTS_N_INSNS (59), /* sdiv */
1124 COSTS_N_INSNS (72), /* ddiv */
1125 64, /* cache line size */
1126 16, /* l1 cache */
1127 2048, /* l2 cache */
1128 16, /* prefetch streams */
1129 0, /* SF->DF convert */
1130 };
1131
1132 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1133 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
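/* For example, compiling with -mveclibabi=mass asks the vectorizer to call
   IBM MASS library routines for vectorized math functions; in that case this
   handler is pointed at rs6000_builtin_vectorized_libmass (declared below).  */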
1134
1135 \f
1136 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1137 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1138 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1139 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1140 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1141 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1142 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1143 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1144 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1145 bool);
1146 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1147 unsigned int);
1148 static bool is_microcoded_insn (rtx_insn *);
1149 static bool is_nonpipeline_insn (rtx_insn *);
1150 static bool is_cracked_insn (rtx_insn *);
1151 static bool is_load_insn (rtx, rtx *);
1152 static bool is_store_insn (rtx, rtx *);
1153 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1154 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1155 static bool insn_must_be_first_in_group (rtx_insn *);
1156 static bool insn_must_be_last_in_group (rtx_insn *);
1157 bool easy_vector_constant (rtx, machine_mode);
1158 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1159 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1160 #if TARGET_MACHO
1161 static tree get_prev_label (tree);
1162 #endif
1163 static bool rs6000_mode_dependent_address (const_rtx);
1164 static bool rs6000_debug_mode_dependent_address (const_rtx);
1165 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1166 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1167 machine_mode, rtx);
1168 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1169 machine_mode,
1170 rtx);
1171 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1172 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1173 enum reg_class);
1174 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1175 reg_class_t,
1176 reg_class_t);
1177 static bool rs6000_debug_can_change_mode_class (machine_mode,
1178 machine_mode,
1179 reg_class_t);
1180
1181 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1182 = rs6000_mode_dependent_address;
1183
1184 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1185 machine_mode, rtx)
1186 = rs6000_secondary_reload_class;
1187
1188 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1189 = rs6000_preferred_reload_class;
1190
1191 const int INSN_NOT_AVAILABLE = -1;
1192
1193 static void rs6000_print_isa_options (FILE *, int, const char *,
1194 HOST_WIDE_INT);
1195 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1196
1197 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1198 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1199 enum rs6000_reg_type,
1200 machine_mode,
1201 secondary_reload_info *,
1202 bool);
1203 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1204
1205 /* Hash table stuff for keeping track of TOC entries. */
1206
1207 struct GTY((for_user)) toc_hash_struct
1208 {
1209 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1210 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1211 rtx key;
1212 machine_mode key_mode;
1213 int labelno;
1214 };
1215
1216 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1217 {
1218 static hashval_t hash (toc_hash_struct *);
1219 static bool equal (toc_hash_struct *, toc_hash_struct *);
1220 };
1221
1222 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
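/* Interning sketch (assumed shape of the lookup done when TOC entries are
   output) showing why identical constants share one TOC label:  */
#if 0
toc_hash_struct *h = ggc_alloc<toc_hash_struct> ();
h->key = x;
h->key_mode = mode;
h->labelno = labelno;
toc_hash_struct **slot = toc_hash_table->find_slot (h, INSERT);
/* A non-null *slot here means X already has a TOC entry; reuse its label.  */
#endif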
1223
1224
1225 \f
1226 /* Default register names. */
1227 char rs6000_reg_names[][8] =
1228 {
1229 /* GPRs */
1230 "0", "1", "2", "3", "4", "5", "6", "7",
1231 "8", "9", "10", "11", "12", "13", "14", "15",
1232 "16", "17", "18", "19", "20", "21", "22", "23",
1233 "24", "25", "26", "27", "28", "29", "30", "31",
1234 /* FPRs */
1235 "0", "1", "2", "3", "4", "5", "6", "7",
1236 "8", "9", "10", "11", "12", "13", "14", "15",
1237 "16", "17", "18", "19", "20", "21", "22", "23",
1238 "24", "25", "26", "27", "28", "29", "30", "31",
1239 /* VRs */
1240 "0", "1", "2", "3", "4", "5", "6", "7",
1241 "8", "9", "10", "11", "12", "13", "14", "15",
1242 "16", "17", "18", "19", "20", "21", "22", "23",
1243 "24", "25", "26", "27", "28", "29", "30", "31",
1244 /* lr ctr ca ap */
1245 "lr", "ctr", "ca", "ap",
1246 /* cr0..cr7 */
1247 "0", "1", "2", "3", "4", "5", "6", "7",
1248 /* vrsave vscr sfp */
1249 "vrsave", "vscr", "sfp",
1250 /* DMRs */
1251 "0", "1", "2", "3", "4", "5", "6", "7",
1252 };
1253
1254 #ifdef TARGET_REGNAMES
1255 static const char alt_reg_names[][8] =
1256 {
1257 /* GPRs */
1258 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1259 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1260 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1261 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1262 /* FPRs */
1263 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1264 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1265 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1266 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1267 /* VRs */
1268 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1269 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1270 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1271 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1272 /* lr ctr ca ap */
1273 "lr", "ctr", "ca", "ap",
1274 /* cr0..cr7 */
1275 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1276 /* vrsave vscr sfp */
1277 "vrsave", "vscr", "sfp",
1278 /* DMRs */
1279 "%dmr0", "%dmr1", "%dmr2", "%dmr3", "%dmr4", "%dmr5", "%dmr6", "%dmr7",
1280 };
1281 #endif
1282
1283 /* Table of valid machine attributes. */
1284
1285 static const struct attribute_spec rs6000_attribute_table[] =
1286 {
1287 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1288 affects_type_identity, handler, exclude } */
1289 { "altivec", 1, 1, false, true, false, false,
1290 rs6000_handle_altivec_attribute, NULL },
1291 { "longcall", 0, 0, false, true, true, false,
1292 rs6000_handle_longcall_attribute, NULL },
1293 { "shortcall", 0, 0, false, true, true, false,
1294 rs6000_handle_longcall_attribute, NULL },
1295 { "ms_struct", 0, 0, false, false, false, false,
1296 rs6000_handle_struct_attribute, NULL },
1297 { "gcc_struct", 0, 0, false, false, false, false,
1298 rs6000_handle_struct_attribute, NULL },
1299 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1300 SUBTARGET_ATTRIBUTE_TABLE,
1301 #endif
1302 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1303 };
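/* Example of the "altivec" attribute in user code; the context-sensitive
   "vector" keyword is expanded to this form by rs6000-c.cc:

     typedef int vsi __attribute__ ((altivec (vector__)));

   "longcall"/"shortcall" attach to function types, and "ms_struct"/
   "gcc_struct" select the struct-layout convention.  */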
1304 \f
1305 #ifndef TARGET_PROFILE_KERNEL
1306 #define TARGET_PROFILE_KERNEL 0
1307 #endif
1308 \f
1309 /* Initialize the GCC target structure. */
1310 #undef TARGET_ATTRIBUTE_TABLE
1311 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1312 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1313 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1314 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1315 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1316
1317 #undef TARGET_ASM_ALIGNED_DI_OP
1318 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1319
1320 /* Default unaligned ops are only provided for ELF. Find the ops needed
1321 for non-ELF systems. */
1322 #ifndef OBJECT_FORMAT_ELF
1323 #if TARGET_XCOFF
1324 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1325 64-bit targets. */
1326 #undef TARGET_ASM_UNALIGNED_HI_OP
1327 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1328 #undef TARGET_ASM_UNALIGNED_SI_OP
1329 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1330 #undef TARGET_ASM_UNALIGNED_DI_OP
1331 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1332 #else
1333 /* For Darwin. */
1334 #undef TARGET_ASM_UNALIGNED_HI_OP
1335 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1336 #undef TARGET_ASM_UNALIGNED_SI_OP
1337 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1338 #undef TARGET_ASM_UNALIGNED_DI_OP
1339 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1340 #undef TARGET_ASM_ALIGNED_DI_OP
1341 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1342 #endif
1343 #endif
1344
1345 /* This hook deals with fixups for relocatable code and DI-mode objects
1346 in 64-bit code. */
1347 #undef TARGET_ASM_INTEGER
1348 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1349
1350 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1351 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1352 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1353 #endif
1354
1355 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1356 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1357 rs6000_print_patchable_function_entry
1358
1359 #undef TARGET_SET_UP_BY_PROLOGUE
1360 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1361
1362 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1363 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1364 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1365 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1366 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1367 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1368 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1369 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1370 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1371 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1372 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1373 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1374
1375 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1376 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1377
1378 #undef TARGET_INTERNAL_ARG_POINTER
1379 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1380
1381 #undef TARGET_HAVE_TLS
1382 #define TARGET_HAVE_TLS HAVE_AS_TLS
1383
1384 #undef TARGET_CANNOT_FORCE_CONST_MEM
1385 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1386
1387 #undef TARGET_DELEGITIMIZE_ADDRESS
1388 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1389
1390 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1391 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1392
1393 #undef TARGET_LEGITIMATE_COMBINED_INSN
1394 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1395
1396 #undef TARGET_ASM_FUNCTION_PROLOGUE
1397 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1398 #undef TARGET_ASM_FUNCTION_EPILOGUE
1399 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1400
1401 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1402 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1403
1404 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1405 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1406
1407 #undef TARGET_LEGITIMIZE_ADDRESS
1408 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1409
1410 #undef TARGET_SCHED_VARIABLE_ISSUE
1411 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1412
1413 #undef TARGET_SCHED_ISSUE_RATE
1414 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1415 #undef TARGET_SCHED_ADJUST_COST
1416 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1417 #undef TARGET_SCHED_ADJUST_PRIORITY
1418 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1419 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1420 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1421 #undef TARGET_SCHED_INIT
1422 #define TARGET_SCHED_INIT rs6000_sched_init
1423 #undef TARGET_SCHED_FINISH
1424 #define TARGET_SCHED_FINISH rs6000_sched_finish
1425 #undef TARGET_SCHED_REORDER
1426 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1427 #undef TARGET_SCHED_REORDER2
1428 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1429
1430 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1431 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1432
1433 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1434 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1435
1436 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1437 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1438 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1439 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1440 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1441 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1442 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1443 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1444
1445 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1446 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1447
1448 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1449 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1450 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1451 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1452 rs6000_builtin_support_vector_misalignment
1453 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1454 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1455 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1456 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1457 rs6000_builtin_vectorization_cost
1458 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1459 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1460 rs6000_preferred_simd_mode
1461 #undef TARGET_VECTORIZE_CREATE_COSTS
1462 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1463
1464 #undef TARGET_LOOP_UNROLL_ADJUST
1465 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1466
1467 #undef TARGET_INIT_BUILTINS
1468 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1469 #undef TARGET_BUILTIN_DECL
1470 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1471
1472 #undef TARGET_FOLD_BUILTIN
1473 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1474 #undef TARGET_GIMPLE_FOLD_BUILTIN
1475 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1476
1477 #undef TARGET_EXPAND_BUILTIN
1478 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1479
1480 #undef TARGET_MANGLE_TYPE
1481 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1482
1483 #undef TARGET_INIT_LIBFUNCS
1484 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1485
1486 #if TARGET_MACHO
1487 #undef TARGET_BINDS_LOCAL_P
1488 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1489 #endif
1490
1491 #undef TARGET_MS_BITFIELD_LAYOUT_P
1492 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1493
1494 #undef TARGET_ASM_OUTPUT_MI_THUNK
1495 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1496
1497 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1498 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1499
1500 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1501 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1502
1503 #undef TARGET_REGISTER_MOVE_COST
1504 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1505 #undef TARGET_MEMORY_MOVE_COST
1506 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1507 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1508 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1509 rs6000_ira_change_pseudo_allocno_class
1510 #undef TARGET_CANNOT_COPY_INSN_P
1511 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1512 #undef TARGET_RTX_COSTS
1513 #define TARGET_RTX_COSTS rs6000_rtx_costs
1514 #undef TARGET_ADDRESS_COST
1515 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1516 #undef TARGET_INSN_COST
1517 #define TARGET_INSN_COST rs6000_insn_cost
1518
1519 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1520 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1521
1522 #undef TARGET_PROMOTE_FUNCTION_MODE
1523 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1524
1525 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1526 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1527
1528 #undef TARGET_RETURN_IN_MEMORY
1529 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1530
1531 #undef TARGET_RETURN_IN_MSB
1532 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1533
1534 #undef TARGET_SETUP_INCOMING_VARARGS
1535 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1536
1537 /* Always strict argument naming on rs6000. */
1538 #undef TARGET_STRICT_ARGUMENT_NAMING
1539 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1540 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1541 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1542 #undef TARGET_SPLIT_COMPLEX_ARG
1543 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1544 #undef TARGET_MUST_PASS_IN_STACK
1545 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1546 #undef TARGET_PASS_BY_REFERENCE
1547 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1548 #undef TARGET_ARG_PARTIAL_BYTES
1549 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1550 #undef TARGET_FUNCTION_ARG_ADVANCE
1551 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1552 #undef TARGET_FUNCTION_ARG
1553 #define TARGET_FUNCTION_ARG rs6000_function_arg
1554 #undef TARGET_FUNCTION_ARG_PADDING
1555 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1556 #undef TARGET_FUNCTION_ARG_BOUNDARY
1557 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1558
1559 #undef TARGET_BUILD_BUILTIN_VA_LIST
1560 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1561
1562 #undef TARGET_EXPAND_BUILTIN_VA_START
1563 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1564
1565 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1566 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1567
1568 #undef TARGET_EH_RETURN_FILTER_MODE
1569 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1570
1571 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1572 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1573
1574 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1575 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1576
1577 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1578 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1579 rs6000_libgcc_floating_mode_supported_p
1580
1581 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1582 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1583
1584 #undef TARGET_FLOATN_MODE
1585 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1586
1587 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1588 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1589
1590 #undef TARGET_MD_ASM_ADJUST
1591 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1592
1593 #undef TARGET_OPTION_OVERRIDE
1594 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1595
1596 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1597 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1598 rs6000_builtin_vectorized_function
1599
1600 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1601 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1602 rs6000_builtin_md_vectorized_function
1603
1604 #undef TARGET_STACK_PROTECT_GUARD
1605 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1606
1607 #if !TARGET_MACHO
1608 #undef TARGET_STACK_PROTECT_FAIL
1609 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1610 #endif
1611
1612 #ifdef HAVE_AS_TLS
1613 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1614 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1615 #endif
1616
1617 /* Use a 32-bit anchor range. This leads to sequences like:
1618
1619 addis tmp,anchor,high
1620 add dest,tmp,low
1621
1622 where tmp itself acts as an anchor, and can be shared between
1623 accesses to the same 64k page. */
1624 #undef TARGET_MIN_ANCHOR_OFFSET
1625 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1626 #undef TARGET_MAX_ANCHOR_OFFSET
1627 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
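/* Note (explanatory): TARGET_MIN_ANCHOR_OFFSET is written as -0x7fffffff - 1
   rather than -0x80000000 because the literal 0x80000000 does not fit in a
   signed 32-bit int, so the unary minus would otherwise be applied to an
   unsigned value.  The usable anchor range is thus the full
   [-2^31, 2^31 - 1] window. */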
1628 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1629 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1630 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1631 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1632
1633 #undef TARGET_BUILTIN_RECIPROCAL
1634 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1635
1636 #undef TARGET_SECONDARY_RELOAD
1637 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1638 #undef TARGET_SECONDARY_MEMORY_NEEDED
1639 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1640 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1641 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1642
1643 #undef TARGET_LEGITIMATE_ADDRESS_P
1644 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1645
1646 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1647 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1648
1649 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1650 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1651
1652 #undef TARGET_CAN_ELIMINATE
1653 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1654
1655 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1656 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1657
1658 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1659 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1660
1661 #undef TARGET_TRAMPOLINE_INIT
1662 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1663
1664 #undef TARGET_FUNCTION_VALUE
1665 #define TARGET_FUNCTION_VALUE rs6000_function_value
1666
1667 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1668 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1669
1670 #undef TARGET_OPTION_SAVE
1671 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1672
1673 #undef TARGET_OPTION_RESTORE
1674 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1675
1676 #undef TARGET_OPTION_PRINT
1677 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1678
1679 #undef TARGET_CAN_INLINE_P
1680 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1681
1682 #undef TARGET_SET_CURRENT_FUNCTION
1683 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1684
1685 #undef TARGET_LEGITIMATE_CONSTANT_P
1686 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1687
1688 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1689 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1690
1691 #undef TARGET_CAN_USE_DOLOOP_P
1692 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1693
1694 #undef TARGET_PREDICT_DOLOOP_P
1695 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1696
1697 #undef TARGET_HAVE_COUNT_REG_DECR_P
1698 #define TARGET_HAVE_COUNT_REG_DECR_P true
1699
1700 /* 1000000000 is infinite cost in IVOPTs. */
1701 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1702 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1703
1704 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1705 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1706
1707 #undef TARGET_PREFERRED_DOLOOP_MODE
1708 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1709
1710 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1711 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1712
1713 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1714 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1715 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1716 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1717 #undef TARGET_UNWIND_WORD_MODE
1718 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1719
1720 #undef TARGET_OFFLOAD_OPTIONS
1721 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1722
1723 #undef TARGET_C_MODE_FOR_SUFFIX
1724 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1725
1726 #undef TARGET_INVALID_BINARY_OP
1727 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1728
1729 #undef TARGET_OPTAB_SUPPORTED_P
1730 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1731
1732 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1733 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1734
1735 #undef TARGET_COMPARE_VERSION_PRIORITY
1736 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1737
1738 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1739 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1740 rs6000_generate_version_dispatcher_body
1741
1742 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1743 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1744 rs6000_get_function_versions_dispatcher
1745
1746 #undef TARGET_OPTION_FUNCTION_VERSIONS
1747 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1748
1749 #undef TARGET_HARD_REGNO_NREGS
1750 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1751 #undef TARGET_HARD_REGNO_MODE_OK
1752 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1753
1754 #undef TARGET_MODES_TIEABLE_P
1755 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1756
1757 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1758 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1759 rs6000_hard_regno_call_part_clobbered
1760
1761 #undef TARGET_SLOW_UNALIGNED_ACCESS
1762 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1763
1764 #undef TARGET_CAN_CHANGE_MODE_CLASS
1765 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1766
1767 #undef TARGET_CONSTANT_ALIGNMENT
1768 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1769
1770 #undef TARGET_STARTING_FRAME_OFFSET
1771 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1772
1773 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1774 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1775
1776 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1777 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1778
1779 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1780 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1781 rs6000_cannot_substitute_mem_equiv_p
1782
1783 #undef TARGET_INVALID_CONVERSION
1784 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1785
1786 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1787 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1788
1789 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1790 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1791 \f
1792
1793 /* Processor table. */
1794 struct rs6000_ptt
1795 {
1796 const char *const name; /* Canonical processor name. */
1797 const enum processor_type processor; /* Processor type enum value. */
1798 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1799 };
1800
1801 static struct rs6000_ptt const processor_target_table[] =
1802 {
1803 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1804 #include "rs6000-cpus.def"
1805 #undef RS6000_CPU
1806 };
1807
1808 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1809 name is invalid. */
1810
1811 static int
1812 rs6000_cpu_name_lookup (const char *name)
1813 {
1814 size_t i;
1815
1816 if (name != NULL)
1817 {
1818 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1819 if (! strcmp (name, processor_target_table[i].name))
1820 return (int)i;
1821 }
1822
1823 return -1;
1824 }
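
/* Usage sketch (illustrative, not from the original source):

     int idx = rs6000_cpu_name_lookup ("power9");
     if (idx >= 0)
       ... use processor_target_table[idx] ...

   Unknown names and a NULL pointer both yield -1. */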
1825
1826 \f
1827 /* Return number of consecutive hard regs needed starting at reg REGNO
1828 to hold something of mode MODE.
1829 This is ordinarily the length in words of a value of mode MODE
1830 but can be less for certain modes in special long registers.
1831
1832 POWER and PowerPC GPRs hold 32 bits worth;
1833    PowerPC64 GPRs and FPRs hold 64 bits worth.  */
1834
1835 static int
1836 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1837 {
1838 unsigned HOST_WIDE_INT reg_size;
1839
1840 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1841 128-bit floating point that can go in vector registers, which has VSX
1842 memory addressing. */
1843 if (FP_REGNO_P (regno))
1844 reg_size = (VECTOR_MEM_VSX_P (mode)
1845 || VECTOR_ALIGNMENT_P (mode)
1846 || mode == TDOmode
1847 ? UNITS_PER_VSX_WORD
1848 : UNITS_PER_FP_WORD);
1849
1850 else if (ALTIVEC_REGNO_P (regno))
1851 reg_size = UNITS_PER_ALTIVEC_WORD;
1852
1853 else if (DMR_REGNO_P (regno))
1854 reg_size = UNITS_PER_DMR_WORD;
1855
1856 else
1857 reg_size = UNITS_PER_WORD;
1858
1859 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1860 }
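
/* Worked example (a sketch, assuming UNITS_PER_VSX_WORD is 16 and
   UNITS_PER_FP_WORD is 8): V2DFmode is 16 bytes, so in an FPR it takes
   (16 + 16 - 1) / 16 = 1 register when VSX addressing applies, but
   (16 + 8 - 1) / 8 = 2 registers otherwise.  The expression above is just a
   ceiling division of the mode size by the register size. */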
1861
1862 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1863 MODE. */
1864 static int
1865 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1866 {
1867 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1868
1869 if (COMPLEX_MODE_P (mode))
1870 mode = GET_MODE_INNER (mode);
1871
1872 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1873 registers. */
1874 if (mode == OOmode)
1875 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1876
1877 /* On ISA 3.1 (power10), MMA accumulator modes need FPR registers divisible
1878 by 4.
1879
1880 If dense math is enabled, allow all VSX registers plus the dense math
1881      registers.  We need to make sure we don't cross the boundary between
1882      FPRs and traditional Altivec registers.  */
1883 if (mode == XOmode)
1884 {
1885 if (TARGET_MMA && !TARGET_DENSE_MATH)
1886 return (FP_REGNO_P (regno) && (regno & 3) == 0);
1887
1888 else if (TARGET_DENSE_MATH)
1889 {
1890 if (DMR_REGNO_P (regno))
1891 return 1;
1892
1893 if (FP_REGNO_P (regno))
1894 return ((regno & 1) == 0 && regno <= LAST_FPR_REGNO - 3);
1895
1896 if (ALTIVEC_REGNO_P (regno))
1897 return ((regno & 1) == 0 && regno <= LAST_ALTIVEC_REGNO - 3);
1898 }
1899
1900 else
1901 return 0;
1902 }
1903
1904 /* Dense math register modes need DMR registers or VSX registers divisible by
1905      2.  We need to make sure we don't cross the boundary between FPRs and
1906      traditional Altivec registers.  */
1907 if (mode == TDOmode)
1908 {
1909 if (!TARGET_DENSE_MATH)
1910 return 0;
1911
1912 if (DMR_REGNO_P (regno))
1913 return 1;
1914
1915 if (FP_REGNO_P (regno))
1916 return ((regno & 1) == 0 && regno <= LAST_FPR_REGNO - 7);
1917
1918 if (ALTIVEC_REGNO_P (regno))
1919 return ((regno & 1) == 0 && regno <= LAST_ALTIVEC_REGNO - 7);
1920
1921 return 0;
1922 }
1923
1924   /* No modes other than XOmode or TDOmode can go in DMRs.  */
1925 if (DMR_REGNO_P (regno))
1926 return 0;
1927
1928   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
1929      register pairs, and PTImode is used where we need to deal with quad word
1930      memory operations.  Don't allow quad words in the argument or frame
1931 pointer registers, just registers 0..31. */
1932 if (mode == PTImode)
1933 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1934 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1935 && ((regno & 1) == 0));
1936
1937   /* VSX registers that overlap the FPR registers are larger than FPRs are on
1938      non-VSX implementations.  Don't allow an item to be split between an FP
1939      register and an Altivec register.  Allow TImode in all VSX registers if
1940      the user asked for it.  */
1941 if (TARGET_VSX && VSX_REGNO_P (regno)
1942 && (VECTOR_MEM_VSX_P (mode)
1943 || VECTOR_ALIGNMENT_P (mode)
1944 || reg_addr[mode].scalar_in_vmx_p
1945 || mode == TImode
1946 || (TARGET_VADDUQM && mode == V1TImode)))
1947 {
1948 if (FP_REGNO_P (regno))
1949 return FP_REGNO_P (last_regno);
1950
1951 if (ALTIVEC_REGNO_P (regno))
1952 {
1953 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1954 return 0;
1955
1956 return ALTIVEC_REGNO_P (last_regno);
1957 }
1958 }
1959
1960 /* The GPRs can hold any mode, but values bigger than one register
1961 cannot go past R31. */
1962 if (INT_REGNO_P (regno))
1963 return INT_REGNO_P (last_regno);
1964
1965 /* The float registers (except for VSX vector modes) can only hold floating
1966 modes and DImode. */
1967 if (FP_REGNO_P (regno))
1968 {
1969 if (VECTOR_ALIGNMENT_P (mode))
1970 return false;
1971
1972 if (SCALAR_FLOAT_MODE_P (mode)
1973 && (mode != TDmode || (regno % 2) == 0)
1974 && FP_REGNO_P (last_regno))
1975 return 1;
1976
1977 if (GET_MODE_CLASS (mode) == MODE_INT)
1978 {
1979 	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1980 return 1;
1981
1982 if (TARGET_P8_VECTOR && (mode == SImode))
1983 return 1;
1984
1985 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1986 return 1;
1987 }
1988
1989 return 0;
1990 }
1991
1992 /* The CR register can only hold CC modes. */
1993 if (CR_REGNO_P (regno))
1994 return GET_MODE_CLASS (mode) == MODE_CC;
1995
1996 if (CA_REGNO_P (regno))
1997 return mode == Pmode || mode == SImode;
1998
1999   /* AltiVec modes can go only in AltiVec registers.  */
2000 if (ALTIVEC_REGNO_P (regno))
2001 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2002 || mode == V1TImode);
2003
2004   /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
2005      and the value must fit within the register set.  */
2006
2007 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2008 }
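
/* Quick sanity examples (illustrative): DFmode is accepted in FPR 32 (a
   scalar float in a float register), the CR registers accept only CC-class
   modes, and XOmode is rejected everywhere unless MMA (or dense math)
   support is enabled. */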
2009
2010 /* Implement TARGET_HARD_REGNO_NREGS. */
2011
2012 static unsigned int
2013 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2014 {
2015 return rs6000_hard_regno_nregs[mode][regno];
2016 }
2017
2018 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2019
2020 static bool
2021 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2022 {
2023 return rs6000_hard_regno_mode_ok_p[mode][regno];
2024 }
2025
2026 /* Implement TARGET_MODES_TIEABLE_P.
2027
2028 PTImode cannot tie with other modes because PTImode is restricted to even
2029 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2030 57744).
2031
2032 Similarly, don't allow OOmode (vector pair), XOmode (vector quad), or
2033 TDOmode (dmr register) to pair with anything else. Vector pairs are
2034 restricted to even/odd VSX registers. Without dense math, vector quads are
2035 limited to FPR registers divisible by 4. With dense math, vector quads are
2036 limited to even VSX registers or DMR registers.
2037
2038 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2039 128-bit floating point on VSX systems ties with other vectors. */
2040
2041 static bool
2042 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2043 {
2044 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
2045 || mode1 == TDOmode || mode2 == PTImode || mode2 == OOmode
2046 || mode2 == XOmode || mode2 == TDOmode)
2047 return mode1 == mode2;
2048
2049 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2050 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2051 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2052 return false;
2053
2054 if (SCALAR_FLOAT_MODE_P (mode1))
2055 return SCALAR_FLOAT_MODE_P (mode2);
2056 if (SCALAR_FLOAT_MODE_P (mode2))
2057 return false;
2058
2059 if (GET_MODE_CLASS (mode1) == MODE_CC)
2060 return GET_MODE_CLASS (mode2) == MODE_CC;
2061 if (GET_MODE_CLASS (mode2) == MODE_CC)
2062 return false;
2063
2064 return true;
2065 }
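
/* Illustrative consequences of the rules above: V4SImode and V2DFmode tie
   (both are Altivec/VSX vector modes), while TImode and PTImode do not,
   since PTImode only ties with itself. */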
2066
2067 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2068
2069 static bool
2070 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
2071 machine_mode mode)
2072 {
2073 if (TARGET_32BIT
2074 && TARGET_POWERPC64
2075 && GET_MODE_SIZE (mode) > 4
2076 && INT_REGNO_P (regno))
2077 return true;
2078
2079 if (TARGET_VSX
2080 && FP_REGNO_P (regno)
2081 && GET_MODE_SIZE (mode) > 8
2082 && !FLOAT128_2REG_P (mode))
2083 return true;
2084
2085 return false;
2086 }
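
/* Background for the two cases above (explanatory note): with -m32 on a
   64-bit PowerPC, the ABI only preserves the low 32 bits of the GPRs across
   calls, so a DImode value in a GPR is partially clobbered.  Likewise, with
   VSX the calling convention only preserves the low 64 bits of the VSX
   registers that overlap the FPRs, so wider values there are partially
   clobbered as well. */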
2087
2088 /* Print interesting facts about registers. */
2089 static void
2090 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2091 {
2092 int r, m;
2093
2094 for (r = first_regno; r <= last_regno; ++r)
2095 {
2096 const char *comma = "";
2097 int len;
2098
2099 if (first_regno == last_regno)
2100 fprintf (stderr, "%s:\t", reg_name);
2101 else
2102 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2103
2104 len = 8;
2105 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2106 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2107 {
2108 if (len > 70)
2109 {
2110 fprintf (stderr, ",\n\t");
2111 len = 8;
2112 comma = "";
2113 }
2114
2115 if (rs6000_hard_regno_nregs[m][r] > 1)
2116 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2117 rs6000_hard_regno_nregs[m][r]);
2118 else
2119 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2120
2121 comma = ", ";
2122 }
2123
2124 if (call_used_or_fixed_reg_p (r))
2125 {
2126 if (len > 70)
2127 {
2128 fprintf (stderr, ",\n\t");
2129 len = 8;
2130 comma = "";
2131 }
2132
2133 len += fprintf (stderr, "%s%s", comma, "call-used");
2134 comma = ", ";
2135 }
2136
2137 if (fixed_regs[r])
2138 {
2139 if (len > 70)
2140 {
2141 fprintf (stderr, ",\n\t");
2142 len = 8;
2143 comma = "";
2144 }
2145
2146 len += fprintf (stderr, "%s%s", comma, "fixed");
2147 comma = ", ";
2148 }
2149
2150 if (len > 70)
2151 {
2152 fprintf (stderr, ",\n\t");
2153 comma = "";
2154 }
2155
2156 len += fprintf (stderr, "%sreg-class = %s", comma,
2157 reg_class_names[(int)rs6000_regno_regclass[r]]);
2158 comma = ", ";
2159
2160 if (len > 70)
2161 {
2162 fprintf (stderr, ",\n\t");
2163 comma = "";
2164 }
2165
2166 fprintf (stderr, "%sregno = %d\n", comma, r);
2167 }
2168 }
2169
2170 static const char *
2171 rs6000_debug_vector_unit (enum rs6000_vector v)
2172 {
2173 const char *ret;
2174
2175 switch (v)
2176 {
2177 case VECTOR_NONE: ret = "none"; break;
2178 case VECTOR_ALTIVEC: ret = "altivec"; break;
2179 case VECTOR_VSX: ret = "vsx"; break;
2180 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2181 default: ret = "unknown"; break;
2182 }
2183
2184 return ret;
2185 }
2186
2187 /* Inner function printing just the address mask for a particular reload
2188 register class. */
2189 DEBUG_FUNCTION char *
2190 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2191 {
2192 static char ret[8];
2193 char *p = ret;
2194
2195 if ((mask & RELOAD_REG_VALID) != 0)
2196 *p++ = 'v';
2197 else if (keep_spaces)
2198 *p++ = ' ';
2199
2200 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2201 *p++ = 'm';
2202 else if (keep_spaces)
2203 *p++ = ' ';
2204
2205 if ((mask & RELOAD_REG_INDEXED) != 0)
2206 *p++ = 'i';
2207 else if (keep_spaces)
2208 *p++ = ' ';
2209
2210 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2211 *p++ = 'O';
2212 else if ((mask & RELOAD_REG_OFFSET) != 0)
2213 *p++ = 'o';
2214 else if (keep_spaces)
2215 *p++ = ' ';
2216
2217 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2218 *p++ = '+';
2219 else if (keep_spaces)
2220 *p++ = ' ';
2221
2222 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2223 *p++ = '+';
2224 else if (keep_spaces)
2225 *p++ = ' ';
2226
2227 if ((mask & RELOAD_REG_AND_M16) != 0)
2228 *p++ = '&';
2229 else if (keep_spaces)
2230 *p++ = ' ';
2231
2232 *p = '\0';
2233
2234 return ret;
2235 }
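
/* Example (illustrative): a mask of RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET comes back as "v io   " when KEEP_SPACES is true, or
   as "vio" when it is false. */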
2236
2237 /* Print the address masks in a human readable fashion.  */
2238 DEBUG_FUNCTION void
2239 rs6000_debug_print_mode (ssize_t m)
2240 {
2241 ssize_t rc;
2242 int spaces = 0;
2243
2244 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2245 for (rc = 0; rc < N_RELOAD_REG; rc++)
2246 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2247 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2248
2249 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2250 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2251 {
2252 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2253 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2254 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2255 spaces = 0;
2256 }
2257 else
2258 spaces += strlen (" Reload=sl");
2259
2260 if (reg_addr[m].scalar_in_vmx_p)
2261 {
2262 fprintf (stderr, "%*s Upper=y", spaces, "");
2263 spaces = 0;
2264 }
2265 else
2266 spaces += strlen (" Upper=y");
2267
2268 if (rs6000_vector_unit[m] != VECTOR_NONE
2269 || rs6000_vector_mem[m] != VECTOR_NONE)
2270 {
2271 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2272 spaces, "",
2273 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2274 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2275 }
2276
2277 fputs ("\n", stderr);
2278 }
2279
2280 #define DEBUG_FMT_ID "%-32s= "
2281 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2282 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2283 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
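
/* Example (illustrative): fprintf (stderr, DEBUG_FMT_D, "tls_size", 13)
   prints the name left-justified in a 32-column field followed by "= 13",
   which keeps the -mdebug=reg output aligned. */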
2284
2285 /* Print various interesting information with -mdebug=reg. */
2286 static void
2287 rs6000_debug_reg_global (void)
2288 {
2289 static const char *const tf[2] = { "false", "true" };
2290 const char *nl = (const char *)0;
2291 int m;
2292 size_t m1, m2, v;
2293 char costly_num[20];
2294 char nop_num[20];
2295 char flags_buffer[40];
2296 const char *costly_str;
2297 const char *nop_str;
2298 const char *trace_str;
2299 const char *abi_str;
2300 const char *cmodel_str;
2301 struct cl_target_option cl_opts;
2302
2303 /* Modes we want tieable information on. */
2304 static const machine_mode print_tieable_modes[] = {
2305 QImode,
2306 HImode,
2307 SImode,
2308 DImode,
2309 TImode,
2310 PTImode,
2311 SFmode,
2312 DFmode,
2313 TFmode,
2314 IFmode,
2315 KFmode,
2316 SDmode,
2317 DDmode,
2318 TDmode,
2319 V2SImode,
2320 V2SFmode,
2321 V16QImode,
2322 V8HImode,
2323 V4SImode,
2324 V2DImode,
2325 V1TImode,
2326 V32QImode,
2327 V16HImode,
2328 V8SImode,
2329 V4DImode,
2330 V2TImode,
2331 V4SFmode,
2332 V2DFmode,
2333 V8SFmode,
2334 V4DFmode,
2335 OOmode,
2336 XOmode,
2337 TDOmode,
2338 CCmode,
2339 CCUNSmode,
2340 CCEQmode,
2341 CCFPmode,
2342 };
2343
2344 /* Virtual regs we are interested in. */
2345   static const struct {
2346 int regno; /* register number. */
2347 const char *name; /* register name. */
2348 } virtual_regs[] = {
2349 { STACK_POINTER_REGNUM, "stack pointer:" },
2350 { TOC_REGNUM, "toc: " },
2351 { STATIC_CHAIN_REGNUM, "static chain: " },
2352 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2353 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2354 { ARG_POINTER_REGNUM, "arg pointer: " },
2355 { FRAME_POINTER_REGNUM, "frame pointer:" },
2356 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2357 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2358 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2359 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2360 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2361 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2362 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2363     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2364 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2365 };
2366
2367 fputs ("\nHard register information:\n", stderr);
2368 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2369 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2370 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2371 LAST_ALTIVEC_REGNO,
2372 "vs");
2373 rs6000_debug_reg_print (FIRST_DMR_REGNO, LAST_DMR_REGNO, "dmr");
2374 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2375 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2376 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2377 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2378 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2379 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2380
2381 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2382 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2383 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2384
2385 fprintf (stderr,
2386 "\n"
2387 "d reg_class = %s\n"
2388 "v reg_class = %s\n"
2389 "wa reg_class = %s\n"
2390 "we reg_class = %s\n"
2391 "wr reg_class = %s\n"
2392 "wx reg_class = %s\n"
2393 "wA reg_class = %s\n"
2394 "wD reg_class = %s\n"
2395 "\n",
2396 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2397 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2398 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2399 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2400 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2401 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2402 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2403 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]);
2404
2405 nl = "\n";
2406 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2407 rs6000_debug_print_mode (m);
2408
2409 fputs ("\n", stderr);
2410
2411 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2412 {
2413 machine_mode mode1 = print_tieable_modes[m1];
2414 bool first_time = true;
2415
2416 nl = (const char *)0;
2417 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2418 {
2419 machine_mode mode2 = print_tieable_modes[m2];
2420 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2421 {
2422 if (first_time)
2423 {
2424 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2425 nl = "\n";
2426 first_time = false;
2427 }
2428
2429 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2430 }
2431 }
2432
2433 if (!first_time)
2434 fputs ("\n", stderr);
2435 }
2436
2437 if (nl)
2438 fputs (nl, stderr);
2439
2440 if (rs6000_recip_control)
2441 {
2442 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2443
2444 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2445 if (rs6000_recip_bits[m])
2446 {
2447 fprintf (stderr,
2448 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2449 GET_MODE_NAME (m),
2450 (RS6000_RECIP_AUTO_RE_P (m)
2451 ? "auto"
2452 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2453 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2454 ? "auto"
2455 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2456 }
2457
2458 fputs ("\n", stderr);
2459 }
2460
2461 if (rs6000_cpu_index >= 0)
2462 {
2463 const char *name = processor_target_table[rs6000_cpu_index].name;
2464 HOST_WIDE_INT flags
2465 = processor_target_table[rs6000_cpu_index].target_enable;
2466
2467 sprintf (flags_buffer, "-mcpu=%s flags", name);
2468 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2469 }
2470 else
2471 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2472
2473 if (rs6000_tune_index >= 0)
2474 {
2475 const char *name = processor_target_table[rs6000_tune_index].name;
2476 HOST_WIDE_INT flags
2477 = processor_target_table[rs6000_tune_index].target_enable;
2478
2479 sprintf (flags_buffer, "-mtune=%s flags", name);
2480 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2481 }
2482 else
2483 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2484
2485 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2486 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2487 rs6000_isa_flags);
2488
2489 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2490 rs6000_isa_flags_explicit);
2491
2492 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2493
2494 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2495 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2496
2497 switch (rs6000_sched_costly_dep)
2498 {
2499 case max_dep_latency:
2500 costly_str = "max_dep_latency";
2501 break;
2502
2503 case no_dep_costly:
2504 costly_str = "no_dep_costly";
2505 break;
2506
2507 case all_deps_costly:
2508 costly_str = "all_deps_costly";
2509 break;
2510
2511 case true_store_to_load_dep_costly:
2512 costly_str = "true_store_to_load_dep_costly";
2513 break;
2514
2515 case store_to_load_dep_costly:
2516 costly_str = "store_to_load_dep_costly";
2517 break;
2518
2519 default:
2520 costly_str = costly_num;
2521 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2522 break;
2523 }
2524
2525 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2526
2527 switch (rs6000_sched_insert_nops)
2528 {
2529 case sched_finish_regroup_exact:
2530 nop_str = "sched_finish_regroup_exact";
2531 break;
2532
2533 case sched_finish_pad_groups:
2534 nop_str = "sched_finish_pad_groups";
2535 break;
2536
2537 case sched_finish_none:
2538 nop_str = "sched_finish_none";
2539 break;
2540
2541 default:
2542 nop_str = nop_num;
2543 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2544 break;
2545 }
2546
2547 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2548
2549 switch (rs6000_sdata)
2550 {
2551 default:
2552 case SDATA_NONE:
2553 break;
2554
2555 case SDATA_DATA:
2556 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2557 break;
2558
2559 case SDATA_SYSV:
2560 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2561 break;
2562
2563 case SDATA_EABI:
2564 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2565 break;
2566
2567 }
2568
2569 switch (rs6000_traceback)
2570 {
2571 case traceback_default: trace_str = "default"; break;
2572 case traceback_none: trace_str = "none"; break;
2573 case traceback_part: trace_str = "part"; break;
2574 case traceback_full: trace_str = "full"; break;
2575 default: trace_str = "unknown"; break;
2576 }
2577
2578 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2579
2580 switch (rs6000_current_cmodel)
2581 {
2582 case CMODEL_SMALL: cmodel_str = "small"; break;
2583 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2584 case CMODEL_LARGE: cmodel_str = "large"; break;
2585 default: cmodel_str = "unknown"; break;
2586 }
2587
2588 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2589
2590 switch (rs6000_current_abi)
2591 {
2592 case ABI_NONE: abi_str = "none"; break;
2593 case ABI_AIX: abi_str = "aix"; break;
2594 case ABI_ELFv2: abi_str = "ELFv2"; break;
2595 case ABI_V4: abi_str = "V4"; break;
2596 case ABI_DARWIN: abi_str = "darwin"; break;
2597 default: abi_str = "unknown"; break;
2598 }
2599
2600 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2601
2602 if (rs6000_altivec_abi)
2603 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2604
2605 if (rs6000_aix_extabi)
2606 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2607
2608 if (rs6000_darwin64_abi)
2609 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2610
2611 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2612 (TARGET_SOFT_FLOAT ? "true" : "false"));
2613
2614 if (TARGET_LINK_STACK)
2615 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2616
2617 if (TARGET_P8_FUSION)
2618 {
2619 char options[80];
2620
2621 strcpy (options, "power8");
2622 if (TARGET_P8_FUSION_SIGN)
2623 strcat (options, ", sign");
2624
2625 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2626 }
2627
2628 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2629 TARGET_SECURE_PLT ? "secure" : "bss");
2630 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2631 aix_struct_return ? "aix" : "sysv");
2632 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2633 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2634 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2635 tf[!!rs6000_align_branch_targets]);
2636 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2637 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2638 rs6000_long_double_type_size);
2639 if (rs6000_long_double_type_size > 64)
2640 {
2641 fprintf (stderr, DEBUG_FMT_S, "long double type",
2642 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2643 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2644 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2645 }
2646 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2647 (int)rs6000_sched_restricted_insns_priority);
2648 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2649 (int)END_BUILTINS);
2650
2651 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2652 (int)TARGET_FLOAT128_ENABLE_TYPE);
2653
2654 if (TARGET_VSX)
2655 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2656 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2657
2658 if (TARGET_DIRECT_MOVE_128)
2659 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2660 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2661 }
2662
2663 \f
2664 /* Update the addr mask bits in reg_addr to help secondary reload and the
2665    legitimate address support code figure out the appropriate addressing to
2666 use. */
2667
2668 static void
2669 rs6000_setup_reg_addr_masks (void)
2670 {
2671 ssize_t rc, reg, m, nregs;
2672 addr_mask_type any_addr_mask, addr_mask;
2673
2674 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2675 {
2676 machine_mode m2 = (machine_mode) m;
2677 bool complex_p = false;
2678 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2679 size_t msize;
2680
2681 if (COMPLEX_MODE_P (m2))
2682 {
2683 complex_p = true;
2684 m2 = GET_MODE_INNER (m2);
2685 }
2686
2687 msize = GET_MODE_SIZE (m2);
2688
2689 /* SDmode is special in that we want to access it only via REG+REG
2690 addressing on power7 and above, since we want to use the LFIWZX and
2691 STFIWZX instructions to load it. */
2692 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2693
2694 any_addr_mask = 0;
2695 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2696 {
2697 addr_mask = 0;
2698 reg = reload_reg_map[rc].reg;
2699
2700 /* Special case DMR registers. */
2701 if (rc == RELOAD_REG_DMR)
2702 {
2703 if (TARGET_DENSE_MATH && (m2 == XOmode || m2 == TDOmode))
2704 {
2705 addr_mask = RELOAD_REG_VALID;
2706 reg_addr[m].addr_mask[rc] = addr_mask;
2707 any_addr_mask |= addr_mask;
2708 }
2709 else
2710 reg_addr[m].addr_mask[rc] = 0;
2711
2712 continue;
2713 }
2714
2715 /* Can mode values go in the GPR/FPR/Altivec registers? */
2716 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2717 {
2718 bool small_int_vsx_p = (small_int_p
2719 && (rc == RELOAD_REG_FPR
2720 || rc == RELOAD_REG_VMX));
2721
2722 nregs = rs6000_hard_regno_nregs[m][reg];
2723 addr_mask |= RELOAD_REG_VALID;
2724
2725 /* Indicate if the mode takes more than 1 physical register. If
2726 it takes a single register, indicate it can do REG+REG
2727 addressing. Small integers in VSX registers can only do
2728 REG+REG addressing. */
2729 if (small_int_vsx_p)
2730 addr_mask |= RELOAD_REG_INDEXED;
2731 else if (nregs > 1 || m == BLKmode || complex_p)
2732 addr_mask |= RELOAD_REG_MULTIPLE;
2733 else
2734 addr_mask |= RELOAD_REG_INDEXED;
2735
2736 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2737 addressing. If we allow scalars into Altivec registers,
2738 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2739
2740 For VSX systems, we don't allow update addressing for
2741 DFmode/SFmode if those registers can go in both the
2742 traditional floating point registers and Altivec registers.
2743 The load/store instructions for the Altivec registers do not
2744 have update forms. If we allowed update addressing, it seems
2745 to break IV-OPT code using floating point if the index type is
2746 int instead of long (PR target/81550 and target/84042). */
2747
2748 if (TARGET_UPDATE
2749 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2750 && msize <= 8
2751 && !VECTOR_MODE_P (m2)
2752 && !VECTOR_ALIGNMENT_P (m2)
2753 && !complex_p
2754 && (m != E_DFmode || !TARGET_VSX)
2755 && (m != E_SFmode || !TARGET_P8_VECTOR)
2756 && !small_int_vsx_p)
2757 {
2758 addr_mask |= RELOAD_REG_PRE_INCDEC;
2759
2760 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2761 we don't allow PRE_MODIFY for some multi-register
2762 operations. */
2763 switch (m)
2764 {
2765 default:
2766 addr_mask |= RELOAD_REG_PRE_MODIFY;
2767 break;
2768
2769 case E_DImode:
2770 if (TARGET_POWERPC64)
2771 addr_mask |= RELOAD_REG_PRE_MODIFY;
2772 break;
2773
2774 case E_DFmode:
2775 case E_DDmode:
2776 if (TARGET_HARD_FLOAT)
2777 addr_mask |= RELOAD_REG_PRE_MODIFY;
2778 break;
2779 }
2780 }
2781 }
2782
2783 /* GPR and FPR registers can do REG+OFFSET addressing, except
2784 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2785 	 for 64-bit scalars and 32-bit SFmode to Altivec registers.  */
2786 if ((addr_mask != 0) && !indexed_only_p
2787 && msize <= 8
2788 && (rc == RELOAD_REG_GPR
2789 || ((msize == 8 || m2 == SFmode)
2790 && (rc == RELOAD_REG_FPR
2791 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2792 addr_mask |= RELOAD_REG_OFFSET;
2793
2794       /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2795 	 instructions are enabled.  The offset for 128-bit VSX registers is
2796 	 only 12 bits.  While GPRs can handle the full offset range, VSX
2797 registers can only handle the restricted range. */
2798 else if ((addr_mask != 0) && !indexed_only_p
2799 && msize == 16 && TARGET_P9_VECTOR
2800 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2801 || (m2 == TImode && TARGET_VSX)))
2802 {
2803 addr_mask |= RELOAD_REG_OFFSET;
2804 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2805 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2806 }
2807
2808 /* Vector pairs can do both indexed and offset loads if the
2809 	 instructions are enabled; otherwise they can only do offset loads,
2810 	 since the access will be broken into two vector moves.  Vector quads
2811 	 and 1,024-bit DMR values can only do offset loads.  */
2812 else if ((addr_mask != 0) && TARGET_MMA
2813 && (m2 == OOmode || m2 == XOmode || m2 == TDOmode))
2814 {
2815 addr_mask |= RELOAD_REG_OFFSET;
2816 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2817 {
2818 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2819 if (m2 == OOmode)
2820 addr_mask |= RELOAD_REG_INDEXED;
2821 }
2822 }
2823
2824 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2825 addressing on 128-bit types. */
2826 if (rc == RELOAD_REG_VMX && msize == 16
2827 && (addr_mask & RELOAD_REG_VALID) != 0)
2828 addr_mask |= RELOAD_REG_AND_M16;
2829
2830 reg_addr[m].addr_mask[rc] = addr_mask;
2831 any_addr_mask |= addr_mask;
2832 }
2833
2834 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2835 }
2836 }
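
/* Worked example (a sketch, assuming a 64-bit target with TARGET_UPDATE
   set): for SImode in the GPR reload class the loop above sets
   RELOAD_REG_VALID and RELOAD_REG_INDEXED (a single register, so REG+REG is
   fine), RELOAD_REG_PRE_INCDEC and RELOAD_REG_PRE_MODIFY (update forms
   exist for integer loads/stores), and RELOAD_REG_OFFSET (D-form
   addressing), which rs6000_debug_addr_mask renders as "v io++ ". */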
2837
2838 \f
2839 /* Initialize the various global tables that are based on register size. */
2840 static void
2841 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2842 {
2843 ssize_t r, m, c;
2844 int align64;
2845 int align32;
2846
2847 /* Precalculate REGNO_REG_CLASS. */
2848 rs6000_regno_regclass[0] = GENERAL_REGS;
2849 for (r = 1; r < 32; ++r)
2850 rs6000_regno_regclass[r] = BASE_REGS;
2851
2852 for (r = 32; r < 64; ++r)
2853 rs6000_regno_regclass[r] = FLOAT_REGS;
2854
2855 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2856 rs6000_regno_regclass[r] = NO_REGS;
2857
2858 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2859 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2860
2861 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2862 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2863 rs6000_regno_regclass[r] = CR_REGS;
2864
2865 for (r = FIRST_DMR_REGNO; r <= LAST_DMR_REGNO; ++r)
2866 rs6000_regno_regclass[r] = DM_REGS;
2867
2868 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2869 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2870 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2871 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2872 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2873 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2874 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2875
2876   /* Precalculate the mapping from register class to reload register type.
2877      We don't need all of the register classes that are combinations of
2878      different classes, just the simple ones that have constraint letters.  */
2879 for (c = 0; c < N_REG_CLASSES; c++)
2880 reg_class_to_reg_type[c] = NO_REG_TYPE;
2881
2882 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2883 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2884 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2885 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2886 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2887 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2888 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2889 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2890 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2891 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2892 reg_class_to_reg_type[(int)DM_REGS] = DMR_REG_TYPE;
2893
2894 if (TARGET_VSX)
2895 {
2896 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2897 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2898 }
2899 else
2900 {
2901 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2902 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2903 }
2904
2905   /* Precalculate the valid memory formats as well as the vector information;
2906      this must be set up before the rs6000_hard_regno_nregs_internal calls
2907 below. */
2908 gcc_assert ((int)VECTOR_NONE == 0);
2909 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2910 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2911
2912 gcc_assert ((int)CODE_FOR_nothing == 0);
2913 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2914
2915 gcc_assert ((int)NO_REGS == 0);
2916 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2917
2918   /* The VSX hardware allows native alignment for vectors; control whether the
2919      compiler believes it can use native alignment or still uses 128-bit alignment.  */
2920 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2921 {
2922 align64 = 64;
2923 align32 = 32;
2924 }
2925 else
2926 {
2927 align64 = 128;
2928 align32 = 128;
2929 }
2930
2931 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2932 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2933 if (TARGET_FLOAT128_TYPE)
2934 {
2935 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2936 rs6000_vector_align[KFmode] = 128;
2937
2938 if (FLOAT128_IEEE_P (TFmode))
2939 {
2940 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2941 rs6000_vector_align[TFmode] = 128;
2942 }
2943 }
2944
2945 /* V2DF mode, VSX only. */
2946 if (TARGET_VSX)
2947 {
2948 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2949 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2950 rs6000_vector_align[V2DFmode] = align64;
2951 }
2952
2953 /* V4SF mode, either VSX or Altivec. */
2954 if (TARGET_VSX)
2955 {
2956 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2957 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2958 rs6000_vector_align[V4SFmode] = align32;
2959 }
2960 else if (TARGET_ALTIVEC)
2961 {
2962 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2963 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2964 rs6000_vector_align[V4SFmode] = align32;
2965 }
2966
2967 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2968 and stores. */
2969 if (TARGET_ALTIVEC)
2970 {
2971 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2972 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2973 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2974 rs6000_vector_align[V4SImode] = align32;
2975 rs6000_vector_align[V8HImode] = align32;
2976 rs6000_vector_align[V16QImode] = align32;
2977
2978 if (TARGET_VSX)
2979 {
2980 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2981 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2982 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2983 }
2984 else
2985 {
2986 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2987 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2988 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2989 }
2990 }
2991
2992   /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit.  Allow
2993      under VSX to do insert/splat/extract.  Altivec doesn't have 64-bit integer support.  */
2994 if (TARGET_VSX)
2995 {
2996 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2997 rs6000_vector_unit[V2DImode]
2998 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2999 rs6000_vector_align[V2DImode] = align64;
3000
3001 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3002 rs6000_vector_unit[V1TImode]
3003 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3004 rs6000_vector_align[V1TImode] = 128;
3005 }
3006
3007 /* DFmode, see if we want to use the VSX unit. Memory is handled
3008 differently, so don't set rs6000_vector_mem. */
3009 if (TARGET_VSX)
3010 {
3011 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3012 rs6000_vector_align[DFmode] = 64;
3013 }
3014
3015 /* SFmode, see if we want to use the VSX unit. */
3016 if (TARGET_P8_VECTOR)
3017 {
3018 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3019 rs6000_vector_align[SFmode] = 32;
3020 }
3021
3022   /* Allow TImode in VSX registers and set the VSX memory macros.  */
3023 if (TARGET_VSX)
3024 {
3025 rs6000_vector_mem[TImode] = VECTOR_VSX;
3026 rs6000_vector_align[TImode] = align64;
3027 }
3028
3029 /* Add support for vector pairs and vector quad registers. */
3030 if (TARGET_MMA)
3031 {
3032 rs6000_vector_unit[OOmode] = VECTOR_NONE;
3033 rs6000_vector_mem[OOmode] = VECTOR_VSX;
3034 rs6000_vector_align[OOmode] = 256;
3035
3036 rs6000_vector_unit[XOmode] = VECTOR_NONE;
3037 rs6000_vector_mem[XOmode] = VECTOR_VSX;
3038 rs6000_vector_align[XOmode] = 512;
3039 }
3040
3041   /* Add support for 1,024-bit DMR registers.  */
3042 if (TARGET_DENSE_MATH)
3043 {
3044 rs6000_vector_unit[TDOmode] = VECTOR_NONE;
3045 rs6000_vector_mem[TDOmode] = VECTOR_VSX;
3046 rs6000_vector_align[TDOmode] = 512;
3047 }
3048
3049 /* Register class constraints for the constraints that depend on compile
3050 switches. When the VSX code was added, different constraints were added
3051 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3052 of the VSX registers are used. The register classes for scalar floating
3053      point types are set, based on whether we allow that type into the upper
3054 (Altivec) registers. GCC has register classes to target the Altivec
3055 registers for load/store operations, to select using a VSX memory
3056 operation instead of the traditional floating point operation. The
3057 constraints are:
3058
3059 d - Register class to use with traditional DFmode instructions.
3060 v - Altivec register.
3061 wa - Any VSX register.
3062 wc - Reserved to represent individual CR bits (used in LLVM).
3063 wn - always NO_REGS.
3064 wr - GPR if 64-bit mode is permitted.
3065 wx - Float register if we can do 32-bit int stores. */
3066
3067 if (TARGET_HARD_FLOAT)
3068 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
3069 if (TARGET_ALTIVEC)
3070 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3071 if (TARGET_VSX)
3072 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3073
3074 if (TARGET_POWERPC64)
3075 {
3076 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3077 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3078 }
3079
3080 if (TARGET_STFIWX)
3081 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3082
3083   /* Support for new direct moves (ISA 3.0 + 64-bit).  */
3084 if (TARGET_DIRECT_MOVE_128)
3085 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3086
3087 /* Support for the accumulator registers, either FPR registers (aka original
3088      MMA) or DMR registers (dense math).  */
3089 if (TARGET_DENSE_MATH)
3090 rs6000_constraints[RS6000_CONSTRAINT_wD] = DM_REGS;
3091 else if (TARGET_MMA)
3092 rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS;
3093
3094 /* Set up the reload helper and direct move functions. */
3095 if (TARGET_VSX || TARGET_ALTIVEC)
3096 {
3097 if (TARGET_64BIT)
3098 {
3099 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3100 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3101 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3102 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3103 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3104 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3105 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3106 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3107 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3108 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3109 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3110 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3111 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3112 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3113 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3114 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3115 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3116 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3117 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3118 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3119
3120 if (FLOAT128_VECTOR_P (KFmode))
3121 {
3122 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3123 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3124 }
3125
3126 if (FLOAT128_VECTOR_P (TFmode))
3127 {
3128 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3129 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3130 }
3131
3132 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3133 available. */
3134 if (TARGET_NO_SDMODE_STACK)
3135 {
3136 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3137 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3138 }
3139
3140 if (TARGET_VSX)
3141 {
3142 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3143 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3144 }
3145
3146 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3147 {
3148 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3149 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3150 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3151 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3152 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3153 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3154 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3155 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3156 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3157
3158 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3159 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3160 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3161 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3162 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3163 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3164 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3165 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3166 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3167
3168 if (FLOAT128_VECTOR_P (KFmode))
3169 {
3170 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3171 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3172 }
3173
3174 if (FLOAT128_VECTOR_P (TFmode))
3175 {
3176 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3177 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3178 }
3179
3180 if (TARGET_MMA)
3181 {
3182 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3183 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3184 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3185 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3186 }
3187 }
3188 }
3189 else
3190 {
3191 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3192 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3193 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3194 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3195 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3196 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3197 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3198 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3199 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3200 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3201 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3202 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3203 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3204 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3205 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3206 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3207 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3208 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3209 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3210 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3211
3212 if (FLOAT128_VECTOR_P (KFmode))
3213 {
3214 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3215 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3216 }
3217
3218 if (FLOAT128_IEEE_P (TFmode))
3219 {
3220 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3221 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3222 }
3223
3224 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3225 available. */
3226 if (TARGET_NO_SDMODE_STACK)
3227 {
3228 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3229 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3230 }
3231
3232 if (TARGET_VSX)
3233 {
3234 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3235 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3236 }
3237
3238 if (TARGET_DIRECT_MOVE)
3239 {
3240 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3241 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3242 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3243 }
3244 }
3245
3246 reg_addr[DFmode].scalar_in_vmx_p = true;
3247 reg_addr[DImode].scalar_in_vmx_p = true;
3248
3249 if (TARGET_P8_VECTOR)
3250 {
3251 reg_addr[SFmode].scalar_in_vmx_p = true;
3252 reg_addr[SImode].scalar_in_vmx_p = true;
3253
3254 if (TARGET_P9_VECTOR)
3255 {
3256 reg_addr[HImode].scalar_in_vmx_p = true;
3257 reg_addr[QImode].scalar_in_vmx_p = true;
3258 }
3259 }
3260 }
3261
3262 if (TARGET_DENSE_MATH)
3263 {
3264 reg_addr[TDOmode].reload_load = CODE_FOR_reload_dmr_from_memory;
3265 reg_addr[TDOmode].reload_store = CODE_FOR_reload_dmr_to_memory;
3266 }
3267
3268 /* Precalculate HARD_REGNO_NREGS. */
3269 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3270 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3271 rs6000_hard_regno_nregs[m][r]
3272 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3273
3274 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3275 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3276 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3277 rs6000_hard_regno_mode_ok_p[m][r]
3278 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3279
3280 /* Precalculate CLASS_MAX_NREGS sizes. */
3281 for (c = 0; c < LIM_REG_CLASSES; ++c)
3282 {
3283 int reg_size;
3284
3285 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3286 reg_size = UNITS_PER_VSX_WORD;
3287
3288 else if (c == ALTIVEC_REGS)
3289 reg_size = UNITS_PER_ALTIVEC_WORD;
3290
3291 else if (c == FLOAT_REGS)
3292 reg_size = UNITS_PER_FP_WORD;
3293
3294 else
3295 reg_size = UNITS_PER_WORD;
3296
3297 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3298 {
3299 machine_mode m2 = (machine_mode)m;
3300 int reg_size2 = reg_size;
3301
3302 	  /* TDmode & IBM 128-bit floating point always take 2 registers, even
3303 in VSX. */
3304 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3305 reg_size2 = UNITS_PER_FP_WORD;
3306
3307 rs6000_class_max_nregs[m][c]
3308 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3309 }
3310 }
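
/* Example (illustrative): IBM long double (IFmode, 16 bytes) in a VSX
   register class still uses a reg_size2 of UNITS_PER_FP_WORD, giving
   (16 + 8 - 1) / 8 = 2 registers, matching the comment above that such
   values always take two FPRs. */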
3311
3312   /* Calculate the modes for which to automatically generate code using the
3313      reciprocal divide and square root instructions.  In the future, possibly
3314      automatically generate the instructions even if the user did not specify
3315      -mrecip.  The older machines' double precision reciprocal sqrt estimate is
3316      not accurate enough.  */
3317 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3318 if (TARGET_FRES)
3319 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3320 if (TARGET_FRE)
3321 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3322 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3323 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3324 if (VECTOR_UNIT_VSX_P (V2DFmode))
3325 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3326
3327 if (TARGET_FRSQRTES)
3328 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3329 if (TARGET_FRSQRTE)
3330 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3331 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3332 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3333 if (VECTOR_UNIT_VSX_P (V2DFmode))
3334 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3335
3336 if (rs6000_recip_control)
3337 {
3338 if (!flag_finite_math_only)
3339 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3340 "-ffast-math");
3341 if (flag_trapping_math)
3342 warning (0, "%qs requires %qs or %qs", "-mrecip",
3343 "-fno-trapping-math", "-ffast-math");
3344 if (!flag_reciprocal_math)
3345 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3346 "-ffast-math");
3347 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3348 {
3349 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3350 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3351 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3352
3353 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3354 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3355 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3356
3357 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3358 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3359 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3360
3361 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3362 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3363 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3364
3365 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3366 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3367 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3368
3369 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3370 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3371 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3372
3373 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3374 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3375 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3376
3377 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3378 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3379 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3380 }
3381 }
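  /* To illustrate the interplay above (a sketch, not new behavior):
     compiling with -mrecip -ffast-math satisfies all three flag tests,
     since -ffast-math implies -ffinite-math-only, -fno-trapping-math and
     -freciprocal-math, so the RS6000_RECIP_MASK_AUTO_* bits are ORed in
     for whichever modes the hardware estimate instructions support.  */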
3382
3383 /* Update the addr mask bits in reg_addr to help the secondary reload and
3384 legitimate address support figure out the appropriate addressing to
3385 use. */
3386 rs6000_setup_reg_addr_masks ();
3387
3388 if (global_init_p || TARGET_DEBUG_TARGET)
3389 {
3390 if (TARGET_DEBUG_REG)
3391 rs6000_debug_reg_global ();
3392
3393 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3394 fprintf (stderr,
3395 "SImode variable mult cost = %d\n"
3396 "SImode constant mult cost = %d\n"
3397 "SImode short constant mult cost = %d\n"
3398 "DImode multipliciation cost = %d\n"
3399 "SImode division cost = %d\n"
3400 "DImode division cost = %d\n"
3401 "Simple fp operation cost = %d\n"
3402 "DFmode multiplication cost = %d\n"
3403 "SFmode division cost = %d\n"
3404 "DFmode division cost = %d\n"
3405 "cache line size = %d\n"
3406 "l1 cache size = %d\n"
3407 "l2 cache size = %d\n"
3408 "simultaneous prefetches = %d\n"
3409 "\n",
3410 rs6000_cost->mulsi,
3411 rs6000_cost->mulsi_const,
3412 rs6000_cost->mulsi_const9,
3413 rs6000_cost->muldi,
3414 rs6000_cost->divsi,
3415 rs6000_cost->divdi,
3416 rs6000_cost->fp,
3417 rs6000_cost->dmul,
3418 rs6000_cost->sdiv,
3419 rs6000_cost->ddiv,
3420 rs6000_cost->cache_line_size,
3421 rs6000_cost->l1_cache_size,
3422 rs6000_cost->l2_cache_size,
3423 rs6000_cost->simultaneous_prefetches);
3424 }
3425 }
3426
3427 #if TARGET_MACHO
3428 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3429
3430 static void
3431 darwin_rs6000_override_options (void)
3432 {
3433 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3434 off. */
3435 rs6000_altivec_abi = 1;
3436 TARGET_ALTIVEC_VRSAVE = 1;
3437 rs6000_current_abi = ABI_DARWIN;
3438
3439 if (DEFAULT_ABI == ABI_DARWIN
3440 && TARGET_64BIT)
3441 darwin_one_byte_bool = 1;
3442
3443 if (TARGET_64BIT && ! TARGET_POWERPC64)
3444 {
3445 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3446 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3447 }
3448
3449 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3450 optimisation, and it will not work with the most generic case (where the
3451 symbol is an undefined external, but there is no symbol stub). */
3452 if (TARGET_64BIT)
3453 rs6000_default_long_calls = 0;
3454
3455 /* ld_classic is (so far) still used for kernel (static) code, and supports
3456 the JBSR longcall / branch islands. */
3457 if (flag_mkernel)
3458 {
3459 rs6000_default_long_calls = 1;
3460
3461 /* Allow a kext author to do -mkernel -mhard-float. */
3462 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3463 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3464 }
3465
3466 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3467 Altivec. */
3468 if (!flag_mkernel && !flag_apple_kext
3469 && TARGET_64BIT
3470 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3471 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3472
3473 /* Unless the user (not the configurer) has explicitly overridden
3474 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3475 G4 unless targeting the kernel. */
3476 if (!flag_mkernel
3477 && !flag_apple_kext
3478 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3479 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3480 && ! OPTION_SET_P (rs6000_cpu_index))
3481 {
3482 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3483 }
3484 }
3485 #endif
3486
3487 /* If not otherwise specified by a target, make 'long double' equivalent to
3488 'double'. */
3489
3490 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3491 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3492 #endif
3493
3494 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3495 to clobber the XER[CA] bit because clobbering that bit without telling
3496 the compiler worked just fine with versions of GCC before GCC 5, and
3497 breaking a lot of older code in ways that are hard to track down is
3498 not such a great idea. */
3499
3500 static rtx_insn *
3501 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3502 vec<machine_mode> & /*input_modes*/,
3503 vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
3504 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3505 {
3506 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3507 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3508 return NULL;
3509 }
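/* For instance, pre-GCC 5 code like the (hypothetical) fragment

     asm ("addic %0,%1,-1\n\taddze %0,%0" : "=r" (out) : "r" (in));

   sets and consumes the carry bit without declaring it; the implicit
   clobber added above keeps such asm statements correct without an
   explicit XER[CA] clobber.  */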
3510
3511 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3512 but is called when the optimize level is changed via an attribute or
3513 pragma or when it is reset at the end of the code affected by the
3514 attribute or pragma. It is not called at the beginning of compilation
3515 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3516 actions then, you should have TARGET_OPTION_OVERRIDE call
3517 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3518
3519 static void
3520 rs6000_override_options_after_change (void)
3521 {
3522 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3523 turns -frename-registers on. */
3524 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3525 || (OPTION_SET_P (flag_unroll_all_loops)
3526 && flag_unroll_all_loops))
3527 {
3528 if (!OPTION_SET_P (unroll_only_small_loops))
3529 unroll_only_small_loops = 0;
3530 if (!OPTION_SET_P (flag_rename_registers))
3531 flag_rename_registers = 1;
3532 if (!OPTION_SET_P (flag_cunroll_grow_size))
3533 flag_cunroll_grow_size = 1;
3534 }
3535 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3536 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3537
3538 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3539 if (rs6000_rop_protect)
3540 flag_shrink_wrap = 0;
3541 }
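/* A usage sketch (illustrative, not from this file): the hook above runs
   for code such as

     #pragma GCC optimize ("unroll-loops")
     void hot (void) { /* ... *​/ }
     #pragma GCC reset_options

   so the unrolling-related flags and the ROP-protect/shrink-wrap
   interaction are re-evaluated for the affected region.  */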
3542
3543 #ifdef TARGET_USES_LINUX64_OPT
3544 static void
3545 rs6000_linux64_override_options ()
3546 {
3547 if (!OPTION_SET_P (rs6000_alignment_flags))
3548 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3549 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3550 {
3551 if (DEFAULT_ABI != ABI_AIX)
3552 {
3553 rs6000_current_abi = ABI_AIX;
3554 error (INVALID_64BIT, "call");
3555 }
3556 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3557 if (ELFv2_ABI_CHECK)
3558 {
3559 rs6000_current_abi = ABI_ELFv2;
3560 if (dot_symbols)
3561 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3562 }
3563 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3564 {
3565 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3566 error (INVALID_64BIT, "relocatable");
3567 }
3568 if (rs6000_isa_flags & OPTION_MASK_EABI)
3569 {
3570 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3571 error (INVALID_64BIT, "eabi");
3572 }
3573 if (TARGET_PROTOTYPE)
3574 {
3575 target_prototype = 0;
3576 error (INVALID_64BIT, "prototype");
3577 }
3578 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3579 {
3580 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3581 error ("%<-m64%> requires a PowerPC64 cpu");
3582 }
3583 if (!OPTION_SET_P (rs6000_current_cmodel))
3584 SET_CMODEL (CMODEL_MEDIUM);
3585 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3586 {
3587 if (OPTION_SET_P (rs6000_current_cmodel)
3588 && rs6000_current_cmodel != CMODEL_SMALL)
3589 error ("%<-mcmodel%> incompatible with other toc options");
3590 if (TARGET_MINIMAL_TOC)
3591 SET_CMODEL (CMODEL_SMALL);
3592 else if (TARGET_PCREL
3593 || (PCREL_SUPPORTED_BY_OS
3594 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3595 /* Ignore -mno-minimal-toc. */
3596 ;
3597 else
3598 SET_CMODEL (CMODEL_SMALL);
3599 }
3600 if (rs6000_current_cmodel != CMODEL_SMALL)
3601 {
3602 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3603 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3604 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3605 TARGET_NO_SUM_IN_TOC = 0;
3606 }
3607 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3608 {
3609 if (OPTION_SET_P (rs6000_pltseq))
3610 warning (0, "%qs unsupported for this ABI",
3611 "-mpltseq");
3612 rs6000_pltseq = false;
3613 }
3614 }
3615 else if (TARGET_64BIT)
3616 error (INVALID_32BIT, "32");
3617 else
3618 {
3619 if (TARGET_PROFILE_KERNEL)
3620 {
3621 profile_kernel = 0;
3622 error (INVALID_32BIT, "profile-kernel");
3623 }
3624 if (OPTION_SET_P (rs6000_current_cmodel))
3625 {
3626 SET_CMODEL (CMODEL_SMALL);
3627 error (INVALID_32BIT, "cmodel");
3628 }
3629 }
3630 }
3631 #endif
3632
3633 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3634 This support is only in little endian GLIBC 2.32 or newer, encoded below as major * 1000 + minor >= 2032. */
3635 static bool
3636 glibc_supports_ieee_128bit (void)
3637 {
3638 #ifdef OPTION_GLIBC
3639 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3640 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3641 return true;
3642 #endif /* OPTION_GLIBC. */
3643
3644 return false;
3645 }
3646
3647 /* Override command line options.
3648
3649 Combine build-specific configuration information with options
3650 specified on the command line to set various state variables which
3651 influence code generation, optimization, and expansion of built-in
3652 functions. Assure that command-line configuration preferences are
3653 compatible with each other and with the build configuration; issue
3654 warnings while adjusting configuration or error messages while
3655 rejecting configuration.
3656
3657 Upon entry to this function:
3658
3659 This function is called once at the beginning of
3660 compilation, and then again at the start and end of compiling
3661 each section of code that has a different configuration, as
3662 indicated, for example, by adding the
3663
3664 __attribute__((__target__("cpu=power9")))
3665
3666 qualifier to a function definition or, for example, by bracketing
3667 code between
3668
3669 #pragma GCC target("altivec")
3670
3671 and
3672
3673 #pragma GCC reset_options
3674
3675 directives. Parameter global_init_p is true for the initial
3676 invocation, which initializes global variables, and false for all
3677 subsequent invocations.
3678
3679
3680 Various global state information is assumed to be valid. This
3681 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3682 default CPU specified at build configure time, TARGET_DEFAULT,
3683 representing the default set of option flags for the default
3684 target, and OPTION_SET_P (rs6000_isa_flags), representing
3685 which options were requested on the command line.
3686
3687 Upon return from this function:
3688
3689 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3690 was set by name on the command line. Additionally, if certain
3691 attributes are automatically enabled or disabled by this function
3692 in order to assure compatibility between options and
3693 configuration, the flags associated with those attributes are
3694 also set. By setting these "explicit bits", we avoid the risk
3695 that other code might accidentally overwrite these particular
3696 attributes with "default values".
3697
3698 The various bits of rs6000_isa_flags are set to indicate the
3699 target options that have been selected for the most current
3700 compilation efforts. This has the effect of also turning on the
3701 associated TARGET_XXX values since these are macros which are
3702 generally defined to test the corresponding bit of the
3703 rs6000_isa_flags variable.
3704
3705 Various other global variables and fields of global structures
3706 (over 50 in all) are initialized to reflect the desired options
3707 for the most current compilation efforts. */
3708
3709 static bool
3710 rs6000_option_override_internal (bool global_init_p)
3711 {
3712 bool ret = true;
3713
3714 HOST_WIDE_INT set_masks;
3715 HOST_WIDE_INT ignore_masks;
3716 int cpu_index = -1;
3717 int tune_index;
3718 struct cl_target_option *main_target_opt
3719 = ((global_init_p || target_option_default_node == NULL)
3720 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3721
3722 /* Print defaults. */
3723 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3724 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3725
3726 /* Remember the explicit arguments. */
3727 if (global_init_p)
3728 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3729
3730 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3731 library functions, so warn about it. The flag may be useful for
3732 performance studies from time to time though, so don't disable it
3733 entirely. */
3734 if (OPTION_SET_P (rs6000_alignment_flags)
3735 && rs6000_alignment_flags == MASK_ALIGN_POWER
3736 && DEFAULT_ABI == ABI_DARWIN
3737 && TARGET_64BIT)
3738 warning (0, "%qs is not supported for 64-bit Darwin;"
3739 " it is incompatible with the installed C and C++ libraries",
3740 "-malign-power");
3741
3742 /* Numerous experiments show that IRA-based loop pressure
3743 calculation works better for RTL loop invariant motion on targets
3744 with enough (>= 32) registers. It is an expensive optimization.
3745 So it is on only for peak performance. */
3746 if (optimize >= 3 && global_init_p
3747 && !OPTION_SET_P (flag_ira_loop_pressure))
3748 flag_ira_loop_pressure = 1;
3749
3750 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3751 for tracebacks to be complete, but not if any -fasynchronous-unwind-tables
3752 option was already specified. */
3753 if (flag_sanitize & SANITIZE_USER_ADDRESS
3754 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3755 flag_asynchronous_unwind_tables = 1;
3756
3757 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3758 loop unroller is active. It is only checked during unrolling, so
3759 we can just set it on by default. */
3760 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3761 flag_variable_expansion_in_unroller = 1;
3762
3763 /* Set the pointer size. */
3764 if (TARGET_64BIT)
3765 {
3766 rs6000_pmode = DImode;
3767 rs6000_pointer_size = 64;
3768 }
3769 else
3770 {
3771 rs6000_pmode = SImode;
3772 rs6000_pointer_size = 32;
3773 }
3774
3775 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3776 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3777 must explicitly specify it and we won't interfere with the user's
3778 specification. */
3779
3780 set_masks = POWERPC_MASKS;
3781 #ifdef OS_MISSING_ALTIVEC
3782 if (OS_MISSING_ALTIVEC)
3783 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3784 | OTHER_VSX_VECTOR_MASKS);
3785 #endif
3786
3787 /* Don't let the processor default override options given explicitly. */
3788 set_masks &= ~rs6000_isa_flags_explicit;
3789
3790 /* Without option powerpc64 specified explicitly, we need to ensure
3791 powerpc64 is always enabled for 64-bit here, otherwise some following
3792 checks can use an unexpected TARGET_POWERPC64 value. Meanwhile, we
3793 need to ensure set_masks doesn't have OPTION_MASK_POWERPC64 on,
3794 otherwise later processing can clear it. */
3795 if (!(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64)
3796 && TARGET_64BIT)
3797 {
3798 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3799 set_masks &= ~OPTION_MASK_POWERPC64;
3800 }
3801
3802 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3803 the cpu in a target attribute or pragma, but did not specify a tuning
3804 option, use the cpu for the tuning option rather than the option specified
3805 with -mtune on the command line. Process a '--with-cpu' configuration
3806 request as an implicit --cpu. */
3807 if (rs6000_cpu_index >= 0)
3808 cpu_index = rs6000_cpu_index;
3809 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3810 cpu_index = main_target_opt->x_rs6000_cpu_index;
3811 else if (OPTION_TARGET_CPU_DEFAULT)
3812 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3813
3814 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3815 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3816 with those from the cpu, except for options that were explicitly set. If
3817 we don't have a cpu, do not override the target bits set in
3818 TARGET_DEFAULT. */
3819 if (cpu_index >= 0)
3820 {
3821 rs6000_cpu_index = cpu_index;
3822 rs6000_isa_flags &= ~set_masks;
3823 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3824 & set_masks);
3825 }
3826 else
3827 {
3828 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3829 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3830 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3831 to using rs6000_isa_flags, we need to do the initialization here.
3832
3833 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3834 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3835 HOST_WIDE_INT flags;
3836 if (TARGET_DEFAULT)
3837 flags = TARGET_DEFAULT;
3838 else
3839 {
3840 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3841 const char *default_cpu = (!TARGET_POWERPC64
3842 ? "powerpc"
3843 : (BYTES_BIG_ENDIAN
3844 ? "powerpc64"
3845 : "powerpc64le"));
3846 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3847 flags = processor_target_table[default_cpu_index].target_enable;
3848 }
3849 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3850 }
3851
3852 /* Don't expect powerpc64 to be enabled on OSes with OS_MISSING_POWERPC64,
3853 since they do not save and restore the high half of the GPRs correctly
3854 in all cases. If the user explicitly specifies it, we won't interfere
3855 with the user's specification. */
3856 #ifdef OS_MISSING_POWERPC64
3857 if (OS_MISSING_POWERPC64
3858 && TARGET_32BIT
3859 && TARGET_POWERPC64
3860 && !(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64))
3861 rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
3862 #endif
3863
3864 if (rs6000_tune_index >= 0)
3865 tune_index = rs6000_tune_index;
3866 else if (cpu_index >= 0)
3867 rs6000_tune_index = tune_index = cpu_index;
3868 else
3869 {
3870 size_t i;
3871 enum processor_type tune_proc
3872 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3873
3874 tune_index = -1;
3875 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3876 if (processor_target_table[i].processor == tune_proc)
3877 {
3878 tune_index = i;
3879 break;
3880 }
3881 }
3882
3883 if (cpu_index >= 0)
3884 rs6000_cpu = processor_target_table[cpu_index].processor;
3885 else
3886 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3887
3888 gcc_assert (tune_index >= 0);
3889 rs6000_tune = processor_target_table[tune_index].processor;
3890
3891 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3892 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3893 || rs6000_cpu == PROCESSOR_PPCE5500)
3894 {
3895 if (TARGET_ALTIVEC)
3896 error ("AltiVec not supported in this target");
3897 }
3898
3899 /* If we are optimizing big endian systems for space, use the load/store
3900 multiple instructions. */
3901 if (BYTES_BIG_ENDIAN && optimize_size)
3902 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3903
3904 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3905 because the hardware doesn't support the instructions used in little
3906 endian mode, and they cause an alignment trap. The 750 does not cause an
3907 alignment trap (except when the target is unaligned). */
3908
3909 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3910 {
3911 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3912 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3913 warning (0, "%qs is not supported on little endian systems",
3914 "-mmultiple");
3915 }
3916
3917 /* If little-endian, default to -mstrict-align on older processors.
3918 Testing for direct_move matches power8 and later. */
3919 if (!BYTES_BIG_ENDIAN
3920 && !(processor_target_table[tune_index].target_enable
3921 & OPTION_MASK_DIRECT_MOVE))
3922 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3923
3924 /* Add some warnings for VSX. */
3925 if (TARGET_VSX)
3926 {
3927 const char *msg = NULL;
3928 if (!TARGET_HARD_FLOAT)
3929 {
3930 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3931 msg = N_("%<-mvsx%> requires hardware floating point");
3932 else
3933 {
3934 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3935 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3936 }
3937 }
3938 else if (TARGET_AVOID_XFORM > 0)
3939 msg = N_("%<-mvsx%> needs indexed addressing");
3940 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3941 & OPTION_MASK_ALTIVEC))
3942 {
3943 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3944 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3945 else
3946 msg = N_("%<-mno-altivec%> disables vsx");
3947 }
3948
3949 if (msg)
3950 {
3951 warning (0, msg);
3952 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3953 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3954 }
3955 }
3956
3957 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3958 the -mcpu setting to enable options that conflict. */
3959 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3960 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3961 | OPTION_MASK_ALTIVEC
3962 | OPTION_MASK_VSX)) != 0)
3963 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3964 | OPTION_MASK_DIRECT_MOVE)
3965 & ~rs6000_isa_flags_explicit);
3966
3967 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3968 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3969
3970 #ifdef XCOFF_DEBUGGING_INFO
3971 /* For AIX default to 64-bit DWARF. */
3972 if (!OPTION_SET_P (dwarf_offset_size))
3973 dwarf_offset_size = POINTER_SIZE_UNITS;
3974 #endif
3975
3976 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3977 off all of the options that depend on those flags. */
3978 ignore_masks = rs6000_disable_incompatible_switches ();
3979
3980 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3981 unless the user explicitly used the -mno-<option> to disable the code. */
3982 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3983 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3984 else if (TARGET_P9_MINMAX)
3985 {
3986 if (cpu_index >= 0)
3987 {
3988 if (cpu_index == PROCESSOR_POWER9)
3989 {
3990 /* Legacy behavior: allow -mcpu=power9 with certain
3991 capabilities explicitly disabled. */
3992 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3993 }
3994 else
3995 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3996 "for <xxx> less than power9", "-mcpu");
3997 }
3998 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3999 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4000 & rs6000_isa_flags_explicit))
4001 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4002 were explicitly cleared. */
4003 error ("%qs incompatible with explicitly disabled options",
4004 "-mpower9-minmax");
4005 else
4006 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4007 }
4008 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4009 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4010 else if (TARGET_VSX)
4011 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4012 else if (TARGET_POPCNTD)
4013 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4014 else if (TARGET_DFP)
4015 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4016 else if (TARGET_CMPB)
4017 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4018 else if (TARGET_FPRND)
4019 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4020 else if (TARGET_POPCNTB)
4021 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4022 else if (TARGET_ALTIVEC)
4023 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4024
4025 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4026 target attribute or pragma which automatically enables both options,
4027 unless the altivec ABI was set. This is set by default for 64-bit, but
4028 not for 32-bit. Don't move this before the above code using ignore_masks,
4029 since it can reset the cleared VSX/ALTIVEC flag again. */
4030 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
4031 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
4032 & ~rs6000_isa_flags_explicit);
4033
4034 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4035 {
4036 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4037 error ("%qs requires %qs", "-mcrypto", "-maltivec");
4038 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4039 }
4040
4041 if (!TARGET_FPRND && TARGET_VSX)
4042 {
4043 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
4044 /* TARGET_VSX = 1 implies Power 7 and newer */
4045 error ("%qs requires %qs", "-mvsx", "-mfprnd");
4046 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
4047 }
4048
4049 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4050 {
4051 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4052 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
4053 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4054 }
4055
4056 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4057 {
4058 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4059 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
4060 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4061 }
4062
4063 if (TARGET_P8_VECTOR && !TARGET_VSX)
4064 {
4065 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4066 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4067 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
4068 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4069 {
4070 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4071 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4072 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4073 }
4074 else
4075 {
4076 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4077 not explicit. */
4078 rs6000_isa_flags |= OPTION_MASK_VSX;
4079 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4080 }
4081 }
4082
4083 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4084 {
4085 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4086 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4087 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4088 }
4089
4090 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4091 silently turn off quad memory mode. */
4092 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4093 {
4094 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4095 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
4096
4097 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4098 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
4099
4100 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4101 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4102 }
4103
4104 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4105 the words are reversed, but atomic operations can still be done by
4106 swapping the words. */
4107 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4108 {
4109 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4110 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4111 "mode"));
4112
4113 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4114 }
4115
4116 /* Assume if the user asked for normal quad memory instructions, they want
4117 the atomic versions as well, unless they explicitly told us not to use quad
4118 word atomic instructions. */
4119 if (TARGET_QUAD_MEMORY
4120 && !TARGET_QUAD_MEMORY_ATOMIC
4121 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4122 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4123
4124 /* If we can shrink-wrap the TOC register save separately, then use
4125 -msave-toc-indirect unless explicitly disabled. */
4126 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4127 && flag_shrink_wrap_separate
4128 && optimize_function_for_speed_p (cfun))
4129 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4130
4131 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4132 generating power8 instructions. Power9 does not optimize power8 fusion
4133 cases. */
4134 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4135 {
4136 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4137 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4138 else
4139 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4140 }
4141
4142 /* Setting additional fusion flags turns on base fusion. */
4143 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4144 {
4145 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4146 {
4147 if (TARGET_P8_FUSION_SIGN)
4148 error ("%qs requires %qs", "-mpower8-fusion-sign",
4149 "-mpower8-fusion");
4150
4151 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4152 }
4153 else
4154 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4155 }
4156
4157 /* Power8 does not fuse sign extended loads with the addis. If we are
4158 optimizing at high levels for speed, convert a sign extended load into a
4159 zero extending load, and an explicit sign extension. */
4160 if (TARGET_P8_FUSION
4161 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4162 && optimize_function_for_speed_p (cfun)
4163 && optimize >= 3)
4164 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4165
4166 /* ISA 3.0 vector instructions include ISA 2.07. */
4167 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4168 {
4169 /* We prefer to not mention undocumented options in
4170 error messages. However, if users have managed to select
4171 power9-vector without selecting power8-vector, they
4172 already know about undocumented flags. */
4173 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
4174 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4175 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4176 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4177 {
4178 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4179 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4180 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4181 }
4182 else
4183 {
4184 /* OPTION_MASK_P9_VECTOR is explicit and
4185 OPTION_MASK_P8_VECTOR is not explicit. */
4186 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4187 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4188 }
4189 }
4190
4191 /* Set -mallow-movmisalign explicitly to on if we have full ISA 2.07
4192 support. If we only have ISA 2.06 support, and the user did not specify
4193 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4194 but we don't enable the full vectorization support. */
4195 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4196 TARGET_ALLOW_MOVMISALIGN = 1;
4197
4198 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4199 {
4200 if (TARGET_ALLOW_MOVMISALIGN > 0
4201 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4202 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4203
4204 TARGET_ALLOW_MOVMISALIGN = 0;
4205 }
4206
4207 /* Determine when unaligned vector accesses are permitted, and when
4208 they are preferred over masked Altivec loads. Note that if
4209 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4210 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4211 not true. */
4212 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4213 {
4214 if (!TARGET_VSX)
4215 {
4216 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4217 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4218
4219 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4220 }
4221
4222 else if (!TARGET_ALLOW_MOVMISALIGN)
4223 {
4224 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4225 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4226 "-mallow-movmisalign");
4227
4228 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4229 }
4230 }
4231
4232 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4233 {
4234 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4235 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4236 else
4237 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4238 }
4239
4240 /* Use long double size to select the appropriate long double. We use
4241 TYPE_PRECISION to differentiate the 3 different long double types. We map
4242 128 into the precision used for TFmode. */
4243 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4244 ? 64
4245 : FLOAT_PRECISION_TFmode);
4246
4247 /* Set long double size before the IEEE 128-bit tests. */
4248 if (!OPTION_SET_P (rs6000_long_double_type_size))
4249 {
4250 if (main_target_opt != NULL
4251 && (main_target_opt->x_rs6000_long_double_type_size
4252 != default_long_double_size))
4253 error ("target attribute or pragma changes %<long double%> size");
4254 else
4255 rs6000_long_double_type_size = default_long_double_size;
4256 }
4257 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4258 ; /* The option value can be seen when cl_target_option_restore is called. */
4259 else if (rs6000_long_double_type_size == 128)
4260 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
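  /* Informal summary of the mapping above: 64 selects plain double for
     long double, while a 128-bit size is normalized to
     FLOAT_PRECISION_TFmode so that TYPE_PRECISION can still distinguish
     the IBM double-double and IEEE binary128 layouts.  */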
4261
4262 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4263 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4264 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4265 those systems will not pick up this default. Warn if the user changes the
4266 default unless -Wno-psabi. */
4267 if (!OPTION_SET_P (rs6000_ieeequad))
4268 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4269
4270 else if (TARGET_LONG_DOUBLE_128)
4271 {
4272 if (global_options.x_rs6000_ieeequad
4273 && (!TARGET_POPCNTD || !TARGET_VSX))
4274 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4275
4276 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4277 {
4278 /* Determine if the user can change the default long double type at
4279 compilation time. You need GLIBC 2.32 or newer to be able to
4280 change the long double type. Only issue one warning. */
4281 static bool warned_change_long_double;
4282
4283 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4284 {
4285 warned_change_long_double = true;
4286 if (TARGET_IEEEQUAD)
4287 warning (OPT_Wpsabi, "Using IEEE extended precision "
4288 "%<long double%>");
4289 else
4290 warning (OPT_Wpsabi, "Using IBM extended precision "
4291 "%<long double%>");
4292 }
4293 }
4294 }
4295
4296 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4297 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4298 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4299 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4300 the keyword as well as the type. */
4301 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4302
4303 /* IEEE 128-bit floating point requires VSX support. */
4304 if (TARGET_FLOAT128_KEYWORD)
4305 {
4306 if (!TARGET_VSX)
4307 {
4308 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4309 error ("%qs requires VSX support", "-mfloat128");
4310
4311 TARGET_FLOAT128_TYPE = 0;
4312 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4313 | OPTION_MASK_FLOAT128_HW);
4314 }
4315 else if (!TARGET_FLOAT128_TYPE)
4316 {
4317 TARGET_FLOAT128_TYPE = 1;
4318 warning (0, "The %<-mfloat128%> option may not be fully supported");
4319 }
4320 }
4321
4322 /* Enable the __float128 keyword under Linux by default. */
4323 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4324 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4325 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4326
4327 /* If we are supporting the float128 type and have full ISA 3.0 support,
4328 enable -mfloat128-hardware by default. However, don't enable the
4329 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4330 because sometimes the compiler wants to put things in an integer
4331 container, and if we don't have __int128 support, it is impossible. */
4332 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4333 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4334 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4335 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4336
4337 if (TARGET_FLOAT128_HW
4338 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4339 {
4340 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4341 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4342
4343 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4344 }
4345
4346 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4347 {
4348 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4349 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4350
4351 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4352 }
4353
4354 /* Enable -mprefixed by default on power10 systems. */
4355 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4356 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4357
4358 /* -mprefixed requires -mcpu=power10 (or later). */
4359 else if (TARGET_PREFIXED && !TARGET_POWER10)
4360 {
4361 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4362 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4363
4364 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4365 }
4366
4367 /* -mpcrel requires prefixed load/store addressing. */
4368 if (TARGET_PCREL && !TARGET_PREFIXED)
4369 {
4370 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4371 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4372
4373 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4374 }
4375
4376 /* Print the options after updating the defaults. */
4377 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4378 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4379
4380 /* E500mc does "better" if we inline more aggressively. Respect the
4381 user's opinion, though. */
4382 if (rs6000_block_move_inline_limit == 0
4383 && (rs6000_tune == PROCESSOR_PPCE500MC
4384 || rs6000_tune == PROCESSOR_PPCE500MC64
4385 || rs6000_tune == PROCESSOR_PPCE5500
4386 || rs6000_tune == PROCESSOR_PPCE6500))
4387 rs6000_block_move_inline_limit = 128;
4388
4389 /* store_one_arg depends on expand_block_move to handle at least the
4390 size of reg_parm_stack_space. */
4391 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4392 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4393
4394 if (global_init_p)
4395 {
4396 /* If the appropriate debug option is enabled, replace the target hooks
4397 with debug versions that call the real version and then prints
4398 debugging information. */
4399 if (TARGET_DEBUG_COST)
4400 {
4401 targetm.rtx_costs = rs6000_debug_rtx_costs;
4402 targetm.address_cost = rs6000_debug_address_cost;
4403 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4404 }
4405
4406 if (TARGET_DEBUG_ADDR)
4407 {
4408 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4409 targetm.legitimize_address = rs6000_debug_legitimize_address;
4410 rs6000_secondary_reload_class_ptr
4411 = rs6000_debug_secondary_reload_class;
4412 targetm.secondary_memory_needed
4413 = rs6000_debug_secondary_memory_needed;
4414 targetm.can_change_mode_class
4415 = rs6000_debug_can_change_mode_class;
4416 rs6000_preferred_reload_class_ptr
4417 = rs6000_debug_preferred_reload_class;
4418 rs6000_mode_dependent_address_ptr
4419 = rs6000_debug_mode_dependent_address;
4420 }
4421
4422 if (rs6000_veclibabi_name)
4423 {
4424 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4425 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4426 else
4427 {
4428 error ("unknown vectorization library ABI type in "
4429 "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4430 ret = false;
4431 }
4432 }
4433 }
4434
4435 /* Enable Altivec ABI for AIX -maltivec. */
4436 if (TARGET_XCOFF
4437 && (TARGET_ALTIVEC || TARGET_VSX)
4438 && !OPTION_SET_P (rs6000_altivec_abi))
4439 {
4440 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4441 error ("target attribute or pragma changes AltiVec ABI");
4442 else
4443 rs6000_altivec_abi = 1;
4444 }
4445
4446 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4447 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4448 be explicitly overridden in either case. */
4449 if (TARGET_ELF)
4450 {
4451 if (!OPTION_SET_P (rs6000_altivec_abi)
4452 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4453 {
4454 if (main_target_opt != NULL
4455 && !main_target_opt->x_rs6000_altivec_abi)
4456 error ("target attribute or pragma changes AltiVec ABI");
4457 else
4458 rs6000_altivec_abi = 1;
4459 }
4460 }
4461
4462 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4463 So far, the only darwin64 targets are also Mach-O. */
4464 if (TARGET_MACHO
4465 && DEFAULT_ABI == ABI_DARWIN
4466 && TARGET_64BIT)
4467 {
4468 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4469 error ("target attribute or pragma changes darwin64 ABI");
4470 else
4471 {
4472 rs6000_darwin64_abi = 1;
4473 /* Default to natural alignment, for better performance. */
4474 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4475 }
4476 }
4477
4478 /* Place FP constants in the constant pool instead of TOC
4479 if section anchors enabled. */
4480 if (flag_section_anchors
4481 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4482 TARGET_NO_FP_IN_TOC = 1;
4483
4484 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4485 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4486
4487 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4488 SUBTARGET_OVERRIDE_OPTIONS;
4489 #endif
4490 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4491 SUBSUBTARGET_OVERRIDE_OPTIONS;
4492 #endif
4493 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4494 SUB3TARGET_OVERRIDE_OPTIONS;
4495 #endif
4496
4497 /* If the ABI has support for PC-relative relocations, enable it by default.
4498 This test depends on the sub-target tests above setting the code model to
4499 medium for ELF v2 systems. */
4500 if (PCREL_SUPPORTED_BY_OS
4501 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4502 rs6000_isa_flags |= OPTION_MASK_PCREL;
4503
4504 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4505 after the subtarget override options are done. */
4506 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4507 {
4508 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4509 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4510
4511 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4512 }
4513
4514 /* Enable -mmma by default on power10 systems. */
4515 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4516 rs6000_isa_flags |= OPTION_MASK_MMA;
4517
4518 /* Turn off vector pair/mma options on non-power10 systems. */
4519 else if (!TARGET_POWER10 && TARGET_MMA)
4520 {
4521 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4522 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4523
4524 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4525 }
4526
4527 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4528 generating power10 instructions. */
4529 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
4530 {
4531 if (rs6000_tune == PROCESSOR_POWER10)
4532 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4533 else
4534 rs6000_isa_flags &= ~OPTION_MASK_P10_FUSION;
4535 }
4536
4537 /* ISA 3.1 says MMA requires SIMD support, and our implementation of it
4538 (such as the "*movoo" pattern) uses vector pair accesses, which use VSX
4539 registers. So make MMA require VSX support here. */
4540 if (TARGET_MMA && !TARGET_VSX)
4541 {
4542 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4543 error ("%qs requires %qs", "-mmma", "-mvsx");
4544 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4545 }
4546
4547 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4548 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4549
4550 /* Dense math requires MMA. */
4551 if (TARGET_DENSE_MATH && !TARGET_MMA)
4552 {
4553 if ((rs6000_isa_flags_explicit & OPTION_MASK_DENSE_MATH) != 0)
4554 error ("%qs requires %qs", "-mdense-math", "-mmma");
4555 rs6000_isa_flags &= ~OPTION_MASK_DENSE_MATH;
4556 }
4557
4558 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4559 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4560
4561 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4562 && rs6000_tune != PROCESSOR_POWER5
4563 && rs6000_tune != PROCESSOR_POWER6
4564 && rs6000_tune != PROCESSOR_POWER7
4565 && rs6000_tune != PROCESSOR_POWER8
4566 && rs6000_tune != PROCESSOR_POWER9
4567 && rs6000_tune != PROCESSOR_POWER10
4568 && rs6000_tune != PROCESSOR_FUTURE
4569 && rs6000_tune != PROCESSOR_PPCA2
4570 && rs6000_tune != PROCESSOR_CELL
4571 && rs6000_tune != PROCESSOR_PPC476);
4572 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4573 || rs6000_tune == PROCESSOR_POWER5
4574 || rs6000_tune == PROCESSOR_POWER7
4575 || rs6000_tune == PROCESSOR_POWER8);
4576 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4577 || rs6000_tune == PROCESSOR_POWER5
4578 || rs6000_tune == PROCESSOR_POWER6
4579 || rs6000_tune == PROCESSOR_POWER7
4580 || rs6000_tune == PROCESSOR_POWER8
4581 || rs6000_tune == PROCESSOR_POWER9
4582 || rs6000_tune == PROCESSOR_POWER10
4583 || rs6000_tune == PROCESSOR_FUTURE
4584 || rs6000_tune == PROCESSOR_PPCE500MC
4585 || rs6000_tune == PROCESSOR_PPCE500MC64
4586 || rs6000_tune == PROCESSOR_PPCE5500
4587 || rs6000_tune == PROCESSOR_PPCE6500);
4588
4589 /* Allow debug switches to override the above settings. These are set to -1
4590 in rs6000.opt to indicate the user hasn't directly set the switch. */
4591 if (TARGET_ALWAYS_HINT >= 0)
4592 rs6000_always_hint = TARGET_ALWAYS_HINT;
4593
4594 if (TARGET_SCHED_GROUPS >= 0)
4595 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4596
4597 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4598 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4599
4600 rs6000_sched_restricted_insns_priority
4601 = (rs6000_sched_groups ? 1 : 0);
4602
4603 /* Handle -msched-costly-dep option. */
4604 rs6000_sched_costly_dep
4605 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4606
4607 if (rs6000_sched_costly_dep_str)
4608 {
4609 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4610 rs6000_sched_costly_dep = no_dep_costly;
4611 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4612 rs6000_sched_costly_dep = all_deps_costly;
4613 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4614 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4615 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4616 rs6000_sched_costly_dep = store_to_load_dep_costly;
4617 else
4618 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4619 atoi (rs6000_sched_costly_dep_str));
4620 }
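  /* So, for example, -msched-costly-dep=store_to_load picks a named
     heuristic above, while a plain number such as -msched-costly-dep=20
     falls through to atoi and is interpreted as a numeric dependence
     cost.  */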
4621
4622 /* Handle -minsert-sched-nops option. */
4623 rs6000_sched_insert_nops
4624 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4625
4626 if (rs6000_sched_insert_nops_str)
4627 {
4628 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4629 rs6000_sched_insert_nops = sched_finish_none;
4630 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4631 rs6000_sched_insert_nops = sched_finish_pad_groups;
4632 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4633 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4634 else
4635 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4636 atoi (rs6000_sched_insert_nops_str));
4637 }
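  /* Likewise -minsert-sched-nops accepts "no", "pad", "regroup_exact", or
     a number, which atoi maps onto the rs6000_nop_insertion values.  */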
4638
4639 /* Handle stack protector. */
4640 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4641 #ifdef TARGET_THREAD_SSP_OFFSET
4642 rs6000_stack_protector_guard = SSP_TLS;
4643 #else
4644 rs6000_stack_protector_guard = SSP_GLOBAL;
4645 #endif
4646
4647 #ifdef TARGET_THREAD_SSP_OFFSET
4648 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4649 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4650 #endif
4651
4652 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4653 {
4654 char *endp;
4655 const char *str = rs6000_stack_protector_guard_offset_str;
4656
4657 errno = 0;
4658 long offset = strtol (str, &endp, 0);
4659 if (!*str || *endp || errno)
4660 error ("%qs is not a valid number in %qs", str,
4661 "-mstack-protector-guard-offset=");
4662
4663 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4664 || (TARGET_64BIT && (offset & 3)))
4665 error ("%qs is not a valid offset in %qs", str,
4666 "-mstack-protector-guard-offset=");
4667
4668 rs6000_stack_protector_guard_offset = offset;
4669 }
4670
4671 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4672 {
4673 const char *str = rs6000_stack_protector_guard_reg_str;
4674 int reg = decode_reg_name (str);
4675
4676 if (!IN_RANGE (reg, 1, 31))
4677 error ("%qs is not a valid base register in %qs", str,
4678 "-mstack-protector-guard-reg=");
4679
4680 rs6000_stack_protector_guard_reg = reg;
4681 }
4682
4683 if (rs6000_stack_protector_guard == SSP_TLS
4684 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4685 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
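  /* A plausible explicit invocation (values illustrative only):

       gcc -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \
	   -mstack-protector-guard-offset=0x28

     r13 matches the 64-bit TLS default chosen above; per the checks, the
     offset must fit in a signed 16-bit range and, in 64-bit mode, be a
     multiple of 4.  */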
4686
4687 if (global_init_p)
4688 {
4689 #ifdef TARGET_REGNAMES
4690 /* If the user desires alternate register names, copy in the
4691 alternate names now. */
4692 if (TARGET_REGNAMES)
4693 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4694 #endif
4695
4696 /* Set aix_struct_return last, after the ABI is determined.
4697 If -maix-struct-return or -msvr4-struct-return was explicitly
4698 used, don't override with the ABI default. */
4699 if (!OPTION_SET_P (aix_struct_return))
4700 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4701
4702 #if 0
4703 /* IBM XL compiler defaults to unsigned bitfields. */
4704 if (TARGET_XL_COMPAT)
4705 flag_signed_bitfields = 0;
4706 #endif
4707
4708 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4709 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4710
4711 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4712
4713 /* We can only guarantee the availability of DI pseudo-ops when
4714 assembling for 64-bit targets. */
4715 if (!TARGET_64BIT)
4716 {
4717 targetm.asm_out.aligned_op.di = NULL;
4718 targetm.asm_out.unaligned_op.di = NULL;
4719 }
4720
4721
4722 /* Set branch target alignment, if not optimizing for size. */
4723 if (!optimize_size)
4724 {
4725 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4726 aligned 8byte to avoid misprediction by the branch predictor. */
4727 if (rs6000_tune == PROCESSOR_TITAN
4728 || rs6000_tune == PROCESSOR_CELL)
4729 {
4730 if (flag_align_functions && !str_align_functions)
4731 str_align_functions = "8";
4732 if (flag_align_jumps && !str_align_jumps)
4733 str_align_jumps = "8";
4734 if (flag_align_loops && !str_align_loops)
4735 str_align_loops = "8";
4736 }
4737 if (rs6000_align_branch_targets)
4738 {
4739 if (flag_align_functions && !str_align_functions)
4740 str_align_functions = "16";
4741 if (flag_align_jumps && !str_align_jumps)
4742 str_align_jumps = "16";
4743 if (flag_align_loops && !str_align_loops)
4744 {
4745 can_override_loop_align = 1;
4746 str_align_loops = "16";
4747 }
4748 }
4749 }
4750
4751 /* Arrange to save and restore machine status around nested functions. */
4752 init_machine_status = rs6000_init_machine_status;
4753
4754 /* We should always be splitting complex arguments, but we can't break
4755 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4756 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4757 targetm.calls.split_complex_arg = NULL;
4758
4759 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4760 if (DEFAULT_ABI == ABI_AIX)
4761 targetm.calls.custom_function_descriptors = 0;
4762 }
4763
4764 /* Initialize rs6000_cost with the appropriate target costs. */
4765 if (optimize_size)
4766 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4767 else
4768 switch (rs6000_tune)
4769 {
4770 case PROCESSOR_RS64A:
4771 rs6000_cost = &rs64a_cost;
4772 break;
4773
4774 case PROCESSOR_MPCCORE:
4775 rs6000_cost = &mpccore_cost;
4776 break;
4777
4778 case PROCESSOR_PPC403:
4779 rs6000_cost = &ppc403_cost;
4780 break;
4781
4782 case PROCESSOR_PPC405:
4783 rs6000_cost = &ppc405_cost;
4784 break;
4785
4786 case PROCESSOR_PPC440:
4787 rs6000_cost = &ppc440_cost;
4788 break;
4789
4790 case PROCESSOR_PPC476:
4791 rs6000_cost = &ppc476_cost;
4792 break;
4793
4794 case PROCESSOR_PPC601:
4795 rs6000_cost = &ppc601_cost;
4796 break;
4797
4798 case PROCESSOR_PPC603:
4799 rs6000_cost = &ppc603_cost;
4800 break;
4801
4802 case PROCESSOR_PPC604:
4803 rs6000_cost = &ppc604_cost;
4804 break;
4805
4806 case PROCESSOR_PPC604e:
4807 rs6000_cost = &ppc604e_cost;
4808 break;
4809
4810 case PROCESSOR_PPC620:
4811 rs6000_cost = &ppc620_cost;
4812 break;
4813
4814 case PROCESSOR_PPC630:
4815 rs6000_cost = &ppc630_cost;
4816 break;
4817
4818 case PROCESSOR_CELL:
4819 rs6000_cost = &ppccell_cost;
4820 break;
4821
4822 case PROCESSOR_PPC750:
4823 case PROCESSOR_PPC7400:
4824 rs6000_cost = &ppc750_cost;
4825 break;
4826
4827 case PROCESSOR_PPC7450:
4828 rs6000_cost = &ppc7450_cost;
4829 break;
4830
4831 case PROCESSOR_PPC8540:
4832 case PROCESSOR_PPC8548:
4833 rs6000_cost = &ppc8540_cost;
4834 break;
4835
4836 case PROCESSOR_PPCE300C2:
4837 case PROCESSOR_PPCE300C3:
4838 rs6000_cost = &ppce300c2c3_cost;
4839 break;
4840
4841 case PROCESSOR_PPCE500MC:
4842 rs6000_cost = &ppce500mc_cost;
4843 break;
4844
4845 case PROCESSOR_PPCE500MC64:
4846 rs6000_cost = &ppce500mc64_cost;
4847 break;
4848
4849 case PROCESSOR_PPCE5500:
4850 rs6000_cost = &ppce5500_cost;
4851 break;
4852
4853 case PROCESSOR_PPCE6500:
4854 rs6000_cost = &ppce6500_cost;
4855 break;
4856
4857 case PROCESSOR_TITAN:
4858 rs6000_cost = &titan_cost;
4859 break;
4860
4861 case PROCESSOR_POWER4:
4862 case PROCESSOR_POWER5:
4863 rs6000_cost = &power4_cost;
4864 break;
4865
4866 case PROCESSOR_POWER6:
4867 rs6000_cost = &power6_cost;
4868 break;
4869
4870 case PROCESSOR_POWER7:
4871 rs6000_cost = &power7_cost;
4872 break;
4873
4874 case PROCESSOR_POWER8:
4875 rs6000_cost = &power8_cost;
4876 break;
4877
4878 case PROCESSOR_POWER9:
4879 rs6000_cost = &power9_cost;
4880 break;
4881
4882 case PROCESSOR_POWER10:
4883 rs6000_cost = &power10_cost;
4884 break;
4885
4886 case PROCESSOR_FUTURE:
4887 rs6000_cost = &future_cost;
4888 break;
4889
4890 case PROCESSOR_PPCA2:
4891 rs6000_cost = &ppca2_cost;
4892 break;
4893
4894 default:
4895 gcc_unreachable ();
4896 }
4897
4898 if (global_init_p)
4899 {
4900 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4901 param_simultaneous_prefetches,
4902 rs6000_cost->simultaneous_prefetches);
4903 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4904 param_l1_cache_size,
4905 rs6000_cost->l1_cache_size);
4906 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4907 param_l1_cache_line_size,
4908 rs6000_cost->cache_line_size);
4909 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4910 param_l2_cache_size,
4911 rs6000_cost->l2_cache_size);
4912
4913 /* Increase loop peeling limits based on performance analysis. */
4914 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4915 param_max_peeled_insns, 400);
4916 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4917 param_max_completely_peeled_insns, 400);
4918
4919 /* The lxvl/stxvl instructions don't perform well before Power10. */
4920 if (TARGET_POWER10)
4921 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4922 param_vect_partial_vector_usage, 1);
4923 else
4924 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4925 param_vect_partial_vector_usage, 0);
4926
4927 /* Use the 'model' -fsched-pressure algorithm by default. */
4928 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4929 param_sched_pressure_algorithm,
4930 SCHED_PRESSURE_MODEL);
4931
4932 /* If using typedef char *va_list, signal that
4933 __builtin_va_start (&ap, 0) can be optimized to
4934 ap = __builtin_next_arg (0). */
4935 if (DEFAULT_ABI != ABI_V4)
4936 targetm.expand_builtin_va_start = NULL;
4937 }
4938
4939 rs6000_override_options_after_change ();
4940
4941 /* If not explicitly specified via option, decide whether to generate indexed
4942 load/store instructions. A value of -1 indicates that the
4943 initial value of this variable has not been overwritten. During
4944 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4945 if (TARGET_AVOID_XFORM == -1)
4946 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4947 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4948 need indexed accesses and the type used is the scalar type of the element
4949 being loaded or stored. */
4950 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4951 && !TARGET_ALTIVEC);
4952
4953 /* Set the -mrecip options. */
4954 if (rs6000_recip_name)
4955 {
4956 char *p = ASTRDUP (rs6000_recip_name);
4957 char *q;
4958 unsigned int mask, i;
4959 bool invert;
4960
4961 while ((q = strtok (p, ",")) != NULL)
4962 {
4963 p = NULL;
4964 if (*q == '!')
4965 {
4966 invert = true;
4967 q++;
4968 }
4969 else
4970 invert = false;
4971
4972 if (!strcmp (q, "default"))
4973 mask = ((TARGET_RECIP_PRECISION)
4974 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4975 else
4976 {
4977 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4978 if (!strcmp (q, recip_options[i].string))
4979 {
4980 mask = recip_options[i].mask;
4981 break;
4982 }
4983
4984 if (i == ARRAY_SIZE (recip_options))
4985 {
4986 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4987 invert = false;
4988 mask = 0;
4989 ret = false;
4990 }
4991 }
4992
4993 if (invert)
4994 rs6000_recip_control &= ~mask;
4995 else
4996 rs6000_recip_control |= mask;
4997 }
4998 }
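/* Illustrative walk-through (added commentary, not from the original
   source): for -mrecip=default,!divd, strtok first yields "default",
   which ORs RECIP_HIGH_PRECISION or RECIP_LOW_PRECISION (depending on
   TARGET_RECIP_PRECISION) into rs6000_recip_control; the next token
   "!divd" has a leading '!', so the mask that recip_options maps
   "divd" to is cleared again.  An unrecognized token is diagnosed
   and makes this function return false.  */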
4999
5000 /* Initialize all of the registers. */
5001 rs6000_init_hard_regno_mode_ok (global_init_p);
5002
5003 /* Save the initial options in case the user uses function-specific options. */
5004 if (global_init_p)
5005 target_option_default_node = target_option_current_node
5006 = build_target_option_node (&global_options, &global_options_set);
5007
5008 /* If not explicitly specified via option, decide whether to generate the
5009 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5010 if (TARGET_LINK_STACK == -1)
5011 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
5012
5013 /* Deprecate use of -mno-speculate-indirect-jumps. */
5014 if (!rs6000_speculate_indirect_jumps)
5015 warning (0, "%qs is deprecated and not recommended in any circumstances",
5016 "-mno-speculate-indirect-jumps");
5017
5018 return ret;
5019 }
5020
5021 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5022 define the target cpu type. */
5023
5024 static void
5025 rs6000_option_override (void)
5026 {
5027 (void) rs6000_option_override_internal (true);
5028 }
5029
5030 \f
5031 /* Implement LOOP_ALIGN. */
5032 align_flags
5033 rs6000_loop_align (rtx label)
5034 {
5035 basic_block bb;
5036 int ninsns;
5037
5038 /* Don't override loop alignment if -falign-loops was specified. */
5039 if (!can_override_loop_align)
5040 return align_loops;
5041
5042 bb = BLOCK_FOR_INSN (label);
5043 ninsns = num_loop_insns (bb->loop_father);
5044
5045 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
5046 if (ninsns > 4 && ninsns <= 8
5047 && (rs6000_tune == PROCESSOR_POWER4
5048 || rs6000_tune == PROCESSOR_POWER5
5049 || rs6000_tune == PROCESSOR_POWER6
5050 || rs6000_tune == PROCESSOR_POWER7
5051 || rs6000_tune == PROCESSOR_POWER8))
5052 return align_flags (5);
5053 else
5054 return align_loops;
5055 }
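/* Added note (not in the original source): align_flags takes a log2
   argument, so align_flags (5) above requests a 2^5 = 32-byte
   alignment, i.e. the icache-sector alignment mentioned in the
   comment, while larger or smaller loops keep the global align_loops
   setting.  */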
5056
5057 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5058 after applying N iterations. This routine does not determine how many
5059 iterations are required to reach the desired alignment. */
5060
5061 static bool
5062 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5063 {
5064 if (is_packed)
5065 return false;
5066
5067 if (TARGET_32BIT)
5068 {
5069 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5070 return true;
5071
5072 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5073 return true;
5074
5075 return false;
5076 }
5077 else
5078 {
5079 if (TARGET_MACHO)
5080 return false;
5081
5082 /* Assume that all other types are naturally aligned. CHECKME! */
5083 return true;
5084 }
5085 }
5086
5087 /* Return true if the vector misalignment factor is supported by the
5088 target. */
5089 static bool
5090 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5091 const_tree type,
5092 int misalignment,
5093 bool is_packed)
5094 {
5095 if (TARGET_VSX)
5096 {
5097 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5098 return true;
5099
5100 /* Return false if the movmisalign pattern is not supported for this mode. */
5101 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5102 return false;
5103
5104 if (misalignment == -1)
5105 {
5106 /* Misalignment factor is unknown at compile time but we know
5107 it's word aligned. */
5108 if (rs6000_vector_alignment_reachable (type, is_packed))
5109 {
5110 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5111
5112 if (element_size == 64 || element_size == 32)
5113 return true;
5114 }
5115
5116 return false;
5117 }
5118
5119 /* VSX supports word-aligned vectors. */
5120 if (misalignment % 4 == 0)
5121 return true;
5122 }
5123 return false;
5124 }
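/* Worked example (added commentary, not from the original source):
   for a V4SF access with a compile-time misalignment of 8 bytes on a
   VSX target without TARGET_EFFICIENT_UNALIGNED_VSX, the final test
   above succeeds because 8 % 4 == 0, so the misaligned access is
   supported via the movmisalign pattern; a misalignment of 2 bytes
   would fail the test and be rejected.  */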
5125
5126 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5127 static int
5128 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5129 tree vectype, int misalign)
5130 {
5131 unsigned elements;
5132 tree elem_type;
5133
5134 switch (type_of_cost)
5135 {
5136 case scalar_stmt:
5137 case scalar_store:
5138 case vector_stmt:
5139 case vector_store:
5140 case vec_to_scalar:
5141 case scalar_to_vec:
5142 case cond_branch_not_taken:
5143 return 1;
5144 case scalar_load:
5145 case vector_load:
5146 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5147 return 2;
5148
5149 case vec_perm:
5150 /* Power7 has only one permute unit, so make it a bit expensive. */
5151 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5152 return 3;
5153 else
5154 return 1;
5155
5156 case vec_promote_demote:
5157 /* Power7 has only one permute/pack unit, so make it a bit expensive. */
5158 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5159 return 4;
5160 else
5161 return 1;
5162
5163 case cond_branch_taken:
5164 return 3;
5165
5166 case unaligned_load:
5167 case vector_gather_load:
5168 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5169 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5170 return 2;
5171
5172 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5173 {
5174 elements = TYPE_VECTOR_SUBPARTS (vectype);
5175 /* See PR102767, consider V1TI to keep consistency. */
5176 if (elements == 2 || elements == 1)
5177 /* Double word aligned. */
5178 return 4;
5179
5180 if (elements == 4)
5181 {
5182 switch (misalign)
5183 {
5184 case 8:
5185 /* Double word aligned. */
5186 return 4;
5187
5188 case -1:
5189 /* Unknown misalignment. */
5190 case 4:
5191 case 12:
5192 /* Word aligned. */
5193 return 33;
5194
5195 default:
5196 gcc_unreachable ();
5197 }
5198 }
5199 }
5200
5201 if (TARGET_ALTIVEC)
5202 /* Misaligned loads are not supported. */
5203 gcc_unreachable ();
5204
5205 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5206 return 4;
5207
5208 case unaligned_store:
5209 case vector_scatter_store:
5210 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5211 return 1;
5212
5213 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5214 {
5215 elements = TYPE_VECTOR_SUBPARTS (vectype);
5216 /* See PR102767, consider V1TI to keep consistency. */
5217 if (elements == 2 || elements == 1)
5218 /* Double word aligned. */
5219 return 2;
5220
5221 if (elements == 4)
5222 {
5223 switch (misalign)
5224 {
5225 case 8:
5226 /* Double word aligned. */
5227 return 2;
5228
5229 case -1:
5230 /* Unknown misalignment. */
5231 case 4:
5232 case 12:
5233 /* Word aligned. */
5234 return 23;
5235
5236 default:
5237 gcc_unreachable ();
5238 }
5239 }
5240 }
5241
5242 if (TARGET_ALTIVEC)
5243 /* Misaligned stores are not supported. */
5244 gcc_unreachable ();
5245
5246 return 2;
5247
5248 case vec_construct:
5249 /* This is a rough approximation assuming non-constant elements
5250 constructed into a vector via element insertion. FIXME:
5251 vec_construct is not granular enough for uniformly good
5252 decisions. If the initialization is a splat, this is
5253 cheaper than we estimate. Improve this someday. */
5254 elem_type = TREE_TYPE (vectype);
5255 /* 32-bit vectors loaded into registers are stored as double
5256 precision, so we need 2 permutes, 2 converts, and 1 merge
5257 to construct a vector of short floats from them. */
5258 if (SCALAR_FLOAT_TYPE_P (elem_type)
5259 && TYPE_PRECISION (elem_type) == 32)
5260 return 5;
5261 /* On POWER9, integer vector types are built up in GPRs and then
5262 use a direct move (2 cycles). For POWER8 this is even worse,
5263 as we need two direct moves and a merge, and the direct moves
5264 are five cycles. */
5265 else if (INTEGRAL_TYPE_P (elem_type))
5266 {
5267 if (TARGET_P9_VECTOR)
5268 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5269 else
5270 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5271 }
5272 else
5273 /* V2DFmode doesn't need a direct move. */
5274 return 2;
5275
5276 default:
5277 gcc_unreachable ();
5278 }
5279 }
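/* Worked example (added commentary, not from the original source):
   costing an unaligned_load of a V4SI vector with misalign == 4 on a
   VSX target without TARGET_EFFICIENT_UNALIGNED_VSX but with
   TARGET_ALLOW_MOVMISALIGN takes the elements == 4 path above and
   returns 33, reflecting how expensive a word-aligned-only access is
   compared to the doubleword-aligned case, which returns 4.  */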
5280
5281 /* Implement targetm.vectorize.preferred_simd_mode. */
5282
5283 static machine_mode
5284 rs6000_preferred_simd_mode (scalar_mode mode)
5285 {
5286 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5287
5288 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5289 return vmode.require ();
5290
5291 return word_mode;
5292 }
5293
5294 class rs6000_cost_data : public vector_costs
5295 {
5296 public:
5297 using vector_costs::vector_costs;
5298
5299 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5300 stmt_vec_info stmt_info, slp_tree, tree vectype,
5301 int misalign,
5302 vect_cost_model_location where) override;
5303 void finish_cost (const vector_costs *) override;
5304
5305 protected:
5306 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5307 vect_cost_model_location, unsigned int);
5308 void density_test (loop_vec_info);
5309 void adjust_vect_cost_per_loop (loop_vec_info);
5310 unsigned int determine_suggested_unroll_factor (loop_vec_info);
5311
5312 /* Total number of vectorized stmts (loop only). */
5313 unsigned m_nstmts = 0;
5314 /* Total number of loads (loop only). */
5315 unsigned m_nloads = 0;
5316 /* Total number of stores (loop only). */
5317 unsigned m_nstores = 0;
5318 /* Reduction factor for suggesting unroll factor (loop only). */
5319 unsigned m_reduc_factor = 0;
5320 /* Possible extra penalized cost on vector construction (loop only). */
5321 unsigned m_extra_ctor_cost = 0;
5322 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5323 instruction is needed by the vectorization. */
5324 bool m_vect_nonmem = false;
5325 /* If this loop gets vectorized with emulated gather load. */
5326 bool m_gather_load = false;
5327 };
5328
5329 /* Test for likely overcommitment of vector hardware resources. If a
5330 loop iteration is relatively large, and too large a percentage of
5331 instructions in the loop are vectorized, the cost model may not
5332 adequately reflect delays from unavailable vector resources.
5333 Penalize the loop body cost for this case. */
5334
5335 void
5336 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5337 {
5338 /* This density test only cares about the cost of the vector version of
5339 the loop, so immediately return if we are costing for the scalar
5340 version (namely computing the single scalar iteration cost). */
5341 if (m_costing_for_scalar)
5342 return;
5343
5344 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5345 basic_block *bbs = get_loop_body (loop);
5346 int nbbs = loop->num_nodes;
5347 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5348
5349 for (int i = 0; i < nbbs; i++)
5350 {
5351 basic_block bb = bbs[i];
5352 gimple_stmt_iterator gsi;
5353
5354 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5355 {
5356 gimple *stmt = gsi_stmt (gsi);
5357 if (is_gimple_debug (stmt))
5358 continue;
5359
5360 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5361
5362 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5363 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5364 not_vec_cost++;
5365 }
5366 }
5367
5368 free (bbs);
5369 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5370
5371 if (density_pct > rs6000_density_pct_threshold
5372 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5373 {
5374 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5375 if (dump_enabled_p ())
5376 dump_printf_loc (MSG_NOTE, vect_location,
5377 "density %d%%, cost %d exceeds threshold, penalizing "
5378 "loop body cost by %u%%\n", density_pct,
5379 vec_cost + not_vec_cost, rs6000_density_penalty);
5380 }
5381
5382 /* Check whether we need to penalize the body cost to account
5383 for excess strided or elementwise loads. */
5384 if (m_extra_ctor_cost > 0)
5385 {
5386 gcc_assert (m_nloads <= m_nstmts);
5387 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5388
5389 /* Performance is likely to be bounded by latency and execution
5390 resources when many scalar strided or elementwise loads are
5391 combined into a vector, if both conditions below hold:
5392 1. there are many loads, so it's easy to end up waiting a
5393 long time for the load units;
5394 2. loads are a big proportion of all vectorized statements,
5395 so it's hard to schedule other statements to spread among
5396 the loads.
5397 One typical case is the innermost loop of the hotspot of SPEC2017
5398 503.bwaves_r without loop interchange. */
5399 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5400 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5401 {
5402 m_costs[vect_body] += m_extra_ctor_cost;
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_NOTE, vect_location,
5405 "Found %u loads and "
5406 "load pct. %u%% exceed "
5407 "the threshold, "
5408 "penalizing loop body "
5409 "cost by extra cost %u "
5410 "for ctor.\n",
5411 m_nloads, load_pct,
5412 m_extra_ctor_cost);
5413 }
5414 }
5415 }
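/* Worked example (added commentary, not from the original source):
   suppose the loop body has vec_cost == 90 and not_vec_cost == 10, so
   density_pct == 90.  If that exceeds rs6000_density_pct_threshold
   and the total of 100 exceeds rs6000_density_size_threshold, the
   body cost is scaled by (100 + rs6000_density_penalty) / 100; e.g.
   a penalty of 25 would raise it from 90 to 112.  */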
5416
5417 /* Implement targetm.vectorize.create_costs. */
5418
5419 static vector_costs *
5420 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5421 {
5422 return new rs6000_cost_data (vinfo, costing_for_scalar);
5423 }
5424
5425 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5426 For some statements, we would like to further fine-tune the cost on
5427 top of the rs6000_builtin_vectorization_cost handling, which doesn't have
5428 any information on statement operation codes etc. One typical case here
5429 is COND_EXPR: it takes the same cost as a simple FXU instruction when
5430 evaluated for scalar cost, but it should be priced higher whether it is
5431 transformed into compare + branch or into compare + isel instructions. */
5432
5433 static unsigned
5434 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5435 struct _stmt_vec_info *stmt_info)
5436 {
5437 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5438 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5439 {
5440 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5441 if (subcode == COND_EXPR)
5442 return 2;
5443 }
5444
5445 return 0;
5446 }
5447
5448 /* Helper function for add_stmt_cost. Check each statement cost
5449 entry, gather information and update the target_cost fields
5450 accordingly. */
5451 void
5452 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5453 stmt_vec_info stmt_info,
5454 vect_cost_model_location where,
5455 unsigned int orig_count)
5456 {
5457
5458 /* Check whether we're doing something other than just a copy loop.
5459 Not all such loops may be profitably vectorized; see
5460 rs6000_finish_cost. */
5461 if (kind == vec_to_scalar
5462 || kind == vec_perm
5463 || kind == vec_promote_demote
5464 || kind == vec_construct
5465 || kind == scalar_to_vec
5466 || (where == vect_body && kind == vector_stmt))
5467 m_vect_nonmem = true;
5468
5469 /* Gather some information when we are costing the vectorized instruction
5470 for the statements located in a loop body. */
5471 if (!m_costing_for_scalar
5472 && is_a<loop_vec_info> (m_vinfo)
5473 && where == vect_body)
5474 {
5475 m_nstmts += orig_count;
5476
5477 if (kind == scalar_load
5478 || kind == vector_load
5479 || kind == unaligned_load
5480 || kind == vector_gather_load)
5481 {
5482 m_nloads += orig_count;
5483 if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5484 m_gather_load = true;
5485 }
5486 else if (kind == scalar_store
5487 || kind == vector_store
5488 || kind == unaligned_store
5489 || kind == vector_scatter_store)
5490 m_nstores += orig_count;
5491 else if ((kind == scalar_stmt
5492 || kind == vector_stmt
5493 || kind == vec_to_scalar)
5494 && stmt_info
5495 && vect_is_reduction (stmt_info))
5496 {
5497 /* The loop body contains normal int or fp operations and the
5498 epilogue contains a vector reduction. For simplicity, we assume
5499 an int operation takes one cycle and an fp operation one more. */
5500 tree lhs = gimple_get_lhs (stmt_info->stmt);
5501 bool is_float = FLOAT_TYPE_P (TREE_TYPE (lhs));
5502 unsigned int basic_cost = is_float ? 2 : 1;
5503 m_reduc_factor = MAX (basic_cost * orig_count, m_reduc_factor);
5504 }
5505
5506 /* Power processors do not currently have instructions for strided
5507 and elementwise loads, and instead we must generate multiple
5508 scalar loads. This leads to undercounting of the cost. We
5509 account for this by scaling the construction cost by the number
5510 of elements involved, and saving this as extra cost that we may
5511 or may not need to apply. When finalizing the cost of the loop,
5512 the extra penalty is applied when the load density heuristics
5513 are satisfied. */
5514 if (kind == vec_construct && stmt_info
5515 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5516 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5517 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5518 {
5519 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5520 unsigned int nunits = vect_nunits_for_cost (vectype);
5521 /* As PR103702 shows, it's possible that the vectorizer wants to
5522 cost only one unit here; there is no need to penalize it, so
5523 simply return early. */
5524 if (nunits == 1)
5525 return;
5526 /* The i386 port uses nunits * stmt_cost as the penalized cost
5527 for this kind of penalization. We used to follow it, but
5528 found it could result in an unreliable body cost, especially
5529 for V16QI/V8HI modes. To improve this, we choose a new
5530 heuristic: for each scalar load, we use 2 as the penalized
5531 cost for the case with 2 nunits and use 1 for the other
5532 cases. There is not much supporting theory; it is mainly
5533 concluded from broad performance evaluations on Power8,
5534 Power9 and Power10. One possibly related point: vector
5535 construction for more units needs more insns, so there are
5536 more chances to schedule them better (even to run them in
5537 parallel when enough units are available at that time), so
5538 it seems reasonable not to penalize them that much. */
5539 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5540 unsigned int extra_cost = nunits * adjusted_cost;
5541 m_extra_ctor_cost += extra_cost;
5542 }
5543 }
5544 }
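/* Worked example (added commentary, not from the original source):
   a vec_construct for a strided V2DI load has nunits == 2, so the
   adjusted cost is 2 and 2 * 2 == 4 is accumulated into
   m_extra_ctor_cost; a strided V16QI construct has nunits == 16 and
   adjusted cost 1, accumulating 16.  Whether these extra costs are
   actually applied is decided later by the load-density checks in
   density_test.  */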
5545
5546 unsigned
5547 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5548 stmt_vec_info stmt_info, slp_tree,
5549 tree vectype, int misalign,
5550 vect_cost_model_location where)
5551 {
5552 unsigned retval = 0;
5553
5554 if (flag_vect_cost_model)
5555 {
5556 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5557 misalign);
5558 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5559 /* Statements in an inner loop relative to the loop being
5560 vectorized are weighted more heavily. The value here is
5561 arbitrary and could potentially be improved with analysis. */
5562 unsigned int orig_count = count;
5563 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5564 m_costs[where] += retval;
5565
5566 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5567 }
5568
5569 return retval;
5570 }
5571
5572 /* For target-specific vectorization costs which can't be handled per stmt,
5573 we check the requisite conditions and adjust the vectorization cost
5574 accordingly if they are satisfied. One typical example is modeling the
5575 shift cost for vectors with length by counting the number of required
5576 lengths under the condition LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5577
5578 void
5579 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5580 {
5581 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5582 {
5583 rgroup_controls *rgc;
5584 unsigned int num_vectors_m1;
5585 unsigned int shift_cnt = 0;
5586 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5587 if (rgc->type)
5588 /* Each length needs one shift to fill into bits 0-7. */
5589 shift_cnt += num_vectors_m1 + 1;
5590
5591 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5592 NULL_TREE, 0, vect_body);
5593 }
5594 }
5595
5596 /* Determine the suggested unroll factor by considering the factors below:
5597
5598 - the unroll option/pragma, which can disable unrolling for this loop;
5599 - a simple hardware resource model for non-memory vector insns;
5600 - aggressive heuristics when the iteration count is unknown:
5601 - the reduction case, to break cross-iteration dependencies;
5602 - emulated gather loads;
5603 - the estimated iteration count when the iteration count is unknown.
5604 */
5605
5606
5607 unsigned int
5608 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
5609 {
5610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5611
5612 /* Don't unroll if it's specified explicitly not to be unrolled. */
5613 if (loop->unroll == 1
5614 || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
5615 || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
5616 return 1;
5617
5618 unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
5619 /* Don't unroll if there are no vector instructions except memory accesses. */
5620 if (nstmts_nonldst == 0)
5621 return 1;
5622
5623 /* Consider breaking cross iteration dependency for reduction. */
5624 unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
5625
5626 /* Use this simple hardware resource model for how many non-ld/st
5627 vector instructions can be issued per cycle. */
5628 unsigned int issue_width = rs6000_vect_unroll_issue;
5629 unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
5630 uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
5631 /* Make sure it is power of 2. */
5632 uf = 1 << ceil_log2 (uf);
5633
5634 /* If the iteration count is known, the costing is exact enough;
5635 don't worry that it could be worse. */
5636 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5637 return uf;
5638
5639 /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
5640 loop if either condition is satisfied:
5641 - reduction factor exceeds the threshold;
5642 - emulated gather load adopted. */
5643 if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold
5644 || m_gather_load)
5645 return uf;
5646
5647 /* Check if we can conclude it's good to unroll from the estimated
5648 iteration count. */
5649 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
5650 unsigned int vf = vect_vf_for_cost (loop_vinfo);
5651 unsigned int unrolled_vf = vf * uf;
5652 if (est_niter == -1 || est_niter < unrolled_vf)
5653 /* When the estimated iteration count of this loop is unknown, it's
5654 possible that we are able to vectorize this loop with the original VF
5655 but no longer with the unrolled VF if the actual iteration count
5656 falls in between. */
5657 return 1;
5658 else
5659 {
5660 unsigned int epil_niter_unr = est_niter % unrolled_vf;
5661 unsigned int epil_niter = est_niter % vf;
5662 /* Even if we have partial vector support, it can still be inefficient
5663 to calculate the length when the iteration count is unknown, so
5664 only expect unrolling to be good when the epilogue iteration count
5665 is not bigger than VF (only one length calculation). */
5666 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5667 && epil_niter_unr <= vf)
5668 return uf;
5669 /* Without partial vector support, conservatively unroll this when
5670 the epilogue iteration count is less than the original one
5671 (epilogue execution time wouldn't be longer than before). */
5672 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5673 && epil_niter_unr <= epil_niter)
5674 return uf;
5675 }
5676
5677 return 1;
5678 }
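/* Worked example (added commentary, not from the original source):
   with m_reduc_factor == 4, six non-load/store vector statements and
   an issue width of 4, uf = CEIL (4 * 4, 6) == 3, which is capped by
   rs6000_vect_unroll_limit and then rounded up to a power of two,
   giving 4.  For a loop with a known iteration count that value is
   returned directly; otherwise the reduction, gather-load and
   estimated iteration count checks above decide between uf and 1.  */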
5679
5680 void
5681 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5682 {
5683 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5684 {
5685 adjust_vect_cost_per_loop (loop_vinfo);
5686 density_test (loop_vinfo);
5687
5688 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5689 that require versioning for any reason. The vectorization is at
5690 best a wash inside the loop, and the versioning checks make
5691 profitability highly unlikely and potentially quite harmful. */
5692 if (!m_vect_nonmem
5693 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5694 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5695 m_costs[vect_body] += 10000;
5696
5697 m_suggested_unroll_factor
5698 = determine_suggested_unroll_factor (loop_vinfo);
5699 }
5700
5701 vector_costs::finish_cost (scalar_costs);
5702 }
5703
5704 /* Implement targetm.loop_unroll_adjust. */
5705
5706 static unsigned
5707 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5708 {
5709 if (unroll_only_small_loops)
5710 {
5711 /* TODO: These are hardcoded values right now. We probably should use
5712 a PARAM here. */
5713 if (loop->ninsns <= 6)
5714 return MIN (4, nunroll);
5715 if (loop->ninsns <= 10)
5716 return MIN (2, nunroll);
5717
5718 return 0;
5719 }
5720
5721 return nunroll;
5722 }
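/* Added note (not from the original source): under
   -munroll-only-small-loops this means a loop of at most 6 insns may
   be unrolled up to 4 times, one of at most 10 insns up to 2 times,
   and anything larger not at all, regardless of the generic NUNROLL
   suggestion (which is only lowered, never raised, by the MIN).  */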
5723
5724 /* Returns a function decl for a vectorized version of the builtin function
5725 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5726 if it is not available.
5727
5728 Implement targetm.vectorize.builtin_vectorized_function. */
5729
5730 static tree
5731 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5732 tree type_in)
5733 {
5734 machine_mode in_mode, out_mode;
5735 int in_n, out_n;
5736
5737 if (TARGET_DEBUG_BUILTIN)
5738 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5739 combined_fn_name (combined_fn (fn)),
5740 GET_MODE_NAME (TYPE_MODE (type_out)),
5741 GET_MODE_NAME (TYPE_MODE (type_in)));
5742
5743 /* TODO: Should this be gcc_assert? */
5744 if (TREE_CODE (type_out) != VECTOR_TYPE
5745 || TREE_CODE (type_in) != VECTOR_TYPE)
5746 return NULL_TREE;
5747
5748 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5749 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5750 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5751 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5752
5753 switch (fn)
5754 {
5755 CASE_CFN_COPYSIGN:
5756 if (VECTOR_UNIT_VSX_P (V2DFmode)
5757 && out_mode == DFmode && out_n == 2
5758 && in_mode == DFmode && in_n == 2)
5759 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5760 if (VECTOR_UNIT_VSX_P (V4SFmode)
5761 && out_mode == SFmode && out_n == 4
5762 && in_mode == SFmode && in_n == 4)
5763 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5764 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5765 && out_mode == SFmode && out_n == 4
5766 && in_mode == SFmode && in_n == 4)
5767 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5768 break;
5769 CASE_CFN_CEIL:
5770 if (VECTOR_UNIT_VSX_P (V2DFmode)
5771 && out_mode == DFmode && out_n == 2
5772 && in_mode == DFmode && in_n == 2)
5773 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5774 if (VECTOR_UNIT_VSX_P (V4SFmode)
5775 && out_mode == SFmode && out_n == 4
5776 && in_mode == SFmode && in_n == 4)
5777 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5778 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5779 && out_mode == SFmode && out_n == 4
5780 && in_mode == SFmode && in_n == 4)
5781 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5782 break;
5783 CASE_CFN_FLOOR:
5784 if (VECTOR_UNIT_VSX_P (V2DFmode)
5785 && out_mode == DFmode && out_n == 2
5786 && in_mode == DFmode && in_n == 2)
5787 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5788 if (VECTOR_UNIT_VSX_P (V4SFmode)
5789 && out_mode == SFmode && out_n == 4
5790 && in_mode == SFmode && in_n == 4)
5791 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5792 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5793 && out_mode == SFmode && out_n == 4
5794 && in_mode == SFmode && in_n == 4)
5795 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5796 break;
5797 CASE_CFN_FMA:
5798 if (VECTOR_UNIT_VSX_P (V2DFmode)
5799 && out_mode == DFmode && out_n == 2
5800 && in_mode == DFmode && in_n == 2)
5801 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5802 if (VECTOR_UNIT_VSX_P (V4SFmode)
5803 && out_mode == SFmode && out_n == 4
5804 && in_mode == SFmode && in_n == 4)
5805 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5806 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5807 && out_mode == SFmode && out_n == 4
5808 && in_mode == SFmode && in_n == 4)
5809 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5810 break;
5811 CASE_CFN_TRUNC:
5812 if (VECTOR_UNIT_VSX_P (V2DFmode)
5813 && out_mode == DFmode && out_n == 2
5814 && in_mode == DFmode && in_n == 2)
5815 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5816 if (VECTOR_UNIT_VSX_P (V4SFmode)
5817 && out_mode == SFmode && out_n == 4
5818 && in_mode == SFmode && in_n == 4)
5819 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5820 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5821 && out_mode == SFmode && out_n == 4
5822 && in_mode == SFmode && in_n == 4)
5823 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5824 break;
5825 CASE_CFN_NEARBYINT:
5826 if (VECTOR_UNIT_VSX_P (V2DFmode)
5827 && flag_unsafe_math_optimizations
5828 && out_mode == DFmode && out_n == 2
5829 && in_mode == DFmode && in_n == 2)
5830 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5831 if (VECTOR_UNIT_VSX_P (V4SFmode)
5832 && flag_unsafe_math_optimizations
5833 && out_mode == SFmode && out_n == 4
5834 && in_mode == SFmode && in_n == 4)
5835 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5836 break;
5837 CASE_CFN_RINT:
5838 if (VECTOR_UNIT_VSX_P (V2DFmode)
5839 && !flag_trapping_math
5840 && out_mode == DFmode && out_n == 2
5841 && in_mode == DFmode && in_n == 2)
5842 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5843 if (VECTOR_UNIT_VSX_P (V4SFmode)
5844 && !flag_trapping_math
5845 && out_mode == SFmode && out_n == 4
5846 && in_mode == SFmode && in_n == 4)
5847 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5848 break;
5849 default:
5850 break;
5851 }
5852
5853 /* Generate calls to libmass if appropriate. */
5854 if (rs6000_veclib_handler)
5855 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5856
5857 return NULL_TREE;
5858 }
5859
5860 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5861 library with vectorized intrinsics. */
5862
5863 static tree
5864 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5865 tree type_in)
5866 {
5867 char name[32];
5868 const char *suffix = NULL;
5869 tree fntype, new_fndecl, bdecl = NULL_TREE;
5870 int n_args = 1;
5871 const char *bname;
5872 machine_mode el_mode, in_mode;
5873 int n, in_n;
5874
5875 /* Libmass is suitable only for unsafe math, as it does not correctly
5876 support parts of IEEE such as denormals with the required precision.
5877 Only support it if we have VSX to use the simd d2 or f4 functions.
5878 XXX: Add variable-length support. */
5879 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5880 return NULL_TREE;
5881
5882 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5883 n = TYPE_VECTOR_SUBPARTS (type_out);
5884 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5885 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5886 if (el_mode != in_mode
5887 || n != in_n)
5888 return NULL_TREE;
5889
5890 switch (fn)
5891 {
5892 CASE_CFN_ATAN2:
5893 CASE_CFN_HYPOT:
5894 CASE_CFN_POW:
5895 n_args = 2;
5896 gcc_fallthrough ();
5897
5898 CASE_CFN_ACOS:
5899 CASE_CFN_ACOSH:
5900 CASE_CFN_ASIN:
5901 CASE_CFN_ASINH:
5902 CASE_CFN_ATAN:
5903 CASE_CFN_ATANH:
5904 CASE_CFN_CBRT:
5905 CASE_CFN_COS:
5906 CASE_CFN_COSH:
5907 CASE_CFN_ERF:
5908 CASE_CFN_ERFC:
5909 CASE_CFN_EXP2:
5910 CASE_CFN_EXP:
5911 CASE_CFN_EXPM1:
5912 CASE_CFN_LGAMMA:
5913 CASE_CFN_LOG10:
5914 CASE_CFN_LOG1P:
5915 CASE_CFN_LOG2:
5916 CASE_CFN_LOG:
5917 CASE_CFN_SIN:
5918 CASE_CFN_SINH:
5919 CASE_CFN_SQRT:
5920 CASE_CFN_TAN:
5921 CASE_CFN_TANH:
5922 if (el_mode == DFmode && n == 2)
5923 {
5924 bdecl = mathfn_built_in (double_type_node, fn);
5925 suffix = "d2"; /* pow -> powd2 */
5926 }
5927 else if (el_mode == SFmode && n == 4)
5928 {
5929 bdecl = mathfn_built_in (float_type_node, fn);
5930 suffix = "4"; /* powf -> powf4 */
5931 }
5932 else
5933 return NULL_TREE;
5934 if (!bdecl)
5935 return NULL_TREE;
5936 break;
5937
5938 default:
5939 return NULL_TREE;
5940 }
5941
5942 gcc_assert (suffix != NULL);
5943 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5944 if (!bname)
5945 return NULL_TREE;
5946
5947 strcpy (name, bname + strlen ("__builtin_"));
5948 strcat (name, suffix);
5949
5950 if (n_args == 1)
5951 fntype = build_function_type_list (type_out, type_in, NULL);
5952 else if (n_args == 2)
5953 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5954 else
5955 gcc_unreachable ();
5956
5957 /* Build a function declaration for the vectorized function. */
5958 new_fndecl = build_decl (BUILTINS_LOCATION,
5959 FUNCTION_DECL, get_identifier (name), fntype);
5960 TREE_PUBLIC (new_fndecl) = 1;
5961 DECL_EXTERNAL (new_fndecl) = 1;
5962 DECL_IS_NOVOPS (new_fndecl) = 1;
5963 TREE_READONLY (new_fndecl) = 1;
5964
5965 return new_fndecl;
5966 }
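/* Worked example (added commentary, not from the original source):
   for CFN_POW with V2DFmode argument and result types, n_args becomes
   2, bdecl is the decl for "__builtin_pow", the "d2" suffix yields
   the external name "powd2", and a (V2DF, V2DF) -> V2DF function
   declaration is built for the MASS library routine.  */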
5967
5968 \f
5969 /* Default CPU string for rs6000*_file_start functions. */
5970 static const char *rs6000_default_cpu;
5971
5972 #ifdef USING_ELFOS_H
5973 const char *rs6000_machine;
5974
5975 const char *
5976 rs6000_machine_from_flags (void)
5977 {
5978 /* e300 and e500 */
5979 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5980 return "e300";
5981 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5982 return "e500";
5983 if (rs6000_cpu == PROCESSOR_PPCE500MC)
5984 return "e500mc";
5985 if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5986 return "e500mc64";
5987 if (rs6000_cpu == PROCESSOR_PPCE5500)
5988 return "e5500";
5989 if (rs6000_cpu == PROCESSOR_PPCE6500)
5990 return "e6500";
5991
5992 /* 400 series */
5993 if (rs6000_cpu == PROCESSOR_PPC403)
5994 return "\"403\"";
5995 if (rs6000_cpu == PROCESSOR_PPC405)
5996 return "\"405\"";
5997 if (rs6000_cpu == PROCESSOR_PPC440)
5998 return "\"440\"";
5999 if (rs6000_cpu == PROCESSOR_PPC476)
6000 return "\"476\"";
6001
6002 /* A2 */
6003 if (rs6000_cpu == PROCESSOR_PPCA2)
6004 return "a2";
6005
6006 /* Cell BE */
6007 if (rs6000_cpu == PROCESSOR_CELL)
6008 return "cell";
6009
6010 /* Titan */
6011 if (rs6000_cpu == PROCESSOR_TITAN)
6012 return "titan";
6013
6014 /* 500 series and 800 series */
6015 if (rs6000_cpu == PROCESSOR_MPCCORE)
6016 return "\"821\"";
6017
6018 #if 0
6019 /* This (and ppc64 below) is disabled here (for now at least) because
6020 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
6021 are #define'd as some of these. Untangling that is a job for later. */
6022
6023 /* 600 series and 700 series, "classic" */
6024 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
6025 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
6026 || rs6000_cpu == PROCESSOR_PPC750)
6027 return "ppc";
6028 #endif
6029
6030 /* Classic with AltiVec, "G4" */
6031 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
6032 return "\"7450\"";
6033
6034 #if 0
6035 /* The older 64-bit CPUs */
6036 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
6037 || rs6000_cpu == PROCESSOR_RS64A)
6038 return "ppc64";
6039 #endif
6040
6041 HOST_WIDE_INT flags = rs6000_isa_flags;
6042
6043 /* Disable the flags that should never influence the .machine selection. */
6044 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
6045
6046 if ((flags & (ISA_FUTURE_MASKS & ~ISA_3_1_MASKS_SERVER)) != 0)
6047 return "future";
6048 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
6049 return "power10";
6050 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
6051 return "power9";
6052 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
6053 return "power8";
6054 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
6055 return "power7";
6056 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
6057 return "power6";
6058 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
6059 return "power5";
6060 if ((flags & ISA_2_1_MASKS) != 0)
6061 return "power4";
6062 if ((flags & OPTION_MASK_POWERPC64) != 0)
6063 return "ppc64";
6064 return "ppc";
6065 }
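/* Added note (not from the original source): each test above of the
   form (flags & (ISA_NEW_MASKS & ~ISA_OLD_MASKS)) isolates the option
   bits introduced by the newer ISA level, so the .machine value is
   chosen by the newest ISA whose distinguishing flags are still
   enabled after the exclusions, falling back to "ppc64"/"ppc".  */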
6066
6067 void
6068 emit_asm_machine (void)
6069 {
6070 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
6071 }
6072 #endif
6073
6074 /* Do anything needed at the start of the asm file. */
6075
6076 static void
6077 rs6000_file_start (void)
6078 {
6079 char buffer[80];
6080 const char *start = buffer;
6081 FILE *file = asm_out_file;
6082
6083 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6084
6085 default_file_start ();
6086
6087 if (flag_verbose_asm)
6088 {
6089 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6090
6091 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6092 {
6093 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6094 start = "";
6095 }
6096
6097 if (OPTION_SET_P (rs6000_cpu_index))
6098 {
6099 fprintf (file, "%s -mcpu=%s", start,
6100 processor_target_table[rs6000_cpu_index].name);
6101 start = "";
6102 }
6103
6104 if (OPTION_SET_P (rs6000_tune_index))
6105 {
6106 fprintf (file, "%s -mtune=%s", start,
6107 processor_target_table[rs6000_tune_index].name);
6108 start = "";
6109 }
6110
6111 if (PPC405_ERRATUM77)
6112 {
6113 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6114 start = "";
6115 }
6116
6117 #ifdef USING_ELFOS_H
6118 switch (rs6000_sdata)
6119 {
6120 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6121 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6122 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6123 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6124 }
6125
6126 if (rs6000_sdata && g_switch_value)
6127 {
6128 fprintf (file, "%s -G %d", start,
6129 g_switch_value);
6130 start = "";
6131 }
6132 #endif
6133
6134 if (*start == '\0')
6135 putc ('\n', file);
6136 }
6137
6138 #ifdef USING_ELFOS_H
6139 rs6000_machine = rs6000_machine_from_flags ();
6140 emit_asm_machine ();
6141 #endif
6142
6143 if (DEFAULT_ABI == ABI_ELFv2)
6144 fprintf (file, "\t.abiversion 2\n");
6145 }
6146
6147 \f
6148 /* Return nonzero if this function is known to have a null epilogue. */
6149
6150 int
6151 direct_return (void)
6152 {
6153 if (reload_completed)
6154 {
6155 rs6000_stack_t *info = rs6000_stack_info ();
6156
6157 if (info->first_gp_reg_save == 32
6158 && info->first_fp_reg_save == 64
6159 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6160 && ! info->lr_save_p
6161 && ! info->cr_save_p
6162 && info->vrsave_size == 0
6163 && ! info->push_p)
6164 return 1;
6165 }
6166
6167 return 0;
6168 }
6169
6170 /* Helper for num_insns_constant. Calculate number of instructions to
6171 load VALUE to a single gpr using combinations of addi, addis, ori,
6172 oris, sldi and rldimi instructions. */
6173
6174 static int
6175 num_insns_constant_gpr (HOST_WIDE_INT value)
6176 {
6177 /* signed constant loadable with addi */
6178 if (SIGNED_INTEGER_16BIT_P (value))
6179 return 1;
6180
6181 /* constant loadable with addis */
6182 else if ((value & 0xffff) == 0
6183 && (value >> 31 == -1 || value >> 31 == 0))
6184 return 1;
6185
6186 /* PADDI can support up to 34 bit signed integers. */
6187 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
6188 return 1;
6189
6190 else if (TARGET_POWERPC64)
6191 {
6192 HOST_WIDE_INT low = sext_hwi (value, 32);
6193 HOST_WIDE_INT high = value >> 31;
6194
6195 if (high == 0 || high == -1)
6196 return 2;
6197
6198 high >>= 1;
6199
6200 if (low == 0 || low == high)
6201 return num_insns_constant_gpr (high) + 1;
6202 else if (high == 0)
6203 return num_insns_constant_gpr (low) + 1;
6204 else
6205 return (num_insns_constant_gpr (high)
6206 + num_insns_constant_gpr (low) + 1);
6207 }
6208
6209 else
6210 return 2;
6211 }
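/* Worked example (added commentary, not from the original source):
   value 0x12345678 is neither a signed 16-bit constant nor an
   addis-only constant, and without TARGET_PREFIXED (where paddi would
   handle it in one insn) the 64-bit path computes low == 0x12345678
   and high == 0, giving 2 insns, matching the classic
       lis   rD,0x1234    # addis from zero
       ori   rD,rD,0x5678
   sequence.  */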
6212
6213 /* Helper for num_insns_constant. Allow constants formed by the
6214 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6215 and handle modes that require multiple gprs. */
6216
6217 static int
6218 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
6219 {
6220 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6221 int total = 0;
6222 while (nregs-- > 0)
6223 {
6224 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6225 int insns = num_insns_constant_gpr (low);
6226 if (insns > 2
6227 /* We won't get more than 2 from num_insns_constant_gpr
6228 except when TARGET_POWERPC64 and mode is DImode or
6229 wider, so the register mode must be DImode. */
6230 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6231 insns = 2;
6232 total += insns;
6233 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6234 it all at once would be UB. */
6235 value >>= (BITS_PER_WORD - 1);
6236 value >>= 1;
6237 }
6238 return total;
6239 }
6240
6241 /* Return the number of instructions it takes to form a constant in as
6242 many gprs as are needed for MODE. */
6243
6244 int
6245 num_insns_constant (rtx op, machine_mode mode)
6246 {
6247 HOST_WIDE_INT val;
6248
6249 switch (GET_CODE (op))
6250 {
6251 case CONST_INT:
6252 val = INTVAL (op);
6253 break;
6254
6255 case CONST_WIDE_INT:
6256 {
6257 int insns = 0;
6258 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6259 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6260 DImode);
6261 return insns;
6262 }
6263
6264 case CONST_DOUBLE:
6265 {
6266 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6267
6268 if (mode == SFmode || mode == SDmode)
6269 {
6270 long l;
6271
6272 if (mode == SDmode)
6273 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6274 else
6275 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6276 /* See the first define_split in rs6000.md handling a
6277 const_double_operand. */
6278 val = l;
6279 mode = SImode;
6280 }
6281 else if (mode == DFmode || mode == DDmode)
6282 {
6283 long l[2];
6284
6285 if (mode == DDmode)
6286 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6287 else
6288 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6289
6290 /* See the second (32-bit) and third (64-bit) define_split
6291 in rs6000.md handling a const_double_operand. */
6292 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6293 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6294 mode = DImode;
6295 }
6296 else if (mode == TFmode || mode == TDmode
6297 || mode == KFmode || mode == IFmode)
6298 {
6299 long l[4];
6300 int insns;
6301
6302 if (mode == TDmode)
6303 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6304 else
6305 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6306
6307 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6308 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6309 insns = num_insns_constant_multi (val, DImode);
6310 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6311 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6312 insns += num_insns_constant_multi (val, DImode);
6313 return insns;
6314 }
6315 else
6316 gcc_unreachable ();
6317 }
6318 break;
6319
6320 default:
6321 gcc_unreachable ();
6322 }
6323
6324 return num_insns_constant_multi (val, mode);
6325 }
6326
6327 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6328 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6329 corresponding element of the vector, but for V4SFmode, the
6330 corresponding "float" is interpreted as an SImode integer. */
6331
6332 HOST_WIDE_INT
6333 const_vector_elt_as_int (rtx op, unsigned int elt)
6334 {
6335 rtx tmp;
6336
6337 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6338 gcc_assert (GET_MODE (op) != V2DImode
6339 && GET_MODE (op) != V2DFmode);
6340
6341 tmp = CONST_VECTOR_ELT (op, elt);
6342 if (GET_MODE (op) == V4SFmode)
6343 tmp = gen_lowpart (SImode, tmp);
6344 return INTVAL (tmp);
6345 }
6346
6347 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6348 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6349 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6350 all items are set to the same value and contain COPIES replicas of the
6351 vsplt's operand; if STEP > 1, one in every STEP elements is set to the
6352 vsplt's operand and the others are set to the value of the operand's msb. */
6353
6354 static bool
6355 vspltis_constant (rtx op, unsigned step, unsigned copies)
6356 {
6357 machine_mode mode = GET_MODE (op);
6358 machine_mode inner = GET_MODE_INNER (mode);
6359
6360 unsigned i;
6361 unsigned nunits;
6362 unsigned bitsize;
6363 unsigned mask;
6364
6365 HOST_WIDE_INT val;
6366 HOST_WIDE_INT splat_val;
6367 HOST_WIDE_INT msb_val;
6368
6369 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6370 return false;
6371
6372 nunits = GET_MODE_NUNITS (mode);
6373 bitsize = GET_MODE_BITSIZE (inner);
6374 mask = GET_MODE_MASK (inner);
6375
6376 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6377 splat_val = val;
6378 msb_val = val >= 0 ? 0 : -1;
6379
6380 if (val == 0 && step > 1)
6381 {
6382 /* Special case for loading the most significant bit with step > 1.
6383 In that case, match 0s in all elements except every (step-1)'th
6384 one, which must match EASY_VECTOR_MSB. */
6385 for (i = 1; i < nunits; ++i)
6386 {
6387 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6388 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6389 if ((i & (step - 1)) == step - 1)
6390 {
6391 if (!EASY_VECTOR_MSB (elt_val, inner))
6392 break;
6393 }
6394 else if (elt_val)
6395 break;
6396 }
6397 if (i == nunits)
6398 return true;
6399 }
6400
6401 /* Construct the value to be splatted, if possible. If not, return false. */
6402 for (i = 2; i <= copies; i *= 2)
6403 {
6404 HOST_WIDE_INT small_val;
6405 bitsize /= 2;
6406 small_val = splat_val >> bitsize;
6407 mask >>= bitsize;
6408 if (splat_val != ((HOST_WIDE_INT)
6409 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6410 | (small_val & mask)))
6411 return false;
6412 splat_val = small_val;
6413 inner = smallest_int_mode_for_size (bitsize);
6414 }
6415
6416 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6417 if (EASY_VECTOR_15 (splat_val))
6418 ;
6419
6420 /* Also check if we can splat, and then add the result to itself. Do so if
6421 the value is positive, or if the splat instruction is using OP's mode;
6422 for splat_val < 0, the splat and the add should use the same mode. */
6423 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6424 && (splat_val >= 0 || (step == 1 && copies == 1)))
6425 ;
6426
6427 /* Also check if we are loading up the most significant bit, which can be
6428 done by loading up -1 and shifting the value left by -1. Only do this
6429 for step 1 here; for larger steps it is done earlier. */
6430 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6431 ;
6432
6433 else
6434 return false;
6435
6436 /* Check if VAL is present in every STEP-th element, and the
6437 other elements are filled with its most significant bit. */
6438 for (i = 1; i < nunits; ++i)
6439 {
6440 HOST_WIDE_INT desired_val;
6441 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6442 if ((i & (step - 1)) == 0)
6443 desired_val = val;
6444 else
6445 desired_val = msb_val;
6446
6447 if (desired_val != const_vector_elt_as_int (op, elt))
6448 return false;
6449 }
6450
6451 return true;
6452 }
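/* Worked example (added commentary, not from the original source):
   a V8HImode constant with every element 0x0303, checked with
   step == 1 and copies == 2 (the vspltisb attempt made by
   easy_altivec_constant), folds splat_val from 0x0303 down to 3,
   which satisfies EASY_VECTOR_15, so the constant can be generated
   as a single vspltisb of 3.  */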
6453
6454 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6455 instruction, filling in the bottom elements with 0 or -1.
6456
6457 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6458 for the number of zeroes to shift in, or negative for the number of 0xff
6459 bytes to shift in.
6460
6461 OP is a CONST_VECTOR. */
6462
6463 int
6464 vspltis_shifted (rtx op)
6465 {
6466 machine_mode mode = GET_MODE (op);
6467 machine_mode inner = GET_MODE_INNER (mode);
6468
6469 unsigned i, j;
6470 unsigned nunits;
6471 unsigned mask;
6472
6473 HOST_WIDE_INT val;
6474
6475 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6476 return false;
6477
6478 /* We need to create pseudo registers to do the shift, so don't recognize
6479 shift vector constants after reload. Don't match it even before RA
6480 after split1 is done, because there won't be a further splitting
6481 pass before RA to do the splitting. */
6482 if (!can_create_pseudo_p ()
6483 || (cfun->curr_properties & PROP_rtl_split_insns))
6484 return false;
6485
6486 nunits = GET_MODE_NUNITS (mode);
6487 mask = GET_MODE_MASK (inner);
6488
6489 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6490
6491 /* Check if the value can really be the operand of a vspltis[bhw]. */
6492 if (EASY_VECTOR_15 (val))
6493 ;
6494
6495 /* Also check if we are loading up the most significant bit, which can be
6496 done by loading up -1 and shifting the value left by -1. */
6497 else if (EASY_VECTOR_MSB (val, inner))
6498 ;
6499
6500 else
6501 return 0;
6502
6503 /* Check if VAL is present in every STEP-th element until we find elements
6504 that are 0 or all 1 bits. */
6505 for (i = 1; i < nunits; ++i)
6506 {
6507 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6508 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6509
6510 /* If the value isn't the splat value, check for the remaining elements
6511 being 0/-1. */
6512 if (val != elt_val)
6513 {
6514 if (elt_val == 0)
6515 {
6516 for (j = i+1; j < nunits; ++j)
6517 {
6518 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6519 if (const_vector_elt_as_int (op, elt2) != 0)
6520 return 0;
6521 }
6522
6523 return (nunits - i) * GET_MODE_SIZE (inner);
6524 }
6525
6526 else if ((elt_val & mask) == mask)
6527 {
6528 for (j = i+1; j < nunits; ++j)
6529 {
6530 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6531 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6532 return 0;
6533 }
6534
6535 return -((nunits - i) * GET_MODE_SIZE (inner));
6536 }
6537
6538 else
6539 return 0;
6540 }
6541 }
6542
6543 /* If all elements are equal, we don't need to do VSLDOI. */
6544 return 0;
6545 }
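/* Worked example (added commentary, not from the original source):
   on a big-endian target, the V4SImode constant { 5, 0, 0, 0 } has
   splat value 5 in its first element and zeros afterwards, so the
   loop above returns (4 - 1) * 4 == 12: splat 5 with vspltisw, then
   shift in 12 zero bytes with vsldoi.  */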
6546
6547
6548 /* Return non-zero (element mode byte size) if OP is of the given MODE
6549 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6550
6551 int
6552 easy_altivec_constant (rtx op, machine_mode mode)
6553 {
6554 unsigned step, copies;
6555
6556 if (mode == VOIDmode)
6557 mode = GET_MODE (op);
6558 else if (mode != GET_MODE (op))
6559 return 0;
6560
6561 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6562 constants. */
6563 if (mode == V2DFmode)
6564 return zero_constant (op, mode) ? 8 : 0;
6565
6566 else if (mode == V2DImode)
6567 {
6568 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6569 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6570 return 0;
6571
6572 if (zero_constant (op, mode))
6573 return 8;
6574
6575 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6576 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6577 return 8;
6578
6579 return 0;
6580 }
6581
6582 /* V1TImode is a special container for TImode. Ignore for now. */
6583 else if (mode == V1TImode)
6584 return 0;
6585
6586 /* Start with a vspltisw. */
6587 step = GET_MODE_NUNITS (mode) / 4;
6588 copies = 1;
6589
6590 if (vspltis_constant (op, step, copies))
6591 return 4;
6592
6593 /* Then try with a vspltish. */
6594 if (step == 1)
6595 copies <<= 1;
6596 else
6597 step >>= 1;
6598
6599 if (vspltis_constant (op, step, copies))
6600 return 2;
6601
6602 /* And finally a vspltisb. */
6603 if (step == 1)
6604 copies <<= 1;
6605 else
6606 step >>= 1;
6607
6608 if (vspltis_constant (op, step, copies))
6609 return 1;
6610
6611 if (vspltis_shifted (op) != 0)
6612 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6613
6614 return 0;
6615 }
6616
6617 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6618 result is OP. Abort if it is not possible. */
6619
6620 rtx
6621 gen_easy_altivec_constant (rtx op)
6622 {
6623 machine_mode mode = GET_MODE (op);
6624 int nunits = GET_MODE_NUNITS (mode);
6625 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6626 unsigned step = nunits / 4;
6627 unsigned copies = 1;
6628
6629 /* Start with a vspltisw. */
6630 if (vspltis_constant (op, step, copies))
6631 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6632
6633 /* Then try with a vspltish. */
6634 if (step == 1)
6635 copies <<= 1;
6636 else
6637 step >>= 1;
6638
6639 if (vspltis_constant (op, step, copies))
6640 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6641
6642 /* And finally a vspltisb. */
6643 if (step == 1)
6644 copies <<= 1;
6645 else
6646 step >>= 1;
6647
6648 if (vspltis_constant (op, step, copies))
6649 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6650
6651 gcc_unreachable ();
6652 }
6653
6654 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6655 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6656
6657 Return the number of instructions needed (1 or 2) through
6658 NUM_INSNS_PTR.
6659
6660 Return the constant that is being split through CONSTANT_PTR. */
6661
6662 bool
6663 xxspltib_constant_p (rtx op,
6664 machine_mode mode,
6665 int *num_insns_ptr,
6666 int *constant_ptr)
6667 {
6668 size_t nunits = GET_MODE_NUNITS (mode);
6669 size_t i;
6670 HOST_WIDE_INT value;
6671 rtx element;
6672
6673 /* Set the returned values to out-of-bounds values. */
6674 *num_insns_ptr = -1;
6675 *constant_ptr = 256;
6676
6677 if (!TARGET_P9_VECTOR)
6678 return false;
6679
6680 if (mode == VOIDmode)
6681 mode = GET_MODE (op);
6682
6683 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6684 return false;
6685
6686 /* Handle (vec_duplicate <constant>). */
6687 if (GET_CODE (op) == VEC_DUPLICATE)
6688 {
6689 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6690 && mode != V2DImode)
6691 return false;
6692
6693 element = XEXP (op, 0);
6694 if (!CONST_INT_P (element))
6695 return false;
6696
6697 value = INTVAL (element);
6698 if (!IN_RANGE (value, -128, 127))
6699 return false;
6700 }
6701
6702 /* Handle (const_vector [...]). */
6703 else if (GET_CODE (op) == CONST_VECTOR)
6704 {
6705 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6706 && mode != V2DImode)
6707 return false;
6708
6709 element = CONST_VECTOR_ELT (op, 0);
6710 if (!CONST_INT_P (element))
6711 return false;
6712
6713 value = INTVAL (element);
6714 if (!IN_RANGE (value, -128, 127))
6715 return false;
6716
6717 for (i = 1; i < nunits; i++)
6718 {
6719 element = CONST_VECTOR_ELT (op, i);
6720 if (!CONST_INT_P (element))
6721 return false;
6722
6723 if (value != INTVAL (element))
6724 return false;
6725 }
6726 }
6727
6728 /* Handle integer constants being loaded into the upper part of the VSX
6729 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6730 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6731 else if (CONST_INT_P (op))
6732 {
6733 if (!SCALAR_INT_MODE_P (mode))
6734 return false;
6735
6736 value = INTVAL (op);
6737 if (!IN_RANGE (value, -128, 127))
6738 return false;
6739
6740 if (!IN_RANGE (value, -1, 0))
6741 {
6742 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6743 return false;
6744
6745 if (EASY_VECTOR_15 (value))
6746 return false;
6747 }
6748 }
6749
6750 else
6751 return false;
6752
6753 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6754 sign extend. Special case 0/-1 to allow getting any VSX register instead
6755 of an Altivec register. */
6756 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6757 && EASY_VECTOR_15 (value))
6758 return false;
6759
6760 /* Return # of instructions and the constant byte for XXSPLTIB. */
6761 if (mode == V16QImode)
6762 *num_insns_ptr = 1;
6763
6764 else if (IN_RANGE (value, -1, 0))
6765 *num_insns_ptr = 1;
6766
6767 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6768 single XXSPLTIW or XXSPLTIDP instruction. */
6769 else if (vsx_prefixed_constant (op, mode))
6770 return false;
6771
6772 /* Return XXSPLTIB followed by a sign extend operation to convert the
6773 constant to V8HImode or V4SImode. */
6774 else
6775 *num_insns_ptr = 2;
6776
6777 *constant_ptr = (int) value;
6778 return true;
6779 }
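
/* A minimal sketch (not built) of the central decision above, leaving out
   the XXSPLTIW/XXSPLTIDP and VSPLTIS* preference checks: XXSPLTIB splats
   any byte in [-128, 127]; modes other than V16QImode then need a second
   (sign extension) instruction unless the value is 0 or -1.  */
#if 0
static bool
xxspltib_model (int value, bool v16qi_p, int *num_insns)
{
  if (value < -128 || value > 127)
    return false;			/* Outside XXSPLTIB's range.  */
  *num_insns = (v16qi_p || value == 0 || value == -1) ? 1 : 2;
  return true;
}
#endif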
6780
6781 const char *
6782 output_vec_const_move (rtx *operands)
6783 {
6784 int shift;
6785 machine_mode mode;
6786 rtx dest, vec;
6787
6788 dest = operands[0];
6789 vec = operands[1];
6790 mode = GET_MODE (dest);
6791
6792 if (TARGET_VSX)
6793 {
6794 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6795 int xxspltib_value = 256;
6796 int num_insns = -1;
6797
6798 if (zero_constant (vec, mode))
6799 {
6800 if (TARGET_P9_VECTOR)
6801 return "xxspltib %x0,0";
6802
6803 else if (dest_vmx_p)
6804 return "vspltisw %0,0";
6805
6806 else
6807 return "xxlxor %x0,%x0,%x0";
6808 }
6809
6810 if (all_ones_constant (vec, mode))
6811 {
6812 if (TARGET_P9_VECTOR)
6813 return "xxspltib %x0,255";
6814
6815 else if (dest_vmx_p)
6816 return "vspltisw %0,-1";
6817
6818 else if (TARGET_P8_VECTOR)
6819 return "xxlorc %x0,%x0,%x0";
6820
6821 else
6822 gcc_unreachable ();
6823 }
6824
6825 vec_const_128bit_type vsx_const;
6826 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6827 {
6828 unsigned imm = constant_generates_lxvkq (&vsx_const);
6829 if (imm)
6830 {
6831 operands[2] = GEN_INT (imm);
6832 return "lxvkq %x0,%2";
6833 }
6834
6835 imm = constant_generates_xxspltiw (&vsx_const);
6836 if (imm)
6837 {
6838 operands[2] = GEN_INT (imm);
6839 return "xxspltiw %x0,%2";
6840 }
6841
6842 imm = constant_generates_xxspltidp (&vsx_const);
6843 if (imm)
6844 {
6845 operands[2] = GEN_INT (imm);
6846 return "xxspltidp %x0,%2";
6847 }
6848 }
6849
6850 if (TARGET_P9_VECTOR
6851 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6852 {
6853 if (num_insns == 1)
6854 {
6855 operands[2] = GEN_INT (xxspltib_value & 0xff);
6856 return "xxspltib %x0,%2";
6857 }
6858
6859 return "#";
6860 }
6861 }
6862
6863 if (TARGET_ALTIVEC)
6864 {
6865 rtx splat_vec;
6866
6867 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6868 if (zero_constant (vec, mode))
6869 return "vspltisw %0,0";
6870
6871 if (all_ones_constant (vec, mode))
6872 return "vspltisw %0,-1";
6873
6874 /* Do we need to construct a value using VSLDOI? */
6875 shift = vspltis_shifted (vec);
6876 if (shift != 0)
6877 return "#";
6878
6879 splat_vec = gen_easy_altivec_constant (vec);
6880 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6881 operands[1] = XEXP (splat_vec, 0);
6882 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6883 return "#";
6884
6885 switch (GET_MODE (splat_vec))
6886 {
6887 case E_V4SImode:
6888 return "vspltisw %0,%1";
6889
6890 case E_V8HImode:
6891 return "vspltish %0,%1";
6892
6893 case E_V16QImode:
6894 return "vspltisb %0,%1";
6895
6896 default:
6897 gcc_unreachable ();
6898 }
6899 }
6900
6901 gcc_unreachable ();
6902 }
6903
6904 /* Initialize vector TARGET to VALS. */
6905
6906 void
6907 rs6000_expand_vector_init (rtx target, rtx vals)
6908 {
6909 machine_mode mode = GET_MODE (target);
6910 machine_mode inner_mode = GET_MODE_INNER (mode);
6911 unsigned int n_elts = GET_MODE_NUNITS (mode);
6912 int n_var = 0, one_var = -1;
6913 bool all_same = true, all_const_zero = true;
6914 rtx x, mem;
6915 unsigned int i;
6916
6917 for (i = 0; i < n_elts; ++i)
6918 {
6919 x = XVECEXP (vals, 0, i);
6920 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6921 ++n_var, one_var = i;
6922 else if (x != CONST0_RTX (inner_mode))
6923 all_const_zero = false;
6924
6925 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6926 all_same = false;
6927 }
6928
6929 if (n_var == 0)
6930 {
6931 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6932 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6933 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6934 {
6935 /* Zero register. */
6936 emit_move_insn (target, CONST0_RTX (mode));
6937 return;
6938 }
6939 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6940 {
6941 /* Splat immediate. */
6942 emit_insn (gen_rtx_SET (target, const_vec));
6943 return;
6944 }
6945 else
6946 {
6947 /* Load from constant pool. */
6948 emit_move_insn (target, const_vec);
6949 return;
6950 }
6951 }
6952
6953 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6954 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6955 {
6956 rtx op[2];
6957 size_t i;
6958 size_t num_elements = all_same ? 1 : 2;
6959 for (i = 0; i < num_elements; i++)
6960 {
6961 op[i] = XVECEXP (vals, 0, i);
6962 /* Just in case there is a SUBREG with a smaller mode, do a
6963 conversion. */
6964 if (GET_MODE (op[i]) != inner_mode)
6965 {
6966 rtx tmp = gen_reg_rtx (inner_mode);
6967 convert_move (tmp, op[i], 0);
6968 op[i] = tmp;
6969 }
6970 /* Allow load with splat double word. */
6971 else if (MEM_P (op[i]))
6972 {
6973 if (!all_same)
6974 op[i] = force_reg (inner_mode, op[i]);
6975 }
6976 else if (!REG_P (op[i]))
6977 op[i] = force_reg (inner_mode, op[i]);
6978 }
6979
6980 if (all_same)
6981 {
6982 if (mode == V2DFmode)
6983 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6984 else
6985 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6986 }
6987 else
6988 {
6989 if (mode == V2DFmode)
6990 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6991 else
6992 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6993 }
6994 return;
6995 }
6996
6997 /* Special case initializing vector int if we are on 64-bit systems with
6998 direct move or we have the ISA 3.0 instructions. */
6999 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7000 && TARGET_DIRECT_MOVE_64BIT)
7001 {
7002 if (all_same)
7003 {
7004 rtx element0 = XVECEXP (vals, 0, 0);
7005 if (MEM_P (element0))
7006 element0 = rs6000_force_indexed_or_indirect_mem (element0);
7007 else
7008 element0 = force_reg (SImode, element0);
7009
7010 if (TARGET_P9_VECTOR)
7011 emit_insn (gen_vsx_splat_v4si (target, element0));
7012 else
7013 {
7014 rtx tmp = gen_reg_rtx (DImode);
7015 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7016 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7017 }
7018 return;
7019 }
7020 else
7021 {
7022 rtx elements[4];
7023 size_t i;
7024
7025 for (i = 0; i < 4; i++)
7026 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
7027
7028 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7029 elements[2], elements[3]));
7030 return;
7031 }
7032 }
7033
7034 /* With single precision floating point on VSX, a single precision value is
7035 internally represented as a double, so either build two V2DF vectors and
7036 convert them to single precision, or do one conversion and splat the
7037 result to the other elements. */
7038 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7039 {
7040 if (all_same)
7041 {
7042 rtx element0 = XVECEXP (vals, 0, 0);
7043
7044 if (TARGET_P9_VECTOR)
7045 {
7046 if (MEM_P (element0))
7047 element0 = rs6000_force_indexed_or_indirect_mem (element0);
7048
7049 emit_insn (gen_vsx_splat_v4sf (target, element0));
7050 }
7051
7052 else
7053 {
7054 rtx freg = gen_reg_rtx (V4SFmode);
7055 rtx sreg = force_reg (SFmode, element0);
7056 rtx cvt = (TARGET_XSCVDPSPN
7057 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7058 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7059
7060 emit_insn (cvt);
7061 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7062 const0_rtx));
7063 }
7064 }
7065 else
7066 {
7067 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
7068 {
7069 rtx tmp_sf[4];
7070 rtx tmp_si[4];
7071 rtx tmp_di[4];
7072 rtx mrg_di[4];
7073 for (i = 0; i < 4; i++)
7074 {
7075 tmp_si[i] = gen_reg_rtx (SImode);
7076 tmp_di[i] = gen_reg_rtx (DImode);
7077 mrg_di[i] = gen_reg_rtx (DImode);
7078 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
7079 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
7080 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
7081 }
7082
7083 if (!BYTES_BIG_ENDIAN)
7084 {
7085 std::swap (tmp_di[0], tmp_di[1]);
7086 std::swap (tmp_di[2], tmp_di[3]);
7087 }
7088
7089 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
7090 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
7091 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
7092 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
7093
7094 rtx tmp_v2di = gen_reg_rtx (V2DImode);
7095 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
7096 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
7097 }
7098 else
7099 {
7100 rtx dbl_even = gen_reg_rtx (V2DFmode);
7101 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7102 rtx flt_even = gen_reg_rtx (V4SFmode);
7103 rtx flt_odd = gen_reg_rtx (V4SFmode);
7104 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7105 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7106 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7107 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7108
7109 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7110 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7111 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7112 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7113 rs6000_expand_extract_even (target, flt_even, flt_odd);
7114 }
7115 }
7116 return;
7117 }
7118
7119 /* Special case initializing vector short/char that are splats if we are on
7120 64-bit systems with direct move. */
7121 if (all_same && TARGET_DIRECT_MOVE_64BIT
7122 && (mode == V16QImode || mode == V8HImode))
7123 {
7124 rtx op0 = XVECEXP (vals, 0, 0);
7125 rtx di_tmp = gen_reg_rtx (DImode);
7126
7127 if (!REG_P (op0))
7128 op0 = force_reg (GET_MODE_INNER (mode), op0);
7129
7130 if (mode == V16QImode)
7131 {
7132 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7133 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7134 return;
7135 }
7136
7137 if (mode == V8HImode)
7138 {
7139 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7140 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7141 return;
7142 }
7143 }
7144
7145 /* Store value to stack temp. Load vector element. Splat. However, splat
7146 of 64-bit items is not supported on Altivec. */
7147 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7148 {
7149 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7150 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7151 XVECEXP (vals, 0, 0));
7152 x = gen_rtx_UNSPEC (VOIDmode,
7153 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7154 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7155 gen_rtvec (2,
7156 gen_rtx_SET (target, mem),
7157 x)));
7158 x = gen_rtx_VEC_SELECT (inner_mode, target,
7159 gen_rtx_PARALLEL (VOIDmode,
7160 gen_rtvec (1, const0_rtx)));
7161 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7162 return;
7163 }
7164
7165 /* One field is non-constant. Load constant then overwrite
7166 varying field. */
7167 if (n_var == 1)
7168 {
7169 rtx copy = copy_rtx (vals);
7170
7171 /* Load constant part of vector, substitute neighboring value for
7172 varying element. */
7173 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7174 rs6000_expand_vector_init (target, copy);
7175
7176 /* Insert variable. */
7177 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
7178 GEN_INT (one_var));
7179 return;
7180 }
7181
7182 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
7183 {
7184 rtx op[16];
7185 /* Force the values into word_mode registers. */
7186 for (i = 0; i < n_elts; i++)
7187 {
7188 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
7189 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
7190 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
7191 }
7192
7193 /* Take unsigned char with big endianness on 64-bit as an example for the
7194 construction below; the input values are: A, B, C, D, ..., O, P. */
7195
7196 if (TARGET_DIRECT_MOVE_128)
7197 {
7198 /* Move to VSX register with vec_concat, each has 2 values.
7199 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7200 vr1[1] = { xxxxxxxC, xxxxxxxD };
7201 ...
7202 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7203 rtx vr1[8];
7204 for (i = 0; i < n_elts / 2; i++)
7205 {
7206 vr1[i] = gen_reg_rtx (V2DImode);
7207 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
7208 op[i * 2 + 1]));
7209 }
7210
7211 /* Pack vectors with 2 values into vectors with 4 values.
7212 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7213 vr2[1] = { xxxExxxF, xxxGxxxH };
7214 vr2[2] = { xxxIxxxJ, xxxKxxxL };
7215 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7216 rtx vr2[4];
7217 for (i = 0; i < n_elts / 4; i++)
7218 {
7219 vr2[i] = gen_reg_rtx (V4SImode);
7220 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7221 vr1[i * 2 + 1]));
7222 }
7223
7224 /* Pack vectors with 4 values into vectors with 8 values.
7225 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7226 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7227 rtx vr3[2];
7228 for (i = 0; i < n_elts / 8; i++)
7229 {
7230 vr3[i] = gen_reg_rtx (V8HImode);
7231 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7232 vr2[i * 2 + 1]));
7233 }
7234
7235 /* If the mode is V8HImode, we are done; emit the result. */
7236 if (mode == V8HImode)
7237 {
7238 emit_insn (gen_rtx_SET (target, vr3[0]));
7239 return;
7240 }
7241
7242 /* Pack vectors with 8 values into 16 values. */
7243 rtx res = gen_reg_rtx (V16QImode);
7244 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7245 emit_insn (gen_rtx_SET (target, res));
7246 }
7247 else
7248 {
7249 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7250 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7251 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7252 rtx perm_idx;
7253
7254 /* Set up some common gen routines and values. */
7255 if (BYTES_BIG_ENDIAN)
7256 {
7257 if (mode == V16QImode)
7258 {
7259 merge_v16qi = gen_altivec_vmrghb;
7260 merge_v8hi = gen_altivec_vmrglh;
7261 }
7262 else
7263 merge_v8hi = gen_altivec_vmrghh;
7264
7265 merge_v4si = gen_altivec_vmrglw;
7266 perm_idx = GEN_INT (3);
7267 }
7268 else
7269 {
7270 if (mode == V16QImode)
7271 {
7272 merge_v16qi = gen_altivec_vmrglb;
7273 merge_v8hi = gen_altivec_vmrghh;
7274 }
7275 else
7276 merge_v8hi = gen_altivec_vmrglh;
7277
7278 merge_v4si = gen_altivec_vmrghw;
7279 perm_idx = GEN_INT (0);
7280 }
7281
7282 /* Move to VSX register with direct move.
7283 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7284 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7285 ...
7286 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7287 rtx vr_qi[16];
7288 for (i = 0; i < n_elts; i++)
7289 {
7290 vr_qi[i] = gen_reg_rtx (V16QImode);
7291 if (TARGET_POWERPC64)
7292 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7293 else
7294 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7295 }
7296
7297 /* Merge/move to vector short.
7298 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7299 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7300 ...
7301 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7302 rtx vr_hi[8];
7303 for (i = 0; i < 8; i++)
7304 {
7305 rtx tmp = vr_qi[i];
7306 if (mode == V16QImode)
7307 {
7308 tmp = gen_reg_rtx (V16QImode);
7309 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7310 }
7311 vr_hi[i] = gen_reg_rtx (V8HImode);
7312 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7313 }
7314
7315 /* Merge vector short to vector int.
7316 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7317 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7318 ...
7319 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7320 rtx vr_si[4];
7321 for (i = 0; i < 4; i++)
7322 {
7323 rtx tmp = gen_reg_rtx (V8HImode);
7324 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7325 vr_si[i] = gen_reg_rtx (V4SImode);
7326 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7327 }
7328
7329 /* Merge vector int to vector long.
7330 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7331 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7332 rtx vr_di[2];
7333 for (i = 0; i < 2; i++)
7334 {
7335 rtx tmp = gen_reg_rtx (V4SImode);
7336 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7337 vr_di[i] = gen_reg_rtx (V2DImode);
7338 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7339 }
7340
7341 rtx res = gen_reg_rtx (V2DImode);
7342 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7343 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7344 }
7345
7346 return;
7347 }
7348
7349 /* Construct the vector in memory one field at a time
7350 and load the whole vector. */
7351 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7352 for (i = 0; i < n_elts; i++)
7353 emit_move_insn (adjust_address_nv (mem, inner_mode,
7354 i * GET_MODE_SIZE (inner_mode)),
7355 XVECEXP (vals, 0, i));
7356 emit_move_insn (target, mem);
7357 }
7358
7359 /* Insert VAL into element IDX of TARGET. VAL has the same size as a vector
7360 element, and IDX is variable, counted in element units. For p9 and above. */
7361
7362 static void
7363 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7364 {
7365 machine_mode mode = GET_MODE (target);
7366
7367 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7368
7369 machine_mode inner_mode = GET_MODE (val);
7370
7371 int width = GET_MODE_SIZE (inner_mode);
7372
7373 gcc_assert (width >= 1 && width <= 8);
7374
7375 int shift = exact_log2 (width);
7376
7377 machine_mode idx_mode = GET_MODE (idx);
7378
7379 machine_mode shift_mode;
7380 rtx (*gen_ashl)(rtx, rtx, rtx);
7381 rtx (*gen_lvsl)(rtx, rtx);
7382 rtx (*gen_lvsr)(rtx, rtx);
7383
7384 if (TARGET_POWERPC64)
7385 {
7386 shift_mode = DImode;
7387 gen_ashl = gen_ashldi3;
7388 gen_lvsl = gen_altivec_lvsl_reg_di;
7389 gen_lvsr = gen_altivec_lvsr_reg_di;
7390 }
7391 else
7392 {
7393 shift_mode = SImode;
7394 gen_ashl = gen_ashlsi3;
7395 gen_lvsl = gen_altivec_lvsl_reg_si;
7396 gen_lvsr = gen_altivec_lvsr_reg_si;
7397 }
7398 /* Generate the IDX for permute shift, width is the vector element size.
7399 idx = idx * width. */
7400 rtx tmp = gen_reg_rtx (shift_mode);
7401 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7402
7403 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7404
7405 /* lvsr v1,0,idx. */
7406 rtx pcvr = gen_reg_rtx (V16QImode);
7407 emit_insn (gen_lvsr (pcvr, tmp));
7408
7409 /* lvsl v2,0,idx. */
7410 rtx pcvl = gen_reg_rtx (V16QImode);
7411 emit_insn (gen_lvsl (pcvl, tmp));
7412
7413 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7414
7415 rtx permr
7416 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr);
7417 emit_insn (permr);
7418
7419 rs6000_expand_vector_set (target, val, const0_rtx);
7420
7421 rtx perml
7422 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl);
7423 emit_insn (perml);
7424 }
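
/* A byte-level model (not built) of the rotate/insert/rotate-back scheme
   above, with SHIFT = IDX * WIDTH as computed by the shift-left; the two
   vperms with the lvsr/lvsl controls play the role of the two inverse
   rotations (endianness details elided).  */
#if 0
#include <stdint.h>
#include <string.h>

static void
set_var_p9_model (uint8_t vec[16], const uint8_t *val,
		  unsigned idx, unsigned width)
{
  uint8_t tmp[16];
  unsigned shift = idx * width;

  /* Rotate so the destination slot lands at byte offset 0.  */
  for (unsigned i = 0; i < 16; i++)
    tmp[i] = vec[(i + shift) % 16];

  /* rs6000_expand_vector_set (..., const0_rtx): overwrite element 0.  */
  memcpy (tmp, val, width);

  /* Rotate back; untouched bytes return to their original offsets.  */
  for (unsigned i = 0; i < 16; i++)
    vec[i] = tmp[(i + 16 - shift) % 16];
}
#endif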
7425
7426 /* Insert VAL into element IDX of TARGET. VAL has the same size as a vector
7427 element, and IDX is variable, counted in element units. For p7 and p8. */
7428
7429 static void
7430 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7431 {
7432 machine_mode mode = GET_MODE (target);
7433
7434 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7435
7436 machine_mode inner_mode = GET_MODE (val);
7437 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7438
7439 int width = GET_MODE_SIZE (inner_mode);
7440 gcc_assert (width >= 1 && width <= 4);
7441
7442 int shift = exact_log2 (width);
7443
7444 machine_mode idx_mode = GET_MODE (idx);
7445
7446 machine_mode shift_mode;
7447 rtx (*gen_ashl)(rtx, rtx, rtx);
7448 rtx (*gen_add)(rtx, rtx, rtx);
7449 rtx (*gen_sub)(rtx, rtx, rtx);
7450 rtx (*gen_lvsl)(rtx, rtx);
7451
7452 if (TARGET_POWERPC64)
7453 {
7454 shift_mode = DImode;
7455 gen_ashl = gen_ashldi3;
7456 gen_add = gen_adddi3;
7457 gen_sub = gen_subdi3;
7458 gen_lvsl = gen_altivec_lvsl_reg_di;
7459 }
7460 else
7461 {
7462 shift_mode = SImode;
7463 gen_ashl = gen_ashlsi3;
7464 gen_add = gen_addsi3;
7465 gen_sub = gen_subsi3;
7466 gen_lvsl = gen_altivec_lvsl_reg_si;
7467 }
7468
7469 /* idx = idx * width. */
7470 rtx tmp = gen_reg_rtx (shift_mode);
7471 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7472
7473 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7474
7475 /* For LE: idx = idx + 8; for BE: idx = 24 - width - idx. */
7476 if (!BYTES_BIG_ENDIAN)
7477 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7478 else
7479 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7480
7481 /* lxv vs33, mask.
7482 DImode: 0xffffffffffffffff0000000000000000
7483 SImode: 0x00000000ffffffff0000000000000000
7484 HImode: 0x000000000000ffff0000000000000000.
7485 QImode: 0x00000000000000ff0000000000000000. */
7486 rtx mask = gen_reg_rtx (V16QImode);
7487 rtx mask_v2di = gen_reg_rtx (V2DImode);
7488 rtvec v = rtvec_alloc (2);
7489 if (!BYTES_BIG_ENDIAN)
7490 {
7491 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7492 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7493 }
7494 else
7495 {
7496 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7497 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7498 }
7499 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7500 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7501 emit_insn (gen_rtx_SET (mask, sub_mask));
7502
7503 /* mtvsrd[wz] f0,tmp_val. */
7504 rtx tmp_val = gen_reg_rtx (SImode);
7505 if (inner_mode == E_SFmode)
7506 if (TARGET_DIRECT_MOVE_64BIT)
7507 emit_insn (gen_movsi_from_sf (tmp_val, val));
7508 else
7509 {
7510 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7511 emit_insn (gen_movsf_hardfloat (stack, val));
7512 rtx stack2 = copy_rtx (stack);
7513 PUT_MODE (stack2, SImode);
7514 emit_move_insn (tmp_val, stack2);
7515 }
7516 else
7517 tmp_val = force_reg (SImode, val);
7518
7519 rtx val_v16qi = gen_reg_rtx (V16QImode);
7520 rtx val_v2di = gen_reg_rtx (V2DImode);
7521 rtvec vec_val = rtvec_alloc (2);
7522 if (!BYTES_BIG_ENDIAN)
7523 {
7524 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7525 RTVEC_ELT (vec_val, 1) = tmp_val;
7526 }
7527 else
7528 {
7529 RTVEC_ELT (vec_val, 0) = tmp_val;
7530 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7531 }
7532 emit_insn (
7533 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7534 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7535 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7536
7537 /* lvsl 13,0,idx. */
7538 rtx pcv = gen_reg_rtx (V16QImode);
7539 emit_insn (gen_lvsl (pcv, tmp));
7540
7541 /* vperm 1,1,1,13. */
7542 /* vperm 0,0,0,13. */
7543 rtx val_perm = gen_reg_rtx (V16QImode);
7544 rtx mask_perm = gen_reg_rtx (V16QImode);
7545 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7546 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7547
7548 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7549
7550 /* xxsel 34,34,32,33. */
7551 emit_insn (
7552 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7553 }
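
/* The final xxsel above, as a plain-C model (not built): MASK_PERM holds
   0xff in exactly the WIDTH bytes of the destination element (the permuted
   mask), and VAL_PERM is the new value rotated to that same byte offset.  */
#if 0
#include <stdint.h>

static void
xxsel_model (uint8_t target[16], const uint8_t val_perm[16],
	     const uint8_t mask_perm[16])
{
  for (unsigned i = 0; i < 16; i++)
    target[i] = (target[i] & ~mask_perm[i]) | (val_perm[i] & mask_perm[i]);
}
#endif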
7554
7555 /* Set field ELT_RTX of TARGET to VAL. */
7556
7557 void
7558 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7559 {
7560 machine_mode mode = GET_MODE (target);
7561 machine_mode inner_mode = GET_MODE_INNER (mode);
7562 rtx reg = gen_reg_rtx (mode);
7563 rtx mask, mem, x;
7564 int width = GET_MODE_SIZE (inner_mode);
7565 int i;
7566
7567 val = force_reg (GET_MODE (val), val);
7568
7569 if (VECTOR_MEM_VSX_P (mode))
7570 {
7571 if (!CONST_INT_P (elt_rtx))
7572 {
7573 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7574 when elt_rtx is variable. */
7575 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7576 {
7577 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7578 return;
7579 }
7580 else if (TARGET_VSX)
7581 {
7582 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7583 return;
7584 }
7585 else
7586 gcc_assert (CONST_INT_P (elt_rtx));
7587 }
7588
7589 rtx insn = NULL_RTX;
7590
7591 if (mode == V2DFmode)
7592 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7593
7594 else if (mode == V2DImode)
7595 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7596
7597 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7598 {
7599 if (mode == V4SImode)
7600 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7601 else if (mode == V8HImode)
7602 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7603 else if (mode == V16QImode)
7604 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7605 else if (mode == V4SFmode)
7606 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7607 }
7608
7609 if (insn)
7610 {
7611 emit_insn (insn);
7612 return;
7613 }
7614 }
7615
7616 /* Simplify setting single element vectors like V1TImode. */
7617 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7618 && INTVAL (elt_rtx) == 0)
7619 {
7620 emit_move_insn (target, gen_lowpart (mode, val));
7621 return;
7622 }
7623
7624 /* Load single variable value. */
7625 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7626 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7627 x = gen_rtx_UNSPEC (VOIDmode,
7628 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7629 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7630 gen_rtvec (2,
7631 gen_rtx_SET (reg, mem),
7632 x)));
7633
7634 /* Linear sequence. */
7635 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7636 for (i = 0; i < 16; ++i)
7637 XVECEXP (mask, 0, i) = GEN_INT (i);
7638
7639 /* Set permute mask to insert element into target. */
7640 for (i = 0; i < width; ++i)
7641 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7642 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7643
7644 if (BYTES_BIG_ENDIAN)
7645 x = gen_rtx_UNSPEC (mode,
7646 gen_rtvec (3, target, reg,
7647 force_reg (V16QImode, x)),
7648 UNSPEC_VPERM);
7649 else
7650 {
7651 if (TARGET_P9_VECTOR)
7652 x = gen_rtx_UNSPEC (mode,
7653 gen_rtvec (3, reg, target,
7654 force_reg (V16QImode, x)),
7655 UNSPEC_VPERMR);
7656 else
7657 {
7658 /* Invert selector. We prefer to generate VNAND on P8 so
7659 that future fusion opportunities can kick in, but must
7660 generate VNOR elsewhere. */
7661 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7662 rtx iorx = (TARGET_P8_VECTOR
7663 ? gen_rtx_IOR (V16QImode, notx, notx)
7664 : gen_rtx_AND (V16QImode, notx, notx));
7665 rtx tmp = gen_reg_rtx (V16QImode);
7666 emit_insn (gen_rtx_SET (tmp, iorx));
7667
7668 /* Permute with operands reversed and adjusted selector. */
7669 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7670 UNSPEC_VPERM);
7671 }
7672 }
7673
7674 emit_insn (gen_rtx_SET (target, x));
7675 }
7676
7677 /* Extract field ELT from VEC into TARGET. */
7678
7679 void
7680 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7681 {
7682 machine_mode mode = GET_MODE (vec);
7683 machine_mode inner_mode = GET_MODE_INNER (mode);
7684 rtx mem;
7685
7686 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7687 {
7688 switch (mode)
7689 {
7690 default:
7691 break;
7692 case E_V1TImode:
7693 emit_move_insn (target, gen_lowpart (TImode, vec));
7694 return;
7695 case E_V2DFmode:
7696 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7697 return;
7698 case E_V2DImode:
7699 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7700 return;
7701 case E_V4SFmode:
7702 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7703 return;
7704 case E_V16QImode:
7705 if (TARGET_DIRECT_MOVE_64BIT)
7706 {
7707 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7708 return;
7709 }
7710 else
7711 break;
7712 case E_V8HImode:
7713 if (TARGET_DIRECT_MOVE_64BIT)
7714 {
7715 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7716 return;
7717 }
7718 else
7719 break;
7720 case E_V4SImode:
7721 if (TARGET_DIRECT_MOVE_64BIT)
7722 {
7723 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7724 return;
7725 }
7726 break;
7727 }
7728 }
7729 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7730 && TARGET_DIRECT_MOVE_64BIT)
7731 {
7732 if (GET_MODE (elt) != DImode)
7733 {
7734 rtx tmp = gen_reg_rtx (DImode);
7735 convert_move (tmp, elt, 0);
7736 elt = tmp;
7737 }
7738 else if (!REG_P (elt))
7739 elt = force_reg (DImode, elt);
7740
7741 switch (mode)
7742 {
7743 case E_V1TImode:
7744 emit_move_insn (target, gen_lowpart (TImode, vec));
7745 return;
7746
7747 case E_V2DFmode:
7748 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7749 return;
7750
7751 case E_V2DImode:
7752 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7753 return;
7754
7755 case E_V4SFmode:
7756 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7757 return;
7758
7759 case E_V4SImode:
7760 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7761 return;
7762
7763 case E_V8HImode:
7764 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7765 return;
7766
7767 case E_V16QImode:
7768 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7769 return;
7770
7771 default:
7772 gcc_unreachable ();
7773 }
7774 }
7775
7776 /* Allocate mode-sized buffer. */
7777 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7778
7779 emit_move_insn (mem, vec);
7780 if (CONST_INT_P (elt))
7781 {
7782 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7783
7784 /* Add offset to field within buffer matching vector element. */
7785 mem = adjust_address_nv (mem, inner_mode,
7786 modulo_elt * GET_MODE_SIZE (inner_mode));
7787 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7788 }
7789 else
7790 {
7791 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7792 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7793 rtx new_addr = gen_reg_rtx (Pmode);
7794
7795 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7796 if (ele_size > 1)
7797 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7798 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7799 new_addr = change_address (mem, inner_mode, new_addr);
7800 emit_move_insn (target, new_addr);
7801 }
7802 }
7803
7804 /* Return the offset within a memory object (MEM) of a vector type to a given
7805 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7806 the element is constant, we return a constant integer.
7807
7808 Otherwise, we use a base register temporary to calculate the offset after
7809 masking it to fit within the bounds of the vector and scaling it. The
7810 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7811 built-in function. */
7812
7813 static rtx
7814 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7815 {
7816 if (CONST_INT_P (element))
7817 return GEN_INT (INTVAL (element) * scalar_size);
7818
7819 /* All insns should use the 'Q' constraint (address is a single register) if
7820 the element number is not a constant. */
7821 gcc_assert (satisfies_constraint_Q (mem));
7822
7823 /* Mask the element to make sure the element number is between 0 and the
7824 maximum number of elements - 1 so that we don't generate an address
7825 outside the vector. */
7826 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7827 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7828 emit_insn (gen_rtx_SET (base_tmp, and_op));
7829
7830 /* Shift the element to get the byte offset from the element number. */
7831 int shift = exact_log2 (scalar_size);
7832 gcc_assert (shift >= 0);
7833
7834 if (shift > 0)
7835 {
7836 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7837 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7838 }
7839
7840 return base_tmp;
7841 }
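
/* Numerically (an illustrative sketch, not built): the variable-element
   offset is (ELEMENT & (NUNITS - 1)) << log2 (SCALAR_SIZE), so for a
   V4SImode vector, element 5 wraps to element 1 and yields byte offset 4,
   matching the masking the 64-bit ELFv2 vec_extract semantics require.  */
#if 0
#include <stdint.h>

static uint64_t
vector_offset_model (uint64_t element, unsigned nunits, unsigned scalar_size)
{
  return (element & (nunits - 1)) * scalar_size;
}
#endif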
7842
7843 /* Helper function to update PC-relative addresses when we are adjusting a memory
7844 address (ADDR) to a vector to point to a scalar field within the vector with
7845 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7846 use the base register temporary (BASE_TMP) to form the address. */
7847
7848 static rtx
7849 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7850 {
7851 rtx new_addr = NULL;
7852
7853 gcc_assert (CONST_INT_P (element_offset));
7854
7855 if (GET_CODE (addr) == CONST)
7856 addr = XEXP (addr, 0);
7857
7858 if (GET_CODE (addr) == PLUS)
7859 {
7860 rtx op0 = XEXP (addr, 0);
7861 rtx op1 = XEXP (addr, 1);
7862
7863 if (CONST_INT_P (op1))
7864 {
7865 HOST_WIDE_INT offset
7866 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7867
7868 if (offset == 0)
7869 new_addr = op0;
7870
7871 else
7872 {
7873 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7874 new_addr = gen_rtx_CONST (Pmode, plus);
7875 }
7876 }
7877
7878 else
7879 {
7880 emit_move_insn (base_tmp, addr);
7881 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7882 }
7883 }
7884
7885 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7886 {
7887 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7888 new_addr = gen_rtx_CONST (Pmode, plus);
7889 }
7890
7891 else
7892 gcc_unreachable ();
7893
7894 return new_addr;
7895 }
7896
7897 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7898 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7899 temporary (BASE_TMP) to fixup the address. Return the new memory address
7900 that is valid for reads or writes to a given register (SCALAR_REG).
7901
7902 This function is expected to be called after reload is completed when we are
7903 splitting insns. The temporary BASE_TMP might be set multiple times with
7904 this code. */
7905
7906 rtx
7907 rs6000_adjust_vec_address (rtx scalar_reg,
7908 rtx mem,
7909 rtx element,
7910 rtx base_tmp,
7911 machine_mode scalar_mode)
7912 {
7913 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7914 rtx addr = XEXP (mem, 0);
7915 rtx new_addr;
7916
7917 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7918 gcc_assert (!reg_mentioned_p (base_tmp, element));
7919
7920 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7921 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7922
7923 /* Calculate what we need to add to the address to get the element
7924 address. */
7925 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7926
7927 /* Create the new address pointing to the element within the vector. If we
7928 are adding 0, we don't have to change the address. */
7929 if (element_offset == const0_rtx)
7930 new_addr = addr;
7931
7932 /* A simple indirect address can be converted into a reg + offset
7933 address. */
7934 else if (REG_P (addr) || SUBREG_P (addr))
7935 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7936
7937 /* For references to local static variables, fold a constant offset into the
7938 address. */
7939 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7940 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7941
7942 /* Optimize D-FORM addresses with constant offset with a constant element, to
7943 include the element offset in the address directly. */
7944 else if (GET_CODE (addr) == PLUS)
7945 {
7946 rtx op0 = XEXP (addr, 0);
7947 rtx op1 = XEXP (addr, 1);
7948
7949 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7950 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7951 {
7952 /* op0 should never be r0, because r0+offset is not valid. But it
7953 doesn't hurt to make sure it is not r0. */
7954 gcc_assert (reg_or_subregno (op0) != 0);
7955
7956 /* D-FORM address with constant element number. */
7957 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7958 rtx offset_rtx = GEN_INT (offset);
7959 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7960 }
7961 else
7962 {
7963 /* If we don't have a D-FORM address with a constant element number,
7964 add the two elements in the current address. Then add the offset.
7965
7966 Previously, we tried to add the offset to OP1 and change the
7967 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7968 complicated because we had to verify that op1 was not GPR0 and we
7969 had a constant element offset (due to the way ADDI is defined).
7970 By doing the add of OP0 and OP1 first, and then adding in the
7971 offset, it has the benefit that if D-FORM instructions are
7972 allowed, the offset is part of the memory access to the vector
7973 element. */
7974 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7975 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7976 }
7977 }
7978
7979 else
7980 {
7981 emit_move_insn (base_tmp, addr);
7982 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7983 }
7984
7985 /* If the address isn't valid, move the address into the temporary base
7986 register. Some reasons it could not be valid include:
7987
7988 The address offset overflowed the 16 or 34 bit offset size;
7989 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7990 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7991 Only X_FORM loads can be done, and the address is D_FORM. */
7992
7993 enum insn_form iform
7994 = address_to_insn_form (new_addr, scalar_mode,
7995 reg_to_non_prefixed (scalar_reg, scalar_mode));
7996
7997 if (iform == INSN_FORM_BAD)
7998 {
7999 emit_move_insn (base_tmp, new_addr);
8000 new_addr = base_tmp;
8001 }
8002
8003 return change_address (mem, scalar_mode, new_addr);
8004 }
8005
8006 /* Split a variable vec_extract operation into the component instructions. */
8007
8008 void
8009 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
8010 rtx tmp_altivec)
8011 {
8012 machine_mode mode = GET_MODE (src);
8013 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
8014 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
8015 int byte_shift = exact_log2 (scalar_size);
8016
8017 gcc_assert (byte_shift >= 0);
8018
8019 /* If we are given a memory address, optimize to load just the element. We
8020 don't have to adjust the vector element number on little endian
8021 systems. */
8022 if (MEM_P (src))
8023 {
8024 emit_move_insn (dest,
8025 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
8026 scalar_mode));
8027 return;
8028 }
8029
8030 else if (REG_P (src) || SUBREG_P (src))
8031 {
8032 int num_elements = GET_MODE_NUNITS (mode);
8033 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
8034 int bit_shift = 7 - exact_log2 (num_elements);
8035 rtx element2;
8036 unsigned int dest_regno = reg_or_subregno (dest);
8037 unsigned int src_regno = reg_or_subregno (src);
8038 unsigned int element_regno = reg_or_subregno (element);
8039
8040 gcc_assert (REG_P (tmp_gpr));
8041
8042 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
8043 a general purpose register. */
8044 if (TARGET_P9_VECTOR
8045 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
8046 && INT_REGNO_P (dest_regno)
8047 && ALTIVEC_REGNO_P (src_regno)
8048 && INT_REGNO_P (element_regno))
8049 {
8050 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
8051 rtx element_si = gen_rtx_REG (SImode, element_regno);
8052
8053 if (mode == V16QImode)
8054 emit_insn (BYTES_BIG_ENDIAN
8055 ? gen_vextublx (dest_si, element_si, src)
8056 : gen_vextubrx (dest_si, element_si, src));
8057
8058 else if (mode == V8HImode)
8059 {
8060 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8061 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
8062 emit_insn (BYTES_BIG_ENDIAN
8063 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
8064 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
8065 }
8066
8067
8068 else
8069 {
8070 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8071 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
8072 emit_insn (BYTES_BIG_ENDIAN
8073 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
8074 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
8075 }
8076
8077 return;
8078 }
8079
8080
8081 gcc_assert (REG_P (tmp_altivec));
8082
8083 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
8084 an XOR, otherwise we need to subtract. The shift amount is so VSLO
8085 will shift the element into the upper position (adding 3 to convert a
8086 byte shift into a bit shift). */
8087 if (scalar_size == 8)
8088 {
8089 if (!BYTES_BIG_ENDIAN)
8090 {
8091 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
8092 element2 = tmp_gpr;
8093 }
8094 else
8095 element2 = element;
8096
8097 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8098 bit. */
8099 emit_insn (gen_rtx_SET (tmp_gpr,
8100 gen_rtx_AND (DImode,
8101 gen_rtx_ASHIFT (DImode,
8102 element2,
8103 GEN_INT (6)),
8104 GEN_INT (64))));
8105 }
8106 else
8107 {
8108 if (!BYTES_BIG_ENDIAN)
8109 {
8110 rtx num_ele_m1 = GEN_INT (num_elements - 1);
8111
8112 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8113 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8114 element2 = tmp_gpr;
8115 }
8116 else
8117 element2 = element;
8118
8119 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8120 }
8121
8122 /* Get the value into the lower byte of the Altivec register where VSLO
8123 expects it. */
8124 if (TARGET_P9_VECTOR)
8125 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8126 else if (can_create_pseudo_p ())
8127 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8128 else
8129 {
8130 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8131 emit_move_insn (tmp_di, tmp_gpr);
8132 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8133 }
8134
8135 /* Do the VSLO to get the value into the final location. */
8136 switch (mode)
8137 {
8138 case E_V2DFmode:
8139 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8140 return;
8141
8142 case E_V2DImode:
8143 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8144 return;
8145
8146 case E_V4SFmode:
8147 {
8148 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8149 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8150 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8151 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8152 tmp_altivec));
8153
8154 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8155 return;
8156 }
8157
8158 case E_V4SImode:
8159 case E_V8HImode:
8160 case E_V16QImode:
8161 {
8162 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8163 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8164 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8165 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8166 tmp_altivec));
8167 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8168 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
8169 GEN_INT (64 - bits_in_element)));
8170 return;
8171 }
8172
8173 default:
8174 gcc_unreachable ();
8175 }
8176
8177 return;
8178 }
8179 else
8180 gcc_unreachable ();
8181 }
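
/* A model (not built) of the shift amount formed above on the register
   path: BIT_SHIFT is 7 - log2 (NUM_ELEMENTS), so ELEMENT << BIT_SHIFT
   equals the element's byte offset multiplied by 8 -- the encoding VSLO
   consumes, since it takes its byte count from bits 1:4 of the low byte
   of the shift vector.  */
#if 0
static unsigned
vslo_shift_model (unsigned element, unsigned num_elements)
{
  unsigned element_bytes = 16 / num_elements;
  return element * element_bytes * 8;	/* == element << (7 - log2 (n)).  */
}
#endif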
8182
8183 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8184 selects whether the alignment is ABI-mandated, optional, or
8185 both ABI-mandated and optional alignment. */
8186
8187 unsigned int
8188 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8189 {
8190 if (how != align_opt)
8191 {
8192 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
8193 align = 128;
8194 }
8195
8196 if (how != align_abi)
8197 {
8198 if (TREE_CODE (type) == ARRAY_TYPE
8199 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8200 {
8201 if (align < BITS_PER_WORD)
8202 align = BITS_PER_WORD;
8203 }
8204 }
8205
8206 return align;
8207 }
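
/* For instance (illustrative): a vector type is raised to 128 bits whenever
   ABI-mandated alignment is included in HOW, while a "char buf[32]" is
   raised to BITS_PER_WORD only when optional (optimization) alignment is
   included.  */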
8208
8209 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8210 instructions simply ignore the low bits; VSX memory instructions
8211 are aligned to 4 or 8 bytes. */
8212
8213 static bool
8214 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8215 {
8216 return (STRICT_ALIGNMENT
8217 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8218 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8219 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
8220 && (int) align < VECTOR_ALIGN (mode)))));
8221 }
8222
8223 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8224
8225 unsigned int
8226 rs6000_special_adjust_field_align (tree type, unsigned int computed)
8227 {
8228 if (computed <= 32 || TYPE_PACKED (type))
8229 return computed;
8230
8231 /* Strip initial arrays. */
8232 while (TREE_CODE (type) == ARRAY_TYPE)
8233 type = TREE_TYPE (type);
8234
8235 /* If RECORD or UNION, recursively find the first field. */
8236 while (AGGREGATE_TYPE_P (type))
8237 {
8238 tree field = TYPE_FIELDS (type);
8239
8240 /* Skip all non-field decls. */
8241 while (field != NULL
8242 && (TREE_CODE (field) != FIELD_DECL
8243 || DECL_FIELD_ABI_IGNORED (field)))
8244 field = DECL_CHAIN (field);
8245
8246 if (! field)
8247 break;
8248
8249 /* A packed field does not contribute any extra alignment. */
8250 if (DECL_PACKED (field))
8251 return computed;
8252
8253 type = TREE_TYPE (field);
8254
8255 /* Strip arrays. */
8256 while (TREE_CODE (type) == ARRAY_TYPE)
8257 type = TREE_TYPE (type);
8258 }
8259
8260 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8261 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8262 computed = MIN (computed, 32);
8263
8264 return computed;
8265 }
8266
8267 /* AIX increases natural record alignment to doubleword if the innermost first
8268 field is an FP double while the FP fields remain word aligned.
8269 Only called if TYPE initially is a RECORD or UNION. */
8270
8271 unsigned int
8272 rs6000_special_round_type_align (tree type, unsigned int computed,
8273 unsigned int specified)
8274 {
8275 unsigned int align = MAX (computed, specified);
8276
8277 if (TYPE_PACKED (type) || align >= 64)
8278 return align;
8279
8280 /* If RECORD or UNION, recursively find the first field. */
8281 do
8282 {
8283 tree field = TYPE_FIELDS (type);
8284
8285 /* Skip all non-field decls. */
8286 while (field != NULL
8287 && (TREE_CODE (field) != FIELD_DECL
8288 || DECL_FIELD_ABI_IGNORED (field)))
8289 field = DECL_CHAIN (field);
8290
8291 if (! field)
8292 break;
8293
8294 /* A packed field does not contribute any extra alignment. */
8295 if (DECL_PACKED (field))
8296 return align;
8297
8298 type = TREE_TYPE (field);
8299
8300 /* Strip arrays. */
8301 while (TREE_CODE (type) == ARRAY_TYPE)
8302 type = TREE_TYPE (type);
8303 } while (AGGREGATE_TYPE_P (type));
8304
8305 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8306 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8307 align = MAX (align, 64);
8308
8309 return align;
8310 }
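
/* Illustrative example: on AIX,
     struct S { double d; int i; };
   has its record alignment rounded up to 64 bits because the innermost
   first field is a double, even though the double member itself remains
   word aligned (see rs6000_special_adjust_field_align above).  */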
8311
8312 /* Darwin increases record alignment to the natural alignment of
8313 the first field. */
8314
8315 unsigned int
8316 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8317 unsigned int specified)
8318 {
8319 unsigned int align = MAX (computed, specified);
8320
8321 if (TYPE_PACKED (type))
8322 return align;
8323
8324 /* Find the first field, looking down into aggregates. */
8325 do {
8326 tree field = TYPE_FIELDS (type);
8327 /* Skip all non-field decls. */
8328 while (field != NULL
8329 && (TREE_CODE (field) != FIELD_DECL
8330 || DECL_FIELD_ABI_IGNORED (field)))
8331 field = DECL_CHAIN (field);
8332 if (! field)
8333 break;
8334 /* A packed field does not contribute any extra alignment. */
8335 if (DECL_PACKED (field))
8336 return align;
8337 type = TREE_TYPE (field);
8338 while (TREE_CODE (type) == ARRAY_TYPE)
8339 type = TREE_TYPE (type);
8340 } while (AGGREGATE_TYPE_P (type));
8341
8342 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8343 align = MAX (align, TYPE_ALIGN (type));
8344
8345 return align;
8346 }
8347
8348 /* Return 1 for an operand in small memory on V.4/eabi. */
8349
8350 int
8351 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8352 machine_mode mode ATTRIBUTE_UNUSED)
8353 {
8354 #if TARGET_ELF
8355 rtx sym_ref;
8356
8357 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8358 return 0;
8359
8360 if (DEFAULT_ABI != ABI_V4)
8361 return 0;
8362
8363 if (SYMBOL_REF_P (op))
8364 sym_ref = op;
8365
8366 else if (GET_CODE (op) != CONST
8367 || GET_CODE (XEXP (op, 0)) != PLUS
8368 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8369 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8370 return 0;
8371
8372 else
8373 {
8374 rtx sum = XEXP (op, 0);
8375 HOST_WIDE_INT summand;
8376
8377 /* We have to be careful here, because it is the referenced address
8378 that must be 32k from _SDA_BASE_, not just the symbol. */
8379 summand = INTVAL (XEXP (sum, 1));
8380 if (summand < 0 || summand > g_switch_value)
8381 return 0;
8382
8383 sym_ref = XEXP (sum, 0);
8384 }
8385
8386 return SYMBOL_REF_SMALL_P (sym_ref);
8387 #else
8388 return 0;
8389 #endif
8390 }
8391
8392 /* Return true if either operand is a general purpose register. */
8393
8394 bool
8395 gpr_or_gpr_p (rtx op0, rtx op1)
8396 {
8397 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8398 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8399 }
8400
8401 /* Return true if this is a move direct operation between GPR registers and
8402 floating point/VSX registers. */
8403
8404 bool
8405 direct_move_p (rtx op0, rtx op1)
8406 {
8407 if (!REG_P (op0) || !REG_P (op1))
8408 return false;
8409
8410 if (!TARGET_DIRECT_MOVE)
8411 return false;
8412
8413 int regno0 = REGNO (op0);
8414 int regno1 = REGNO (op1);
8415 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8416 return false;
8417
8418 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8419 return true;
8420
8421 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8422 return true;
8423
8424 return false;
8425 }
8426
8427 /* Return true if the ADDR is an acceptable address for a quad memory
8428 operation of mode MODE (either LQ/STQ for general purpose registers, or
8429 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the base
8430 register must pass strict checking, i.e. be a valid hard register, as is
8431 required after reload. */
8432
8433 bool
8434 quad_address_p (rtx addr, machine_mode mode, bool strict)
8435 {
8436 rtx op0, op1;
8437
8438 if (GET_MODE_SIZE (mode) < 16)
8439 return false;
8440
8441 if (legitimate_indirect_address_p (addr, strict))
8442 return true;
8443
8444 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8445 return false;
8446
8447 /* Is this a valid prefixed address? If the bottom four bits of the offset
8448 are non-zero, we could use a prefixed instruction (which does not have the
8449 DQ-form constraint that the traditional instruction had) instead of
8450 forcing the unaligned offset to a GPR. */
8451 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8452 return true;
8453
8454 if (GET_CODE (addr) != PLUS)
8455 return false;
8456
8457 op0 = XEXP (addr, 0);
8458 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8459 return false;
8460
8461 op1 = XEXP (addr, 1);
8462 if (!CONST_INT_P (op1))
8463 return false;
8464
8465 return quad_address_offset_p (INTVAL (op1));
8466 }
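
/* Example (illustrative): with base register R, "R + 48" is an acceptable
   DQ-form LXV/STXV address, while "R + 40" is not -- its low four bits are
   non-zero -- and must use a prefixed instruction or an index register
   instead.  */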
8467
8468 /* Return true if this is a load or store quad operation. This function does
8469 not handle the atomic quad memory instructions. */
8470
8471 bool
8472 quad_load_store_p (rtx op0, rtx op1)
8473 {
8474 bool ret;
8475
8476 if (!TARGET_QUAD_MEMORY)
8477 ret = false;
8478
8479 else if (REG_P (op0) && MEM_P (op1))
8480 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8481 && quad_memory_operand (op1, GET_MODE (op1))
8482 && !reg_overlap_mentioned_p (op0, op1));
8483
8484 else if (MEM_P (op0) && REG_P (op1))
8485 ret = (quad_memory_operand (op0, GET_MODE (op0))
8486 && quad_int_reg_operand (op1, GET_MODE (op1)));
8487
8488 else
8489 ret = false;
8490
8491 if (TARGET_DEBUG_ADDR)
8492 {
8493 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8494 ret ? "true" : "false");
8495 debug_rtx (gen_rtx_SET (op0, op1));
8496 }
8497
8498 return ret;
8499 }
8500
8501 /* Given an address, return a constant offset term if one exists. */
8502
8503 static rtx
8504 address_offset (rtx op)
8505 {
8506 if (GET_CODE (op) == PRE_INC
8507 || GET_CODE (op) == PRE_DEC)
8508 op = XEXP (op, 0);
8509 else if (GET_CODE (op) == PRE_MODIFY
8510 || GET_CODE (op) == LO_SUM)
8511 op = XEXP (op, 1);
8512
8513 if (GET_CODE (op) == CONST)
8514 op = XEXP (op, 0);
8515
8516 if (GET_CODE (op) == PLUS)
8517 op = XEXP (op, 1);
8518
8519 if (CONST_INT_P (op))
8520 return op;
8521
8522 return NULL_RTX;
8523 }
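
/* E.g. (illustrative): for (plus (reg) (const_int 8)) this returns
   (const_int 8); for a plain (reg) indirect address it returns NULL_RTX.  */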
8524
8525 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8526 the mode. If we can't find (or don't know) the alignment of the symbol
8527 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8528 should be pessimistic]. Offsets are validated in the same way as for
8529 reg + offset. */
8530 static bool
8531 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8532 {
8533 /* We should not get here with this. */
8534 gcc_checking_assert (! mode_supports_dq_form (mode));
8535
8536 if (GET_CODE (x) == CONST)
8537 x = XEXP (x, 0);
8538
8539 /* If we are building PIC code, then any symbol must be wrapped in an
8540 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8541 bool machopic_offs_p = false;
8542 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8543 {
8544 x = XVECEXP (x, 0, 0);
8545 machopic_offs_p = true;
8546 }
8547
8548 rtx sym = NULL_RTX;
8549 unsigned HOST_WIDE_INT offset = 0;
8550
8551 if (GET_CODE (x) == PLUS)
8552 {
8553 sym = XEXP (x, 0);
8554 if (! SYMBOL_REF_P (sym))
8555 return false;
8556 if (!CONST_INT_P (XEXP (x, 1)))
8557 return false;
8558 offset = INTVAL (XEXP (x, 1));
8559 }
8560 else if (SYMBOL_REF_P (x))
8561 sym = x;
8562 else if (CONST_INT_P (x))
8563 offset = INTVAL (x);
8564 else if (GET_CODE (x) == LABEL_REF)
8565 offset = 0; // We assume code labels are Pmode aligned
8566 else
8567 return false; // not sure what we have here.
8568
8569 /* If we don't know the alignment of the thing to which the symbol refers,
8570 we assume optimistically it is "enough".
8571 ??? maybe we should be pessimistic instead. */
8572 unsigned align = 0;
8573
8574 if (sym)
8575 {
8576 tree decl = SYMBOL_REF_DECL (sym);
8577 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8578 if (TARGET_MACHO && flag_pic && !machopic_offs_p)
8579 return false;
8580 #if TARGET_MACHO
8581 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8582 /* The decl in an indirection symbol is the original one, which might
8583 be less aligned than the indirection. Our indirections are always
8584 pointer-aligned. */
8585 ;
8586 else
8587 #endif
8588 if (decl && DECL_ALIGN (decl))
8589 align = DECL_ALIGN_UNIT (decl);
8590 }
8591
8592 unsigned int extra = 0;
8593 switch (mode)
8594 {
8595 case E_DFmode:
8596 case E_DDmode:
8597 case E_DImode:
8598 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8599 addressing. */
8600 if (VECTOR_MEM_VSX_P (mode))
8601 return false;
8602
8603 if (!TARGET_POWERPC64)
8604 extra = 4;
8605 else if ((offset & 3) || (align & 3))
8606 return false;
8607 break;
8608
8609 case E_TFmode:
8610 case E_IFmode:
8611 case E_KFmode:
8612 case E_TDmode:
8613 case E_TImode:
8614 case E_PTImode:
8615 extra = 8;
8616 if (!TARGET_POWERPC64)
8617 extra = 12;
8618 else if ((offset & 3) || (align & 3))
8619 return false;
8620 break;
8621
8622 default:
8623 break;
8624 }
8625
8626 /* We only care if the access(es) would cause a change to the high part. */
8627 offset = sext_hwi (offset, 16);
8628 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8629 }
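
/* A worked example of the final range check, assuming
   SIGNED_16BIT_OFFSET_EXTRA_P (V, E) tests -0x8000 <= V <= 0x7fff - E:
   for a DFmode access on 32-bit Darwin, EXTRA is 4 because the second
   word is addressed at OFFSET + 4 with its own 16-bit displacement.
   OFFSET = 0x7ff8 passes (the second word at 0x7ffc still fits), while
   OFFSET = 0x7ffc fails, since the second word at 0x8000 would change
   the high part of the address.  */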
8630
8631 /* Return true if the MEM operand is a memory operand suitable for use
8632 with a (full width, possibly multiple) gpr load/store. On
8633 powerpc64 this means the offset must be divisible by 4.
8634 Implements 'Y' constraint.
8635
8636 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8637 a constraint function we know the operand has satisfied a suitable
8638 memory predicate.
8639
8640 Offsetting a lo_sum should not be allowed, except where we know by
8641 alignment that a 32k boundary is not crossed. Note that by
8642 "offsetting" here we mean a further offset to access parts of the
8643 MEM. It's fine to have a lo_sum where the inner address is offset
8644 from a sym, since the same sym+offset will appear in the high part
8645 of the address calculation. */
8646
8647 bool
8648 mem_operand_gpr (rtx op, machine_mode mode)
8649 {
8650 unsigned HOST_WIDE_INT offset;
8651 int extra;
8652 rtx addr = XEXP (op, 0);
8653
8654 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8655 if (TARGET_UPDATE
8656 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8657 && mode_supports_pre_incdec_p (mode)
8658 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8659 return true;
8660
8661 /* Allow prefixed instructions if supported. If the bottom two bits of the
8662 offset are non-zero, we could use a prefixed instruction (which does not
8663 have the DS-form constraint that the traditional instruction had) instead
8664 of forcing the unaligned offset to a GPR. */
8665 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8666 return true;
8667
8668 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8669 really OK. Doing this early avoids teaching all the other machinery
8670 about them. */
8671 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8672 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8673
8674 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8675 if (!rs6000_offsettable_memref_p (op, mode, false))
8676 return false;
8677
8678 op = address_offset (addr);
8679 if (op == NULL_RTX)
8680 return true;
8681
8682 offset = INTVAL (op);
8683 if (TARGET_POWERPC64 && (offset & 3) != 0)
8684 return false;
8685
8686 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8687 if (extra < 0)
8688 extra = 0;
8689
8690 if (GET_CODE (addr) == LO_SUM)
8691 /* For lo_sum addresses, we must allow any offset except one that
8692 causes a wrap, so test only the low 16 bits. */
8693 offset = sext_hwi (offset, 16);
8694
8695 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8696 }
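
/* Illustrative cases for the 'Y' constraint on powerpc64 (register
   number hypothetical):

     (mem:DI (plus (reg 9) (const_int 8)))  -- accepted, offset % 4 == 0
     (mem:DI (plus (reg 9) (const_int 6)))  -- rejected, since DS-form
	ld/std need the low two offset bits clear, unless a prefixed
	pld/pstd can be used instead.  */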
8697
8698 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8699 enforce an offset divisible by 4 even for 32-bit. */
8700
8701 bool
8702 mem_operand_ds_form (rtx op, machine_mode mode)
8703 {
8704 unsigned HOST_WIDE_INT offset;
8705 int extra;
8706 rtx addr = XEXP (op, 0);
8707
8708 /* Allow prefixed instructions if supported. If the bottom two bits of the
8709 offset are non-zero, we could use a prefixed instruction (which does not
8710 have the DS-form constraint that the traditional instruction had) instead
8711 of forcing the unaligned offset to a GPR. */
8712 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8713 return true;
8714
8715 if (!offsettable_address_p (false, mode, addr))
8716 return false;
8717
8718 op = address_offset (addr);
8719 if (op == NULL_RTX)
8720 return true;
8721
8722 offset = INTVAL (op);
8723 if ((offset & 3) != 0)
8724 return false;
8725
8726 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8727 if (extra < 0)
8728 extra = 0;
8729
8730 if (GET_CODE (addr) == LO_SUM)
8731 /* For lo_sum addresses, we must allow any offset except one that
8732 causes a wrap, so test only the low 16 bits. */
8733 offset = sext_hwi (offset, 16);
8734
8735 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8736 }
8737 \f
8738 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8739
8740 static bool
8741 reg_offset_addressing_ok_p (machine_mode mode)
8742 {
8743 switch (mode)
8744 {
8745 case E_V16QImode:
8746 case E_V8HImode:
8747 case E_V4SFmode:
8748 case E_V4SImode:
8749 case E_V2DFmode:
8750 case E_V2DImode:
8751 case E_V1TImode:
8752 case E_TImode:
8753 case E_TFmode:
8754 case E_KFmode:
8755 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8756 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8757 a vector mode, if we want to use the VSX registers to move it around,
8758 we need to restrict ourselves to reg+reg addressing. Similarly for
8759 IEEE 128-bit floating point that is passed in a single vector
8760 register. */
8761 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8762 return mode_supports_dq_form (mode);
8763 break;
8764
8765 /* The vector pair/quad types and the dense math types support offset
8766 addressing if the underlying vectors support offset addressing. */
8767 case E_OOmode:
8768 case E_XOmode:
8769 return TARGET_MMA;
8770
8771 case E_TDOmode:
8772 return TARGET_DENSE_MATH;
8773
8774 case E_SDmode:
8775 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8776 addressing for the LFIWZX and STFIWX instructions. */
8777 if (TARGET_NO_SDMODE_STACK)
8778 return false;
8779 break;
8780
8781 default:
8782 break;
8783 }
8784
8785 return true;
8786 }
8787
8788 static bool
8789 virtual_stack_registers_memory_p (rtx op)
8790 {
8791 int regnum;
8792
8793 if (REG_P (op))
8794 regnum = REGNO (op);
8795
8796 else if (GET_CODE (op) == PLUS
8797 && REG_P (XEXP (op, 0))
8798 && CONST_INT_P (XEXP (op, 1)))
8799 regnum = REGNO (XEXP (op, 0));
8800
8801 else
8802 return false;
8803
8804 return (regnum >= FIRST_VIRTUAL_REGISTER
8805 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8806 }
8807
8808 /* Return true if a MODE sized memory access to OP plus OFFSET
8809 is known to not straddle a 32k boundary. This function is used
8810 to determine whether -mcmodel=medium code can use TOC pointer
8811 relative addressing for OP. This means the alignment of the TOC
8812 pointer must also be taken into account, and unfortunately that is
8813 only 8 bytes. */
8814
8815 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8816 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8817 #endif
8818
8819 static bool
8820 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8821 machine_mode mode)
8822 {
8823 tree decl;
8824 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8825
8826 if (!SYMBOL_REF_P (op))
8827 return false;
8828
8829 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8830 SYMBOL_REF. */
8831 if (mode_supports_dq_form (mode))
8832 return false;
8833
8834 dsize = GET_MODE_SIZE (mode);
8835 decl = SYMBOL_REF_DECL (op);
8836 if (!decl)
8837 {
8838 if (dsize == 0)
8839 return false;
8840
8841 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8842 replacing memory addresses with an anchor plus offset. We
8843 could find the decl by rummaging around in the block->objects
8844 VEC for the given offset but that seems like too much work. */
8845 dalign = BITS_PER_UNIT;
8846 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8847 && SYMBOL_REF_ANCHOR_P (op)
8848 && SYMBOL_REF_BLOCK (op) != NULL)
8849 {
8850 struct object_block *block = SYMBOL_REF_BLOCK (op);
8851
8852 dalign = block->alignment;
8853 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8854 }
8855 else if (CONSTANT_POOL_ADDRESS_P (op))
8856 {
8857 /* It would be nice to have get_pool_align()... */
8858 machine_mode cmode = get_pool_mode (op);
8859
8860 dalign = GET_MODE_ALIGNMENT (cmode);
8861 }
8862 }
8863 else if (DECL_P (decl))
8864 {
8865 dalign = DECL_ALIGN (decl);
8866
8867 if (dsize == 0)
8868 {
8869 /* Allow BLKmode when the entire object is known to not
8870 cross a 32k boundary. */
8871 if (!DECL_SIZE_UNIT (decl))
8872 return false;
8873
8874 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8875 return false;
8876
8877 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8878 if (dsize > 32768)
8879 return false;
8880
8881 dalign /= BITS_PER_UNIT;
8882 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8883 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8884 return dalign >= dsize;
8885 }
8886 }
8887 else
8888 gcc_unreachable ();
8889
8890 /* Find how many bits of the alignment we know for this access. */
8891 dalign /= BITS_PER_UNIT;
8892 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8893 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8894 mask = dalign - 1;
8895 lsb = offset & -offset;
8896 mask &= lsb - 1;
8897 dalign = mask + 1;
8898
8899 return dalign >= dsize;
8900 }
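
/* A worked example of the mask arithmetic above: with dalign = 8
   (already clamped to the TOC pointer alignment) and offset = 20,
   lsb = 20 & -20 = 4, mask = (8 - 1) & (4 - 1) = 3, so the provable
   alignment of the access drops to dalign = 4.  A 4-byte access
   (dsize = 4) is then known not to straddle a 32k boundary, while an
   8-byte access (dsize = 8) is not.  */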
8901
8902 static bool
8903 constant_pool_expr_p (rtx op)
8904 {
8905 rtx base, offset;
8906
8907 split_const (op, &base, &offset);
8908 return (SYMBOL_REF_P (base)
8909 && CONSTANT_POOL_ADDRESS_P (base)
8910 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8911 }
8912
8913 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8914 use that as the register to put the HIGH value into if register allocation
8915 is already done. */
8916
8917 rtx
8918 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8919 {
8920 rtx tocrel, tocreg, hi;
8921
8922 gcc_assert (TARGET_TOC);
8923
8924 if (TARGET_DEBUG_ADDR)
8925 {
8926 if (SYMBOL_REF_P (symbol))
8927 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8928 XSTR (symbol, 0));
8929 else
8930 {
8931 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8932 GET_RTX_NAME (GET_CODE (symbol)));
8933 debug_rtx (symbol);
8934 }
8935 }
8936
8937 if (!can_create_pseudo_p ())
8938 df_set_regs_ever_live (TOC_REGISTER, true);
8939
8940 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8941 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8942 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8943 return tocrel;
8944
8945 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8946 if (largetoc_reg != NULL)
8947 {
8948 emit_move_insn (largetoc_reg, hi);
8949 hi = largetoc_reg;
8950 }
8951 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8952 }
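
/* For illustration, the shapes produced above.  With -mcmodel=small,
   or while we can still create pseudos, the result is the bare
   UNSPEC_TOCREL:

     (unspec [(symbol_ref "sym") (reg 2)] UNSPEC_TOCREL)

   Otherwise the split medium/large-model form is returned:

     (lo_sum (high (unspec [(symbol_ref "sym") (reg 2)] UNSPEC_TOCREL))
	     (unspec [(symbol_ref "sym") (reg 2)] UNSPEC_TOCREL))

   and when LARGETOC_REG is non-null the HIGH part is first moved into
   that register, which then appears as the first LO_SUM operand.  */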
8953
8954 /* These are only used to pass through from print_operand/print_operand_address
8955 to rs6000_output_addr_const_extra over the intervening function
8956 output_addr_const which is not target code. */
8957 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8958
8959 /* Return true if OP is a toc pointer relative address (the output
8960 of create_TOC_reference). If STRICT, do not match non-split
8961 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8962 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8963 TOCREL_OFFSET_RET respectively. */
8964
8965 bool
8966 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8967 const_rtx *tocrel_offset_ret)
8968 {
8969 if (!TARGET_TOC)
8970 return false;
8971
8972 if (TARGET_CMODEL != CMODEL_SMALL)
8973 {
8974 /* When strict ensure we have everything tidy. */
8975 if (strict
8976 && !(GET_CODE (op) == LO_SUM
8977 && REG_P (XEXP (op, 0))
8978 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8979 return false;
8980
8981 /* When not strict, allow non-split TOC addresses and also allow
8982 (lo_sum (high ..)) TOC addresses created during reload. */
8983 if (GET_CODE (op) == LO_SUM)
8984 op = XEXP (op, 1);
8985 }
8986
8987 const_rtx tocrel_base = op;
8988 const_rtx tocrel_offset = const0_rtx;
8989
8990 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8991 {
8992 tocrel_base = XEXP (op, 0);
8993 tocrel_offset = XEXP (op, 1);
8994 }
8995
8996 if (tocrel_base_ret)
8997 *tocrel_base_ret = tocrel_base;
8998 if (tocrel_offset_ret)
8999 *tocrel_offset_ret = tocrel_offset;
9000
9001 return (GET_CODE (tocrel_base) == UNSPEC
9002 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
9003 && REG_P (XVECEXP (tocrel_base, 0, 1))
9004 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
9005 }
9006
9007 /* Return true if X is a constant pool address, and also for cmodel=medium
9008 if X is a toc-relative address known to be offsettable within MODE. */
9009
9010 bool
9011 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
9012 bool strict)
9013 {
9014 const_rtx tocrel_base, tocrel_offset;
9015 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
9016 && (TARGET_CMODEL != CMODEL_MEDIUM
9017 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
9018 || mode == QImode
9019 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
9020 INTVAL (tocrel_offset), mode)));
9021 }
9022
9023 static bool
9024 legitimate_small_data_p (machine_mode mode, rtx x)
9025 {
9026 return (DEFAULT_ABI == ABI_V4
9027 && !flag_pic && !TARGET_TOC
9028 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
9029 && small_data_operand (x, mode));
9030 }
9031
9032 bool
9033 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
9034 bool strict, bool worst_case)
9035 {
9036 unsigned HOST_WIDE_INT offset;
9037 unsigned int extra;
9038
9039 if (GET_CODE (x) != PLUS)
9040 return false;
9041 if (!REG_P (XEXP (x, 0)))
9042 return false;
9043 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9044 return false;
9045 if (mode_supports_dq_form (mode))
9046 return quad_address_p (x, mode, strict);
9047 if (!reg_offset_addressing_ok_p (mode))
9048 return virtual_stack_registers_memory_p (x);
9049 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
9050 return true;
9051 if (!CONST_INT_P (XEXP (x, 1)))
9052 return false;
9053
9054 offset = INTVAL (XEXP (x, 1));
9055 extra = 0;
9056 switch (mode)
9057 {
9058 case E_DFmode:
9059 case E_DDmode:
9060 case E_DImode:
9061 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
9062 addressing. */
9063 if (VECTOR_MEM_VSX_P (mode))
9064 return false;
9065
9066 if (!worst_case)
9067 break;
9068 if (!TARGET_POWERPC64)
9069 extra = 4;
9070 else if (offset & 3)
9071 return false;
9072 break;
9073
9074 case E_TFmode:
9075 case E_IFmode:
9076 case E_KFmode:
9077 case E_TDmode:
9078 case E_TImode:
9079 case E_PTImode:
9080 extra = 8;
9081 if (!worst_case)
9082 break;
9083 if (!TARGET_POWERPC64)
9084 extra = 12;
9085 else if (offset & 3)
9086 return false;
9087 break;
9088
9089 default:
9090 break;
9091 }
9092
9093 if (TARGET_PREFIXED)
9094 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
9095 else
9096 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
9097 }
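
/* A worked example, assuming the 16-bit test is
   -0x8000 <= OFFSET <= 0x7fff - EXTRA: a 32-bit TImode access with
   WORST_CASE set uses EXTRA = 12, since the value may end up in four
   GPRs loaded at OFFSET, +4, +8 and +12.  OFFSET = 0x7ff0 is accepted,
   but OFFSET = 0x7ff8 is rejected because its last word would sit at
   0x8004, out of 16-bit signed range.  */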
9098
9099 bool
9100 legitimate_indexed_address_p (rtx x, int strict)
9101 {
9102 rtx op0, op1;
9103
9104 if (GET_CODE (x) != PLUS)
9105 return false;
9106
9107 op0 = XEXP (x, 0);
9108 op1 = XEXP (x, 1);
9109
9110 return (REG_P (op0) && REG_P (op1)
9111 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9112 && INT_REG_OK_FOR_INDEX_P (op1, strict))
9113 || (INT_REG_OK_FOR_BASE_P (op1, strict)
9114 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9115 }
9116
9117 bool
9118 avoiding_indexed_address_p (machine_mode mode)
9119 {
9120 unsigned int msize = GET_MODE_SIZE (mode);
9121
9122 /* Avoid indexed addressing for modes that have non-indexed load/store
9123 instruction forms. On power10, vector pairs have an indexed
9124 form, but vector quads don't. */
9125 if (msize > 16)
9126 return msize != 32;
9127
9128 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9129 }
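
/* For illustration: OOmode vector pairs (msize == 32) return false
   here because power10 has indexed pair loads and stores (lxvpx and
   stxvpx), while XOmode vector quads (msize == 64) return true, as no
   indexed form exists for them.  */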
9130
9131 bool
9132 legitimate_indirect_address_p (rtx x, int strict)
9133 {
9134 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
9135 }
9136
9137 bool
9138 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9139 {
9140 if (!TARGET_MACHO || !flag_pic
9141 || mode != SImode || !MEM_P (x))
9142 return false;
9143 x = XEXP (x, 0);
9144
9145 if (GET_CODE (x) != LO_SUM)
9146 return false;
9147 if (!REG_P (XEXP (x, 0)))
9148 return false;
9149 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9150 return false;
9151 x = XEXP (x, 1);
9152
9153 return CONSTANT_P (x);
9154 }
9155
9156 static bool
9157 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9158 {
9159 if (GET_CODE (x) != LO_SUM)
9160 return false;
9161 if (!REG_P (XEXP (x, 0)))
9162 return false;
9163 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9164 return false;
9165 /* quad word addresses are restricted, and we can't use LO_SUM. */
9166 if (mode_supports_dq_form (mode))
9167 return false;
9168 x = XEXP (x, 1);
9169
9170 if (TARGET_ELF)
9171 {
9172 bool large_toc_ok;
9173
9174 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9175 return false;
9176 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, since that hook is
9177 called via push_reload only from the old reload pass.
9178 LEGITIMIZE_RELOAD_ADDRESS recognizes some LO_SUM addresses as valid
9179 although this function says the opposite. LRA can usually generate
9180 correct code for address reloads through its own transformations;
9181 it fails only for some LO_SUM cases, so we need to add code here
9182 saying that those addresses are still valid. */
9183 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9184 && small_toc_ref (x, VOIDmode));
9185 if (TARGET_TOC && ! large_toc_ok)
9186 return false;
9187 if (GET_MODE_NUNITS (mode) != 1)
9188 return false;
9189 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9190 && !(/* ??? Assume floating point reg based on mode? */
9191 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9192 return false;
9193
9194 return CONSTANT_P (x) || large_toc_ok;
9195 }
9196 else if (TARGET_MACHO)
9197 {
9198 if (GET_MODE_NUNITS (mode) != 1)
9199 return false;
9200 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9201 && !(/* see above */
9202 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9203 return false;
9204 #if TARGET_MACHO
9205 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
9206 return CONSTANT_P (x);
9207 #endif
9208 /* Mach-O PIC code from here. */
9209 if (GET_CODE (x) == CONST)
9210 x = XEXP (x, 0);
9211
9212 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9213 if (SYMBOL_REF_P (x))
9214 return false;
9215
9216 /* So this is OK if the wrapped object is const. */
9217 if (GET_CODE (x) == UNSPEC
9218 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
9219 return CONSTANT_P (XVECEXP (x, 0, 0));
9220 return CONSTANT_P (x);
9221 }
9222 return false;
9223 }
9224
9225
9226 /* Try machine-dependent ways of modifying an illegitimate address
9227 to be legitimate. If we find one, return the new, valid address.
9228 This is used from only one place: `memory_address' in explow.cc.
9229
9230 OLDX is the address as it was before break_out_memory_refs was
9231 called. In some cases it is useful to look at this to decide what
9232 needs to be done.
9233
9234 It is always safe for this function to do nothing. It exists to
9235 recognize opportunities to optimize the output.
9236
9237 On RS/6000, first check for the sum of a register with a constant
9238 integer that is out of range. If so, generate code to add the
9239 constant with the low-order 16 bits masked to the register and force
9240 this result into another register (this can be done with `cau').
9241 Then generate an address of REG+(CONST&0xffff), allowing for the
9242 possibility of bit 16 being a one.
9243
9244 Then check for the sum of a register and something not constant; try to
9245 load the non-constant part into a register and return the sum. */
9246
9247 static rtx
9248 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9249 machine_mode mode)
9250 {
9251 unsigned int extra;
9252
9253 if (!reg_offset_addressing_ok_p (mode)
9254 || mode_supports_dq_form (mode))
9255 {
9256 if (virtual_stack_registers_memory_p (x))
9257 return x;
9258
9259 /* In theory we should not be seeing addresses of the form reg+0,
9260 but just in case it is generated, optimize it away. */
9261 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9262 return force_reg (Pmode, XEXP (x, 0));
9263
9264 /* For TImode with load/store quad, restrict addresses to just a single
9265 pointer, so it works with both GPRs and VSX registers. */
9266 /* Make sure both operands are registers. */
9267 else if (GET_CODE (x) == PLUS
9268 && (mode != TImode || !TARGET_VSX))
9269 return gen_rtx_PLUS (Pmode,
9270 force_reg (Pmode, XEXP (x, 0)),
9271 force_reg (Pmode, XEXP (x, 1)));
9272 else
9273 return force_reg (Pmode, x);
9274 }
9275 if (SYMBOL_REF_P (x) && !TARGET_MACHO)
9276 {
9277 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9278 if (model != 0)
9279 return rs6000_legitimize_tls_address (x, model);
9280 }
9281
9282 extra = 0;
9283 switch (mode)
9284 {
9285 case E_TFmode:
9286 case E_TDmode:
9287 case E_TImode:
9288 case E_PTImode:
9289 case E_IFmode:
9290 case E_KFmode:
9291 /* As in legitimate_offset_address_p we do not assume
9292 worst-case. The mode here is just a hint as to the registers
9293 used. A TImode is usually in gprs, but may actually be in
9294 fprs. Leave worst-case scenario for reload to handle via
9295 insn constraints. PTImode is only GPRs. */
9296 extra = 8;
9297 break;
9298 default:
9299 break;
9300 }
9301
9302 if (GET_CODE (x) == PLUS
9303 && REG_P (XEXP (x, 0))
9304 && CONST_INT_P (XEXP (x, 1))
9305 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9306 >= 0x10000 - extra))
9307 {
9308 HOST_WIDE_INT high_int, low_int;
9309 rtx sum;
9310 low_int = sext_hwi (INTVAL (XEXP (x, 1)), 16);
9311 if (low_int >= 0x8000 - extra)
9312 low_int = 0;
9313 high_int = INTVAL (XEXP (x, 1)) - low_int;
9314 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9315 gen_int_mode (high_int, Pmode)), 0);
9316 return plus_constant (Pmode, sum, low_int);
9317 }
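  /* A worked example of the split above, with a hypothetical register:
     for x = (plus (reg 9) (const_int 0x12345)) we get
     low_int = sext_hwi (0x2345, 16) = 0x2345 and high_int = 0x10000,
     so force_operand emits "addis tmp,9,1" and we return
     (plus (reg tmp) (const_int 0x2345)), both parts now in range.  */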
9318 else if (GET_CODE (x) == PLUS
9319 && REG_P (XEXP (x, 0))
9320 && !CONST_INT_P (XEXP (x, 1))
9321 && GET_MODE_NUNITS (mode) == 1
9322 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9323 || (/* ??? Assume floating point reg based on mode? */
9324 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9325 && !avoiding_indexed_address_p (mode))
9326 {
9327 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9328 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9329 }
9330 else if ((TARGET_ELF
9331 #if TARGET_MACHO
9332 || !MACHO_DYNAMIC_NO_PIC_P
9333 #endif
9334 )
9335 && TARGET_32BIT
9336 && TARGET_NO_TOC_OR_PCREL
9337 && !flag_pic
9338 && !CONST_INT_P (x)
9339 && !CONST_WIDE_INT_P (x)
9340 && !CONST_DOUBLE_P (x)
9341 && CONSTANT_P (x)
9342 && GET_MODE_NUNITS (mode) == 1
9343 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9344 || (/* ??? Assume floating point reg based on mode? */
9345 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9346 {
9347 rtx reg = gen_reg_rtx (Pmode);
9348 if (TARGET_ELF)
9349 emit_insn (gen_elf_high (reg, x));
9350 else
9351 emit_insn (gen_macho_high (Pmode, reg, x));
9352 return gen_rtx_LO_SUM (Pmode, reg, x);
9353 }
9354 else if (TARGET_TOC
9355 && SYMBOL_REF_P (x)
9356 && constant_pool_expr_p (x)
9357 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9358 return create_TOC_reference (x, NULL_RTX);
9359 else
9360 return x;
9361 }
9362
9363 /* Debug version of rs6000_legitimize_address. */
9364 static rtx
9365 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9366 {
9367 rtx ret;
9368 rtx_insn *insns;
9369
9370 start_sequence ();
9371 ret = rs6000_legitimize_address (x, oldx, mode);
9372 insns = get_insns ();
9373 end_sequence ();
9374
9375 if (ret != x)
9376 {
9377 fprintf (stderr,
9378 "\nrs6000_legitimize_address: mode %s, old code %s, "
9379 "new code %s, modified\n",
9380 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9381 GET_RTX_NAME (GET_CODE (ret)));
9382
9383 fprintf (stderr, "Original address:\n");
9384 debug_rtx (x);
9385
9386 fprintf (stderr, "oldx:\n");
9387 debug_rtx (oldx);
9388
9389 fprintf (stderr, "New address:\n");
9390 debug_rtx (ret);
9391
9392 if (insns)
9393 {
9394 fprintf (stderr, "Insns added:\n");
9395 debug_rtx_list (insns, 20);
9396 }
9397 }
9398 else
9399 {
9400 fprintf (stderr,
9401 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9402 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9403
9404 debug_rtx (x);
9405 }
9406
9407 if (insns)
9408 emit_insn (insns);
9409
9410 return ret;
9411 }
9412
9413 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9414 We need to emit DTP-relative relocations. */
9415
9416 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9417 static void
9418 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9419 {
9420 switch (size)
9421 {
9422 case 4:
9423 fputs ("\t.long\t", file);
9424 break;
9425 case 8:
9426 fputs (DOUBLE_INT_ASM_OP, file);
9427 break;
9428 default:
9429 gcc_unreachable ();
9430 }
9431 output_addr_const (file, x);
9432 if (TARGET_ELF)
9433 fputs ("@dtprel+0x8000", file);
9434 }
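
/* For illustration: with SIZE == 4 on an ELF target and X a
   symbol_ref for the hypothetical variable "var", this emits

	.long	var@dtprel+0x8000

   i.e. the DTP-relative value biased by 0x8000.  */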
9435
9436 /* Return true if X is a symbol that refers to real (rather than emulated)
9437 TLS. */
9438
9439 static bool
9440 rs6000_real_tls_symbol_ref_p (rtx x)
9441 {
9442 return (SYMBOL_REF_P (x)
9443 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9444 }
9445
9446 /* In the name of slightly smaller debug output, and to cater to
9447 general assembler lossage, recognize various UNSPEC sequences
9448 and turn them back into a direct symbol reference. */
9449
9450 static rtx
9451 rs6000_delegitimize_address (rtx orig_x)
9452 {
9453 rtx x, y, offset;
9454
9455 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9456 encodes loading up the high part of the address of a TOC reference along
9457 with a load of a GPR using the same base register used for the load. We
9458 return the original SYMBOL_REF.
9459
9460 (set (reg:INT1 <reg>)
9461 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))
9462
9463 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9464 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9465 We return the original SYMBOL_REF.
9466
9467 (parallel [(set (reg:DI <base-reg>)
9468 (unspec:DI [(symbol_ref <symbol>)
9469 (const_int <marker>)]
9470 UNSPEC_PCREL_OPT_LD_ADDR))
9471 (set (reg:DI <load-reg>)
9472 (unspec:DI [(const_int 0)]
9473 UNSPEC_PCREL_OPT_LD_DATA))])
9474
9475 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9476 GPR being loaded is the same as the GPR used to hold the external address.
9477
9478 (set (reg:DI <base-reg>)
9479 (unspec:DI [(symbol_ref <symbol>)
9480 (const_int <marker>)]
9481 UNSPEC_PCREL_OPT_LD_SAME_REG))
9482
9483 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9484 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9485 We return the original SYMBOL_REF.
9486
9487 (parallel [(set (reg:DI <base-reg>)
9488 (unspec:DI [(symbol_ref <symbol>)
9489 (const_int <marker>)]
9490 UNSPEC_PCREL_OPT_ST_ADDR))
9491 (use (reg <store-reg>))]) */
9492
9493 if (GET_CODE (orig_x) == UNSPEC)
9494 switch (XINT (orig_x, 1))
9495 {
9496 case UNSPEC_FUSION_GPR:
9497 case UNSPEC_PCREL_OPT_LD_ADDR:
9498 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9499 case UNSPEC_PCREL_OPT_ST_ADDR:
9500 orig_x = XVECEXP (orig_x, 0, 0);
9501 break;
9502
9503 default:
9504 break;
9505 }
9506
9507 orig_x = delegitimize_mem_from_attrs (orig_x);
9508
9509 x = orig_x;
9510 if (MEM_P (x))
9511 x = XEXP (x, 0);
9512
9513 y = x;
9514 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9515 y = XEXP (y, 1);
9516
9517 offset = NULL_RTX;
9518 if (GET_CODE (y) == PLUS
9519 && GET_MODE (y) == Pmode
9520 && CONST_INT_P (XEXP (y, 1)))
9521 {
9522 offset = XEXP (y, 1);
9523 y = XEXP (y, 0);
9524 }
9525
9526 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9527 {
9528 y = XVECEXP (y, 0, 0);
9529
9530 #ifdef HAVE_AS_TLS
9531 /* Do not associate thread-local symbols with the original
9532 constant pool symbol. */
9533 if (TARGET_XCOFF
9534 && SYMBOL_REF_P (y)
9535 && CONSTANT_POOL_ADDRESS_P (y)
9536 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9537 return orig_x;
9538 #endif
9539
9540 if (offset != NULL_RTX)
9541 y = gen_rtx_PLUS (Pmode, y, offset);
9542 if (!MEM_P (orig_x))
9543 return y;
9544 else
9545 return replace_equiv_address_nv (orig_x, y);
9546 }
9547
9548 if (TARGET_MACHO
9549 && GET_CODE (orig_x) == LO_SUM
9550 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9551 {
9552 y = XEXP (XEXP (orig_x, 1), 0);
9553 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9554 return XVECEXP (y, 0, 0);
9555 }
9556
9557 return orig_x;
9558 }
9559
9560 /* Return true if X shouldn't be emitted into the debug info.
9561 The linker doesn't like .toc section references from
9562 .debug_* sections, so reject .toc section symbols. */
9563
9564 static bool
9565 rs6000_const_not_ok_for_debug_p (rtx x)
9566 {
9567 if (GET_CODE (x) == UNSPEC)
9568 return true;
9569 if (SYMBOL_REF_P (x)
9570 && CONSTANT_POOL_ADDRESS_P (x))
9571 {
9572 rtx c = get_pool_constant (x);
9573 machine_mode cmode = get_pool_mode (x);
9574 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9575 return true;
9576 }
9577
9578 return false;
9579 }
9580
9581 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9582
9583 static bool
9584 rs6000_legitimate_combined_insn (rtx_insn *insn)
9585 {
9586 int icode = INSN_CODE (insn);
9587
9588 /* Reject creating doloop insns. Combine should not be allowed
9589 to create these for a number of reasons:
9590 1) In a nested loop, if combine creates one of these in an
9591 outer loop and the register allocator happens to allocate ctr
9592 to the outer loop insn, then the inner loop can't use ctr.
9593 Inner loops ought to be more highly optimized.
9594 2) Combine often wants to create one of these from what was
9595 originally a three insn sequence, first combining the three
9596 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9597 allocated ctr, the splitter takes us back to the three insn
9598 sequence. It's better to stop combine at the two insn
9599 sequence.
9600 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9601 insns, the register allocator sometimes uses floating point
9602 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9603 jump insn and output reloads are not implemented for jumps,
9604 the ctrsi/ctrdi splitters need to handle all possible cases.
9605 That's a pain, and it gets to be seriously difficult when a
9606 splitter that runs after reload needs memory to transfer from
9607 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9608 for the difficult case. It's better to not create problems
9609 in the first place. */
9610 if (icode != CODE_FOR_nothing
9611 && (icode == CODE_FOR_bdz_si
9612 || icode == CODE_FOR_bdz_di
9613 || icode == CODE_FOR_bdnz_si
9614 || icode == CODE_FOR_bdnz_di
9615 || icode == CODE_FOR_bdztf_si
9616 || icode == CODE_FOR_bdztf_di
9617 || icode == CODE_FOR_bdnztf_si
9618 || icode == CODE_FOR_bdnztf_di))
9619 return false;
9620
9621 return true;
9622 }
9623
9624 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9625
9626 static GTY(()) rtx rs6000_tls_symbol;
9627 static rtx
9628 rs6000_tls_get_addr (void)
9629 {
9630 if (!rs6000_tls_symbol)
9631 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9632
9633 return rs6000_tls_symbol;
9634 }
9635
9636 /* Construct the SYMBOL_REF for TLS GOT references. */
9637
9638 static GTY(()) rtx rs6000_got_symbol;
9639 rtx
9640 rs6000_got_sym (void)
9641 {
9642 if (!rs6000_got_symbol)
9643 {
9644 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9645 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9646 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9647 }
9648
9649 return rs6000_got_symbol;
9650 }
9651
9652 /* AIX Thread-Local Address support. */
9653
9654 static rtx
9655 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9656 {
9657 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9658 const char *name;
9659 char *tlsname;
9660
9661 /* Place addr into TOC constant pool. */
9662 sym = force_const_mem (GET_MODE (addr), addr);
9663
9664 /* Output the TOC entry and create the MEM referencing the value. */
9665 if (constant_pool_expr_p (XEXP (sym, 0))
9666 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9667 {
9668 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9669 mem = gen_const_mem (Pmode, tocref);
9670 set_mem_alias_set (mem, get_TOC_alias_set ());
9671 }
9672 else
9673 return sym;
9674
9675 /* Use global-dynamic for local-dynamic. */
9676 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9677 || model == TLS_MODEL_LOCAL_DYNAMIC)
9678 {
9679 /* Create new TOC reference for @m symbol. */
9680 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9681 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9682 strcpy (tlsname, "*LCM");
9683 strcat (tlsname, name + 3);
9684 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9685 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9686 tocref = create_TOC_reference (modaddr, NULL_RTX);
9687 rtx modmem = gen_const_mem (Pmode, tocref);
9688 set_mem_alias_set (modmem, get_TOC_alias_set ());
9689
9690 rtx modreg = gen_reg_rtx (Pmode);
9691 emit_insn (gen_rtx_SET (modreg, modmem));
9692
9693 tmpreg = gen_reg_rtx (Pmode);
9694 emit_insn (gen_rtx_SET (tmpreg, mem));
9695
9696 dest = gen_reg_rtx (Pmode);
9697 if (TARGET_32BIT)
9698 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9699 else
9700 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9701 return dest;
9702 }
9703 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9704 else if (TARGET_32BIT)
9705 {
9706 tlsreg = gen_reg_rtx (SImode);
9707 emit_insn (gen_tls_get_tpointer (tlsreg));
9708 }
9709 else
9710 {
9711 tlsreg = gen_rtx_REG (DImode, 13);
9712 xcoff_tls_exec_model_detected = true;
9713 }
9714
9715 /* Load the TOC value into temporary register. */
9716 tmpreg = gen_reg_rtx (Pmode);
9717 emit_insn (gen_rtx_SET (tmpreg, mem));
9718 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9719 gen_rtx_MINUS (Pmode, addr, tlsreg));
9720
9721 /* Add TOC symbol value to TLS pointer. */
9722 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9723
9724 return dest;
9725 }
9726
9727 /* Passes the TLS arg value from the global-dynamic and local-dynamic
9728 emit_library_call_value calls in rs6000_legitimize_tls_address to
9729 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9730 marker relocs put on __tls_get_addr calls. */
9731 static rtx global_tlsarg;
9732
9733 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9734 this (thread-local) address. */
9735
9736 static rtx
9737 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9738 {
9739 rtx dest, insn;
9740
9741 if (TARGET_XCOFF)
9742 return rs6000_legitimize_tls_address_aix (addr, model);
9743
9744 dest = gen_reg_rtx (Pmode);
9745 if (model == TLS_MODEL_LOCAL_EXEC
9746 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9747 {
9748 rtx tlsreg;
9749
9750 if (TARGET_64BIT)
9751 {
9752 tlsreg = gen_rtx_REG (Pmode, 13);
9753 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9754 }
9755 else
9756 {
9757 tlsreg = gen_rtx_REG (Pmode, 2);
9758 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9759 }
9760 emit_insn (insn);
9761 }
9762 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9763 {
9764 rtx tlsreg, tmp;
9765
9766 tmp = gen_reg_rtx (Pmode);
9767 if (TARGET_64BIT)
9768 {
9769 tlsreg = gen_rtx_REG (Pmode, 13);
9770 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9771 }
9772 else
9773 {
9774 tlsreg = gen_rtx_REG (Pmode, 2);
9775 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9776 }
9777 emit_insn (insn);
9778 if (TARGET_64BIT)
9779 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9780 else
9781 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9782 emit_insn (insn);
9783 }
9784 else
9785 {
9786 rtx got, tga, tmp1, tmp2;
9787
9788 /* We currently use relocations like @got@tlsgd for tls, which
9789 means the linker will handle allocation of tls entries, placing
9790 them in the .got section. So use a pointer to the .got section,
9791 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9792 or to secondary GOT sections used by 32-bit -fPIC. */
9793 if (rs6000_pcrel_p ())
9794 got = const0_rtx;
9795 else if (TARGET_64BIT)
9796 got = gen_rtx_REG (Pmode, 2);
9797 else
9798 {
9799 if (flag_pic == 1)
9800 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9801 else
9802 {
9803 rtx gsym = rs6000_got_sym ();
9804 got = gen_reg_rtx (Pmode);
9805 if (flag_pic == 0)
9806 rs6000_emit_move (got, gsym, Pmode);
9807 else
9808 {
9809 rtx mem, lab;
9810
9811 tmp1 = gen_reg_rtx (Pmode);
9812 tmp2 = gen_reg_rtx (Pmode);
9813 mem = gen_const_mem (Pmode, tmp1);
9814 lab = gen_label_rtx ();
9815 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9816 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9817 if (TARGET_LINK_STACK)
9818 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9819 emit_move_insn (tmp2, mem);
9820 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9821 set_unique_reg_note (last, REG_EQUAL, gsym);
9822 }
9823 }
9824 }
9825
9826 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9827 {
9828 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9829 UNSPEC_TLSGD);
9830 tga = rs6000_tls_get_addr ();
9831 rtx argreg = gen_rtx_REG (Pmode, 3);
9832 emit_insn (gen_rtx_SET (argreg, arg));
9833 global_tlsarg = arg;
9834 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9835 global_tlsarg = NULL_RTX;
9836
9837 /* Make a note so that the result of this call can be CSEd. */
9838 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9839 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9840 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9841 }
9842 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9843 {
9844 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9845 tga = rs6000_tls_get_addr ();
9846 tmp1 = gen_reg_rtx (Pmode);
9847 rtx argreg = gen_rtx_REG (Pmode, 3);
9848 emit_insn (gen_rtx_SET (argreg, arg));
9849 global_tlsarg = arg;
9850 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9851 global_tlsarg = NULL_RTX;
9852
9853 /* Make a note so that the result of this call can be CSEd. */
9854 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9855 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9856 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9857
9858 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9859 {
9860 if (TARGET_64BIT)
9861 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9862 else
9863 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9864 }
9865 else if (rs6000_tls_size == 32)
9866 {
9867 tmp2 = gen_reg_rtx (Pmode);
9868 if (TARGET_64BIT)
9869 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9870 else
9871 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9872 emit_insn (insn);
9873 if (TARGET_64BIT)
9874 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9875 else
9876 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9877 }
9878 else
9879 {
9880 tmp2 = gen_reg_rtx (Pmode);
9881 if (TARGET_64BIT)
9882 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9883 else
9884 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9885 emit_insn (insn);
9886 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9887 }
9888 emit_insn (insn);
9889 }
9890 else
9891 {
9892 /* IE, or 64-bit offset LE. */
9893 tmp2 = gen_reg_rtx (Pmode);
9894 if (TARGET_64BIT)
9895 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9896 else
9897 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9898 emit_insn (insn);
9899 if (rs6000_pcrel_p ())
9900 {
9901 if (TARGET_64BIT)
9902 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9903 else
9904 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9905 }
9906 else if (TARGET_64BIT)
9907 insn = gen_tls_tls_64 (dest, tmp2, addr);
9908 else
9909 insn = gen_tls_tls_32 (dest, tmp2, addr);
9910 emit_insn (insn);
9911 }
9912 }
9913
9914 return dest;
9915 }
9916
9917 /* Only create the global variable for the stack protect guard if we are using
9918 the global flavor of that guard. */
9919 static tree
9920 rs6000_init_stack_protect_guard (void)
9921 {
9922 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9923 return default_stack_protect_guard ();
9924
9925 return NULL_TREE;
9926 }
9927
9928 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9929
9930 static bool
9931 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9932 {
9933 /* If GET_CODE (x) is HIGH, the 'X' represents the high part of a symbol_ref.
9934 It cannot be put into a constant pool, e.g.:
9935 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9936 (high:DI (symbol_ref:DI ("var")..)). */
9937 if (GET_CODE (x) == HIGH)
9938 return true;
9939
9940 /* A TLS symbol in the TOC cannot contain a sum. */
9941 if (GET_CODE (x) == CONST
9942 && GET_CODE (XEXP (x, 0)) == PLUS
9943 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9944 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9945 return true;
9946
9947 /* Allow AIX TOC TLS symbols in the constant pool,
9948 but not ELF TLS symbols. */
9949 return TARGET_ELF && tls_referenced_p (x);
9950 }
9951
9952 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9953 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9954 can be addressed relative to the toc pointer. */
9955
9956 static bool
9957 use_toc_relative_ref (rtx sym, machine_mode mode)
9958 {
9959 return ((constant_pool_expr_p (sym)
9960 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9961 get_pool_mode (sym)))
9962 || (TARGET_CMODEL == CMODEL_MEDIUM
9963 && SYMBOL_REF_LOCAL_P (sym)
9964 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9965 }
9966
9967 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9968 that is a valid memory address for an instruction.
9969 The MODE argument is the machine mode for the MEM expression
9970 that wants to use this address.
9971
9972 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9973 refers to a constant pool entry of an address (or the sum of it
9974 plus a constant), a short (16-bit signed) constant plus a register,
9975 the sum of two registers, or a register indirect, possibly with an
9976 auto-increment. For DFmode, DDmode and DImode with a constant plus
9977 register, we must ensure that both words are addressable or PowerPC64
9978 with offset word aligned.
9979
9980 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9981 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9982 because adjacent memory cells are accessed by adding word-sized offsets
9983 during assembly output. */
9984 static bool
9985 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9986 {
9987 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9988 bool quad_offset_p = mode_supports_dq_form (mode);
9989
9990 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9991 return 0;
9992
9993 /* Handle unaligned altivec lvx/stvx type addresses. */
9994 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9995 && GET_CODE (x) == AND
9996 && CONST_INT_P (XEXP (x, 1))
9997 && INTVAL (XEXP (x, 1)) == -16)
9998 {
9999 x = XEXP (x, 0);
10000 return (legitimate_indirect_address_p (x, reg_ok_strict)
10001 || legitimate_indexed_address_p (x, reg_ok_strict)
10002 || virtual_stack_registers_memory_p (x));
10003 }
10004
10005 if (legitimate_indirect_address_p (x, reg_ok_strict))
10006 return 1;
10007 if (TARGET_UPDATE
10008 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
10009 && mode_supports_pre_incdec_p (mode)
10010 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
10011 return 1;
10012
10013 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
10014 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
10015 return 1;
10016
10017 /* Handle restricted vector d-form offsets in ISA 3.0. */
10018 if (quad_offset_p)
10019 {
10020 if (quad_address_p (x, mode, reg_ok_strict))
10021 return 1;
10022 }
10023 else if (virtual_stack_registers_memory_p (x))
10024 return 1;
10025
10026 else if (reg_offset_p)
10027 {
10028 if (legitimate_small_data_p (mode, x))
10029 return 1;
10030 if (legitimate_constant_pool_address_p (x, mode,
10031 reg_ok_strict || lra_in_progress))
10032 return 1;
10033 }
10034
10035 /* For TImode, if we have TImode in VSX registers, only allow register
10036 indirect addresses. This will allow the values to go in either GPRs
10037 or VSX registers without reloading. The vector types would tend to
10038 go into VSX registers, so we allow REG+REG, while TImode seems
10039 somewhat split, in that some uses are GPR based, and some VSX based. */
10040 /* FIXME: We could loosen this by changing the following to
10041 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
10042 but currently we cannot allow REG+REG addressing for TImode. See
10043 PR72827 for complete details on how this ends up hoodwinking DSE. */
10044 if (mode == TImode && TARGET_VSX)
10045 return 0;
10046 /* If not REG_OK_STRICT (before reload), allow any stack offset. */
10047 if (! reg_ok_strict
10048 && reg_offset_p
10049 && GET_CODE (x) == PLUS
10050 && REG_P (XEXP (x, 0))
10051 && (XEXP (x, 0) == virtual_stack_vars_rtx
10052 || XEXP (x, 0) == arg_pointer_rtx)
10053 && CONST_INT_P (XEXP (x, 1)))
10054 return 1;
10055 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
10056 return 1;
10057 if (!FLOAT128_2REG_P (mode)
10058 && (TARGET_HARD_FLOAT
10059 || TARGET_POWERPC64
10060 || (mode != DFmode && mode != DDmode))
10061 && (TARGET_POWERPC64 || mode != DImode)
10062 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
10063 && mode != PTImode
10064 && !avoiding_indexed_address_p (mode)
10065 && legitimate_indexed_address_p (x, reg_ok_strict))
10066 return 1;
10067 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
10068 && mode_supports_pre_modify_p (mode)
10069 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
10070 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
10071 reg_ok_strict, false)
10072 || (!avoiding_indexed_address_p (mode)
10073 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
10074 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
10075 {
10076 /* There is no prefixed version of the load/store with update. */
10077 rtx addr = XEXP (x, 1);
10078 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
10079 }
10080 if (reg_offset_p && !quad_offset_p
10081 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
10082 return 1;
10083 return 0;
10084 }
10085
10086 /* Debug version of rs6000_legitimate_address_p. */
10087 static bool
10088 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
10089 bool reg_ok_strict)
10090 {
10091 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
10092 fprintf (stderr,
10093 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10094 "strict = %d, reload = %s, code = %s\n",
10095 ret ? "true" : "false",
10096 GET_MODE_NAME (mode),
10097 reg_ok_strict,
10098 (reload_completed ? "after" : "before"),
10099 GET_RTX_NAME (GET_CODE (x)));
10100 debug_rtx (x);
10101
10102 return ret;
10103 }
10104
10105 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10106
10107 static bool
10108 rs6000_mode_dependent_address_p (const_rtx addr,
10109 addr_space_t as ATTRIBUTE_UNUSED)
10110 {
10111 return rs6000_mode_dependent_address_ptr (addr);
10112 }
10113
10114 /* Go to LABEL if ADDR (a legitimate address expression)
10115 has an effect that depends on the machine mode it is used for.
10116
10117 On the RS/6000 this is true of all integral offsets (since AltiVec
10118 and VSX modes don't allow them) and of pre-increment and decrement addresses.
10119
10120 ??? Except that due to conceptual problems in offsettable_address_p
10121 we can't really report the problems of integral offsets. So leave
10122 this assuming that the adjustable offset must be valid for the
10123 sub-words of a TFmode operand, which is what we had before. */
10124
10125 static bool
10126 rs6000_mode_dependent_address (const_rtx addr)
10127 {
10128 switch (GET_CODE (addr))
10129 {
10130 case PLUS:
10131 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10132 is considered a legitimate address before reload, so there
10133 are no offset restrictions in that case. Note that this
10134 condition is safe in strict mode because any address involving
10135 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10136 been rejected as illegitimate. */
10137 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10138 && XEXP (addr, 0) != arg_pointer_rtx
10139 && CONST_INT_P (XEXP (addr, 1)))
10140 {
10141 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10142 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
10143 if (TARGET_PREFIXED)
10144 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
10145 else
10146 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
10147 }
10148 break;
10149
10150 case LO_SUM:
10151 /* Anything in the constant pool is sufficiently aligned that
10152 all bytes have the same high part address. */
10153 return !legitimate_constant_pool_address_p (addr, QImode, false);
10154
10155 /* Auto-increment cases are now treated generically in recog.cc. */
10156 case PRE_MODIFY:
10157 return TARGET_UPDATE;
10158
10159 /* AND is only allowed in Altivec loads. */
10160 case AND:
10161 return true;
10162
10163 default:
10164 break;
10165 }
10166
10167 return false;
10168 }
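
/* A worked example, assuming the 16-bit test is
   -0x8000 <= VAL <= 0x7fff - EXTRA: on powerpc64 without prefixed
   addressing, (plus (reg 9) (const_int 32760)) is mode-dependent,
   since EXTRA = 8 and 32760 > 0x7fff - 8 = 32759, so a multi-word
   access would need an out-of-range offset for its last word.  */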
10169
10170 /* Debug version of rs6000_mode_dependent_address. */
10171 static bool
10172 rs6000_debug_mode_dependent_address (const_rtx addr)
10173 {
10174 bool ret = rs6000_mode_dependent_address (addr);
10175
10176 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10177 ret ? "true" : "false");
10178 debug_rtx (addr);
10179
10180 return ret;
10181 }
10182
10183 /* Implement FIND_BASE_TERM. */
10184
10185 rtx
10186 rs6000_find_base_term (rtx op)
10187 {
10188 rtx base;
10189
10190 base = op;
10191 if (GET_CODE (base) == CONST)
10192 base = XEXP (base, 0);
10193 if (GET_CODE (base) == PLUS)
10194 base = XEXP (base, 0);
10195 if (GET_CODE (base) == UNSPEC)
10196 switch (XINT (base, 1))
10197 {
10198 case UNSPEC_TOCREL:
10199 case UNSPEC_MACHOPIC_OFFSET:
10200 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10201 for aliasing purposes. */
10202 return XVECEXP (base, 0, 0);
10203 }
10204
10205 return op;
10206 }
10207
10208 /* More elaborate version of recog's offsettable_memref_p predicate
10209 that works around the ??? note of rs6000_mode_dependent_address.
10210 In particular it accepts
10211
10212 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10213
10214 in 32-bit mode, which the recog predicate rejects. */
10215
10216 static bool
10217 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10218 {
10219 bool worst_case;
10220
10221 if (!MEM_P (op))
10222 return false;
10223
10224 /* First mimic offsettable_memref_p. */
10225 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10226 return true;
10227
10228 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10229 the latter predicate knows nothing about the mode of the memory
10230 reference and, therefore, assumes that it is the largest supported
10231 mode (TFmode). As a consequence, legitimate offsettable memory
10232 references are rejected. rs6000_legitimate_offset_address_p contains
10233 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10234 at least with a little bit of help here given that we know the
10235 actual registers used. */
10236 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10237 || GET_MODE_SIZE (reg_mode) == 4);
10238 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10239 strict, worst_case);
10240 }
10241
10242 /* Determine the reassociation width to be used in reassociate_bb.
10243 This takes into account how many parallel operations we
10244 can actually do of a given type, and also the latency.
10245 P8:
10246 int add/sub 6/cycle
10247 mul 2/cycle
10248 vect add/sub/mul 2/cycle
10249 fp add/sub/mul 2/cycle
10250 dfp 1/cycle
10251 */
10252
10253 static int
10254 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10255 machine_mode mode)
10256 {
10257 switch (rs6000_tune)
10258 {
10259 case PROCESSOR_POWER8:
10260 case PROCESSOR_POWER9:
10261 case PROCESSOR_POWER10:
10262 case PROCESSOR_FUTURE:
10263 if (DECIMAL_FLOAT_MODE_P (mode))
10264 return 1;
10265 if (VECTOR_MODE_P (mode))
10266 return 4;
10267 if (INTEGRAL_MODE_P (mode))
10268 return 1;
10269 if (FLOAT_MODE_P (mode))
10270 return 4;
10271 break;
10272 default:
10273 break;
10274 }
10275 return 1;
10276 }
10277
10278 /* Change register usage conditional on target flags. */
10279 static void
10280 rs6000_conditional_register_usage (void)
10281 {
10282 int i;
10283
10284 if (TARGET_DEBUG_TARGET)
10285 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10286
10287 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10288 if (TARGET_64BIT)
10289 fixed_regs[13] = call_used_regs[13] = 1;
10290
10291 /* Conditionally disable FPRs. */
10292 if (TARGET_SOFT_FLOAT)
10293 for (i = 32; i < 64; i++)
10294 fixed_regs[i] = call_used_regs[i] = 1;
10295
10296 /* The TOC register is not killed across calls in a way that is
10297 visible to the compiler. */
10298 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10299 call_used_regs[2] = 0;
10300
10301 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10302 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10303
10304 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10305 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10306 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10307
10308 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10309 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10310 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10311
10312 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10313 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10314
10315 if (!TARGET_ALTIVEC && !TARGET_VSX)
10316 {
10317 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10318 fixed_regs[i] = call_used_regs[i] = 1;
10319 call_used_regs[VRSAVE_REGNO] = 1;
10320 }
10321
10322 if (TARGET_ALTIVEC || TARGET_VSX)
10323 global_regs[VSCR_REGNO] = 1;
10324
10325 if (TARGET_ALTIVEC_ABI)
10326 {
10327 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10328 call_used_regs[i] = 1;
10329
10330 /* AIX reserves VR20:31 in non-extended ABI mode. */
10331 if (TARGET_XCOFF && !rs6000_aix_extabi)
10332 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10333 fixed_regs[i] = call_used_regs[i] = 1;
10334 }
10335 }
10336
10337 \f
10338 /* Output insns to set DEST equal to the constant SOURCE as a series of
10339 lis, ori and shl instructions and return TRUE. */
10340
10341 bool
10342 rs6000_emit_set_const (rtx dest, rtx source)
10343 {
10344 machine_mode mode = GET_MODE (dest);
10345 rtx temp, set;
10346 rtx_insn *insn;
10347 HOST_WIDE_INT c;
10348
10349 gcc_checking_assert (CONST_INT_P (source));
10350 c = INTVAL (source);
10351 switch (mode)
10352 {
10353 case E_QImode:
10354 case E_HImode:
10355 emit_insn (gen_rtx_SET (dest, source));
10356 return true;
10357
10358 case E_SImode:
10359 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10360
10361 emit_insn (gen_rtx_SET (temp, GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10362 emit_insn (gen_rtx_SET (dest,
10363 gen_rtx_IOR (SImode, temp,
10364 GEN_INT (c & 0xffff))));
10365 break;
10366
10367 case E_DImode:
10368 if (!TARGET_POWERPC64)
10369 {
10370 rtx hi, lo;
10371
10372 hi = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0, DImode);
10373 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, DImode);
10374 emit_move_insn (hi, GEN_INT (c >> 32));
10375 c = sext_hwi (c, 32);
10376 emit_move_insn (lo, GEN_INT (c));
10377 }
10378 else
10379 rs6000_emit_set_long_const (dest, c);
10380 break;
10381
10382 default:
10383 gcc_unreachable ();
10384 }
10385
10386 insn = get_last_insn ();
10387 set = single_set (insn);
10388 if (! CONSTANT_P (SET_SRC (set)))
10389 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10390
10391 return true;
10392 }
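
/* For illustration: SImode c = 0x12345678 produces the classic
   two-insn sequence (register names hypothetical)

	lis	tmp,0x1234	# tmp = 0x12340000
	ori	dest,tmp,0x5678

   with a REG_EQUAL note recording the full constant on the last
   insn.  */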
10393
10394 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10395 Output insns to set DEST equal to the constant C as a series of
10396 lis, ori and shl instructions. */
10397
10398 static void
10399 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10400 {
10401 rtx temp;
10402 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10403
10404 ud1 = c & 0xffff;
10405 c = c >> 16;
10406 ud2 = c & 0xffff;
10407 c = c >> 16;
10408 ud3 = c & 0xffff;
10409 c = c >> 16;
10410 ud4 = c & 0xffff;
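
  /* For illustration: c = 0x123456789abcdef0 splits into ud4 = 0x1234,
     ud3 = 0x5678, ud2 = 0x9abc and ud1 = 0xdef0, ud4 being the most
     significant 16-bit group and ud1 the least.  */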
10411
10412 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10413 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10414 emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
10415
10416 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10417 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10418 {
10419 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10420
10421 emit_move_insn (ud1 != 0 ? temp : dest,
10422 GEN_INT (sext_hwi (ud2 << 16, 32)));
10423 if (ud1 != 0)
10424 emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10425 }
10426 else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
10427 {
10428 /* li; xoris */
10429 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10430 emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
10431 emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
10432 GEN_INT ((ud2 ^ 0xffff) << 16)));
10433 }
10434 else if (ud3 == 0 && ud4 == 0)
10435 {
10436 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10437
10438 gcc_assert (ud2 & 0x8000);
10439
10440 if (ud1 == 0)
10441 {
10442 /* lis; rldicl */
10443 emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10444 emit_move_insn (dest,
10445 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10446 }
10447 else if (!(ud1 & 0x8000))
10448 {
10449 /* li; oris */
10450 emit_move_insn (temp, GEN_INT (ud1));
10451 emit_move_insn (dest,
10452 gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
10453 }
10454 else
10455 {
10456 /* lis; ori; rldicl */
10457 emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10458 emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10459 emit_move_insn (dest,
10460 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10461 }
10462 }
10463 else if (ud1 == ud3 && ud2 == ud4)
10464 {
10465 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10466 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10467 rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
10468 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10469 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10470 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10471 }
10472 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10473 || (ud4 == 0 && ! (ud3 & 0x8000)))
10474 {
10475 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10476
10477 emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
10478 if (ud2 != 0)
10479 emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
10480 emit_move_insn (ud1 != 0 ? temp : dest,
10481 gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
10482 if (ud1 != 0)
10483 emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10484 }
10485 else if (TARGET_PREFIXED)
10486 {
10487 if (can_create_pseudo_p ())
10488 {
10489 /* pli A,L + pli B,H + rldimi A,B,32,0. */
10490 temp = gen_reg_rtx (DImode);
10491 rtx temp1 = gen_reg_rtx (DImode);
10492 emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
10493 emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
10494
10495 emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
10496 GEN_INT (0xffffffff)));
10497 }
10498 else
10499 {
10500 /* pli A,H + sldi A,32 + paddi A,A,L. */
10501 emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
10502
10503 emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10504
10505 bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
10506
10507 /* Use paddi for the low 32 bits. */
10508 if (ud2 != 0 && ud1 != 0 && can_use_paddi)
10509 emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
10510 GEN_INT ((ud2 << 16) | ud1)));
10511
10512 /* Use oris, ori for low 32 bits. */
10513 if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
10514 emit_move_insn (dest,
10515 gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10516 if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
10517 emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10518 }
10519 }
10520 else
10521 {
10522 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10523
10524 emit_move_insn (temp, GEN_INT (sext_hwi (ud4 << 16, 32)));
10525 if (ud3 != 0)
10526 emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud3)));
10527
10528 emit_move_insn (ud2 != 0 || ud1 != 0 ? temp : dest,
10529 gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)));
10530 if (ud2 != 0)
10531 emit_move_insn (ud1 != 0 ? temp : dest,
10532 gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
10533 if (ud1 != 0)
10534 emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10535 }
10536 }
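/* Worked example for the final case above (no prefixed insns): for
   c = 0x123456789abcdef0 we have ud4 = 0x1234, ud3 = 0x5678,
   ud2 = 0x9abc and ud1 = 0xdef0, giving the classic five-insn sequence

	lis rT,0x1234		(rT = 0x12340000)
	ori rT,rT,0x5678	(rT = 0x12345678)
	sldi rT,rT,32		(rT = 0x1234567800000000)
	oris rT,rT,0x9abc	(rT = 0x123456789abc0000)
	ori rD,rT,0xdef0	(rD = 0x123456789abcdef0)

   The earlier cases exist to do better when some of ud1..ud4 are zero,
   all ones, or repeated.  */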
10537
10538 /* Helper for the following. Get rid of [r+r] memory refs
10539 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10540
10541 static void
10542 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10543 {
10544 if (MEM_P (operands[0])
10545 && !REG_P (XEXP (operands[0], 0))
10546 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10547 GET_MODE (operands[0]), false))
10548 operands[0]
10549 = replace_equiv_address (operands[0],
10550 copy_addr_to_reg (XEXP (operands[0], 0)));
10551
10552 if (MEM_P (operands[1])
10553 && !REG_P (XEXP (operands[1], 0))
10554 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10555 GET_MODE (operands[1]), false))
10556 operands[1]
10557 = replace_equiv_address (operands[1],
10558 copy_addr_to_reg (XEXP (operands[1], 0)));
10559 }
10560
10561 /* Generate a vector of constants to permute MODE for a little-endian
10562 storage operation by swapping the two halves of a vector. */
10563 static rtvec
10564 rs6000_const_vec (machine_mode mode)
10565 {
10566 int i, subparts;
10567 rtvec v;
10568
10569 switch (mode)
10570 {
10571 case E_V1TImode:
10572 subparts = 1;
10573 break;
10574 case E_V2DFmode:
10575 case E_V2DImode:
10576 subparts = 2;
10577 break;
10578 case E_V4SFmode:
10579 case E_V4SImode:
10580 subparts = 4;
10581 break;
10582 case E_V8HImode:
10583 subparts = 8;
10584 break;
10585 case E_V16QImode:
10586 subparts = 16;
10587 break;
10588 default:
10589 gcc_unreachable ();
10590 }
10591
10592 v = rtvec_alloc (subparts);
10593
10594 for (i = 0; i < subparts / 2; ++i)
10595 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10596 for (i = subparts / 2; i < subparts; ++i)
10597 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10598
10599 return v;
10600 }
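/* For example, V4SImode yields the selector { 2, 3, 0, 1 }: each element
   of the first half selects from the second half and vice versa, which is
   the lxvd2x/xxpermdi doubleword swap written as a VEC_SELECT.  */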
10601
10602 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10603 store operation. */
10604 void
10605 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10606 {
10607 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10608 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10609
10610 /* Scalar permutations are easier to express in integer modes than in
10611 floating-point modes, so cast them here. We use V1TImode instead
10612 of TImode to ensure that the values don't go through GPRs. */
10613 if (FLOAT128_VECTOR_P (mode))
10614 {
10615 dest = gen_lowpart (V1TImode, dest);
10616 source = gen_lowpart (V1TImode, source);
10617 mode = V1TImode;
10618 }
10619
10620 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10621 scalar. */
10622 if (mode == TImode || mode == V1TImode)
10623 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10624 GEN_INT (64))));
10625 else
10626 {
10627 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10628 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10629 }
10630 }
10631
10632 /* Emit a little-endian load from vector memory location SOURCE to VSX
10633 register DEST in mode MODE. The load is done with two permuting
10634 insns that represent an lxvd2x and an xxpermdi. */
10635 void
10636 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10637 {
10638 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10639 V1TImode). */
10640 if (mode == TImode || mode == V1TImode)
10641 {
10642 mode = V2DImode;
10643 dest = gen_lowpart (V2DImode, dest);
10644 source = adjust_address (source, V2DImode, 0);
10645 }
10646
10647 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10648 rs6000_emit_le_vsx_permute (tmp, source, mode);
10649 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10650 }
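/* The two permutes cancel: on little-endian, the lxvd2x-style load brings
   the doublewords in swapped, and the xxpermdi-style register permute
   swaps them back, leaving DEST in element order.  Emitting both
   explicitly at expand time lets later passes cancel adjacent pairs.  */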
10651
10652 /* Emit a little-endian store to vector memory location DEST from VSX
10653 register SOURCE in mode MODE. The store is done with two permuting
10654 insns that represent an xxpermdi and an stxvd2x. */
10655 void
10656 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10657 {
10658 /* This should never be called after LRA. */
10659 gcc_assert (can_create_pseudo_p ());
10660
10661 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10662 V1TImode). */
10663 if (mode == TImode || mode == V1TImode)
10664 {
10665 mode = V2DImode;
10666 dest = adjust_address (dest, V2DImode, 0);
10667 source = gen_lowpart (V2DImode, source);
10668 }
10669
10670 rtx tmp = gen_reg_rtx_and_attrs (source);
10671 rs6000_emit_le_vsx_permute (tmp, source, mode);
10672 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10673 }
10674
10675 /* Emit a sequence representing a little-endian VSX load or store,
10676 moving data from SOURCE to DEST in mode MODE. This is done
10677 separately from rs6000_emit_move to ensure it is called only
10678 during expand. LE VSX loads and stores introduced later are
10679 handled with a split. The expand-time RTL generation allows
10680 us to optimize away redundant pairs of register-permutes. */
10681 void
10682 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10683 {
10684 gcc_assert (!BYTES_BIG_ENDIAN
10685 && VECTOR_MEM_VSX_P (mode)
10686 && !TARGET_P9_VECTOR
10687 && !gpr_or_gpr_p (dest, source)
10688 && (MEM_P (source) ^ MEM_P (dest)));
10689
10690 if (MEM_P (source))
10691 {
10692 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10693 rs6000_emit_le_vsx_load (dest, source, mode);
10694 }
10695 else
10696 {
10697 if (!REG_P (source))
10698 source = force_reg (mode, source);
10699 rs6000_emit_le_vsx_store (dest, source, mode);
10700 }
10701 }
10702
10703 /* Return whether a SFmode or SImode move can be done without converting one
10704 mode to another. This arises when we have:
10705
10706 (SUBREG:SF (REG:SI ...))
10707 (SUBREG:SI (REG:SF ...))
10708
10709 and one of the values is in a floating point/vector register, where SFmode
10710 scalars are stored in DFmode format. */
10711
10712 bool
10713 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10714 {
10715 if (TARGET_ALLOW_SF_SUBREG)
10716 return true;
10717
10718 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10719 return true;
10720
10721 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10722 return true;
10723
10724 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10725 if (SUBREG_P (dest))
10726 {
10727 rtx dest_subreg = SUBREG_REG (dest);
10728 rtx src_subreg = SUBREG_REG (src);
10729 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10730 }
10731
10732 return false;
10733 }
10734
10735
10736 /* Helper function to change moves with:
10737
10738 (SUBREG:SF (REG:SI)) and
10739 (SUBREG:SI (REG:SF))
10740
10741 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10742 values are stored as DFmode values in the VSX registers. We need to convert
10743 the bits before we can use a direct move or operate on the bits in the
10744 vector register as an integer type.
10745
10746 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10747
10748 static bool
10749 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10750 {
10751 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10752 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10753 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10754 {
10755 rtx inner_source = SUBREG_REG (source);
10756 machine_mode inner_mode = GET_MODE (inner_source);
10757
10758 if (mode == SImode && inner_mode == SFmode)
10759 {
10760 emit_insn (gen_movsi_from_sf (dest, inner_source));
10761 return true;
10762 }
10763
10764 if (mode == SFmode && inner_mode == SImode)
10765 {
10766 emit_insn (gen_movsf_from_si (dest, inner_source));
10767 return true;
10768 }
10769 }
10770
10771 return false;
10772 }
10773
10774 /* Emit a move from SOURCE to DEST in mode MODE. */
10775 void
10776 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10777 {
10778 rtx operands[2];
10779 operands[0] = dest;
10780 operands[1] = source;
10781
10782 if (TARGET_DEBUG_ADDR)
10783 {
10784 fprintf (stderr,
10785 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10786 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10787 GET_MODE_NAME (mode),
10788 lra_in_progress,
10789 reload_completed,
10790 can_create_pseudo_p ());
10791 debug_rtx (dest);
10792 fprintf (stderr, "source:\n");
10793 debug_rtx (source);
10794 }
10795
10796 /* Check that we get CONST_WIDE_INT only when we should. */
10797 if (CONST_WIDE_INT_P (operands[1])
10798 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10799 gcc_unreachable ();
10800
10801 #ifdef HAVE_AS_GNU_ATTRIBUTE
10802 /* If we use a long double type, set the flags in .gnu_attribute that say
10803 what the long double type is. This is to allow the linker's warning
10804 message for the wrong long double to be useful, even if the function does
10805 not do a call (for example, doing a 128-bit add on power9 if the long
10806 double type is IEEE 128-bit). Do not set this when __ibm128 or __float128
10807 are used if they aren't the default long double type. */
10808 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10809 {
10810 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10811 rs6000_passes_float = rs6000_passes_long_double = true;
10812
10813 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10814 rs6000_passes_float = rs6000_passes_long_double = true;
10815 }
10816 #endif
10817
10818 /* See if we need to special case SImode/SFmode SUBREG moves. */
10819 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10820 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10821 return;
10822
10823 /* Check if GCC is setting up a block move that will end up using FP
10824 registers as temporaries. We must make sure this is acceptable. */
10825 if (MEM_P (operands[0])
10826 && MEM_P (operands[1])
10827 && mode == DImode
10828 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10829 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10830 && ! (rs6000_slow_unaligned_access (SImode,
10831 (MEM_ALIGN (operands[0]) > 32
10832 ? 32 : MEM_ALIGN (operands[0])))
10833 || rs6000_slow_unaligned_access (SImode,
10834 (MEM_ALIGN (operands[1]) > 32
10835 ? 32 : MEM_ALIGN (operands[1]))))
10836 && ! MEM_VOLATILE_P (operands [0])
10837 && ! MEM_VOLATILE_P (operands [1]))
10838 {
10839 emit_move_insn (adjust_address (operands[0], SImode, 0),
10840 adjust_address (operands[1], SImode, 0));
10841 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10842 adjust_address (copy_rtx (operands[1]), SImode, 4));
10843 return;
10844 }
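/* For example, a DImode copy between two word-aligned (but not
   doubleword-aligned) stack slots is split above into two SImode moves at
   offsets 0 and 4, avoiding an 8-byte access (possibly through an FP
   temporary) that rs6000_slow_unaligned_access reports as expensive.  */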
10845
10846 if (can_create_pseudo_p () && MEM_P (operands[0])
10847 && !gpc_reg_operand (operands[1], mode))
10848 operands[1] = force_reg (mode, operands[1]);
10849
10850 /* Recognize the case where operand[1] is a reference to thread-local
10851 data and load its address to a register. */
10852 if (tls_referenced_p (operands[1]))
10853 {
10854 enum tls_model model;
10855 rtx tmp = operands[1];
10856 rtx addend = NULL;
10857
10858 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10859 {
10860 addend = XEXP (XEXP (tmp, 0), 1);
10861 tmp = XEXP (XEXP (tmp, 0), 0);
10862 }
10863
10864 gcc_assert (SYMBOL_REF_P (tmp));
10865 model = SYMBOL_REF_TLS_MODEL (tmp);
10866 gcc_assert (model != 0);
10867
10868 tmp = rs6000_legitimize_tls_address (tmp, model);
10869 if (addend)
10870 {
10871 tmp = gen_rtx_PLUS (mode, tmp, addend);
10872 tmp = force_operand (tmp, operands[0]);
10873 }
10874 operands[1] = tmp;
10875 }
10876
10877 /* 128-bit constant floating-point values on Darwin should really be loaded
10878 as two parts. However, this premature splitting is a problem when DFmode
10879 values can go into Altivec registers. */
10880 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10881 && !reg_addr[DFmode].scalar_in_vmx_p)
10882 {
10883 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10884 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10885 DFmode);
10886 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10887 GET_MODE_SIZE (DFmode)),
10888 simplify_gen_subreg (DFmode, operands[1], mode,
10889 GET_MODE_SIZE (DFmode)),
10890 DFmode);
10891 return;
10892 }
10893
10894 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10895 p1:SD) if p1 is not of floating point class and p0 is spilled as
10896 we can have no analogous movsd_store for this. */
10897 if (lra_in_progress && mode == DDmode
10898 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10899 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10900 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10901 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10902 {
10903 enum reg_class cl;
10904 int regno = REGNO (SUBREG_REG (operands[1]));
10905
10906 if (!HARD_REGISTER_NUM_P (regno))
10907 {
10908 cl = reg_preferred_class (regno);
10909 regno = reg_renumber[regno];
10910 if (regno < 0)
10911 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10912 }
10913 if (regno >= 0 && ! FP_REGNO_P (regno))
10914 {
10915 mode = SDmode;
10916 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10917 operands[1] = SUBREG_REG (operands[1]);
10918 }
10919 }
10920 if (lra_in_progress
10921 && mode == SDmode
10922 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10923 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10924 && (REG_P (operands[1])
10925 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10926 {
10927 int regno = reg_or_subregno (operands[1]);
10928 enum reg_class cl;
10929
10930 if (!HARD_REGISTER_NUM_P (regno))
10931 {
10932 cl = reg_preferred_class (regno);
10933 gcc_assert (cl != NO_REGS);
10934 regno = reg_renumber[regno];
10935 if (regno < 0)
10936 regno = ira_class_hard_regs[cl][0];
10937 }
10938 if (FP_REGNO_P (regno))
10939 {
10940 if (GET_MODE (operands[0]) != DDmode)
10941 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10942 emit_insn (gen_movsd_store (operands[0], operands[1]));
10943 }
10944 else if (INT_REGNO_P (regno))
10945 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10946 else
10947 gcc_unreachable ();
10948 return;
10949 }
10950 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10951 p:DD)) if p0 is not of floating point class and p1 is spilled as
10952 we can have no analogous movsd_load for this. */
10953 if (lra_in_progress && mode == DDmode
10954 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10955 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10956 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10957 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10958 {
10959 enum reg_class cl;
10960 int regno = REGNO (SUBREG_REG (operands[0]));
10961
10962 if (!HARD_REGISTER_NUM_P (regno))
10963 {
10964 cl = reg_preferred_class (regno);
10965 regno = reg_renumber[regno];
10966 if (regno < 0)
10967 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10968 }
10969 if (regno >= 0 && ! FP_REGNO_P (regno))
10970 {
10971 mode = SDmode;
10972 operands[0] = SUBREG_REG (operands[0]);
10973 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10974 }
10975 }
10976 if (lra_in_progress
10977 && mode == SDmode
10978 && (REG_P (operands[0])
10979 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10980 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10981 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10982 {
10983 int regno = reg_or_subregno (operands[0]);
10984 enum reg_class cl;
10985
10986 if (!HARD_REGISTER_NUM_P (regno))
10987 {
10988 cl = reg_preferred_class (regno);
10989 gcc_assert (cl != NO_REGS);
10990 regno = reg_renumber[regno];
10991 if (regno < 0)
10992 regno = ira_class_hard_regs[cl][0];
10993 }
10994 if (FP_REGNO_P (regno))
10995 {
10996 if (GET_MODE (operands[1]) != DDmode)
10997 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10998 emit_insn (gen_movsd_load (operands[0], operands[1]));
10999 }
11000 else if (INT_REGNO_P (regno))
11001 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11002 else
11003 gcc_unreachable ();
11004 return;
11005 }
11006
11007 /* FIXME: In the long term, this switch statement should go away
11008 and be replaced by a sequence of tests based on things like
11009 mode == Pmode. */
11010 switch (mode)
11011 {
11012 case E_HImode:
11013 case E_QImode:
11014 if (CONSTANT_P (operands[1])
11015 && !CONST_INT_P (operands[1]))
11016 operands[1] = force_const_mem (mode, operands[1]);
11017 break;
11018
11019 case E_TFmode:
11020 case E_TDmode:
11021 case E_IFmode:
11022 case E_KFmode:
11023 if (FLOAT128_2REG_P (mode))
11024 rs6000_eliminate_indexed_memrefs (operands);
11025 /* fall through */
11026
11027 case E_DFmode:
11028 case E_DDmode:
11029 case E_SFmode:
11030 case E_SDmode:
11031 if (CONSTANT_P (operands[1])
11032 && ! easy_fp_constant (operands[1], mode))
11033 operands[1] = force_const_mem (mode, operands[1]);
11034 break;
11035
11036 case E_V16QImode:
11037 case E_V8HImode:
11038 case E_V4SFmode:
11039 case E_V4SImode:
11040 case E_V2DFmode:
11041 case E_V2DImode:
11042 case E_V1TImode:
11043 if (CONSTANT_P (operands[1])
11044 && !easy_vector_constant (operands[1], mode))
11045 operands[1] = force_const_mem (mode, operands[1]);
11046 break;
11047
11048 case E_OOmode:
11049 case E_XOmode:
11050 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
11051 error ("%qs is an opaque type, and you cannot set it to other values",
11052 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
11053 break;
11054
11055 case E_TDOmode:
11056 if (CONST_INT_P (operands[1]))
11057 error ("%qs is an opaque type, and you cannot set it to constants",
11058 "__dmr");
11059 break;
11060
11061 case E_SImode:
11062 case E_DImode:
11063 /* Use the default pattern for the address of ELF small data. */
11064 if (TARGET_ELF
11065 && mode == Pmode
11066 && DEFAULT_ABI == ABI_V4
11067 && (SYMBOL_REF_P (operands[1])
11068 || GET_CODE (operands[1]) == CONST)
11069 && small_data_operand (operands[1], mode))
11070 {
11071 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11072 return;
11073 }
11074
11075 /* Use the default pattern for loading up PC-relative addresses. */
11076 if (TARGET_PCREL && mode == Pmode
11077 && pcrel_local_or_external_address (operands[1], Pmode))
11078 {
11079 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11080 return;
11081 }
11082
11083 if (DEFAULT_ABI == ABI_V4
11084 && mode == Pmode && mode == SImode
11085 && flag_pic == 1 && got_operand (operands[1], mode))
11086 {
11087 emit_insn (gen_movsi_got (operands[0], operands[1]));
11088 return;
11089 }
11090
11091 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11092 && TARGET_NO_TOC_OR_PCREL
11093 && ! flag_pic
11094 && mode == Pmode
11095 && CONSTANT_P (operands[1])
11096 && GET_CODE (operands[1]) != HIGH
11097 && !CONST_INT_P (operands[1]))
11098 {
11099 rtx target = (!can_create_pseudo_p ()
11100 ? operands[0]
11101 : gen_reg_rtx (mode));
11102
11103 /* If this is a function address on -mcall-aixdesc,
11104 convert it to the address of the descriptor. */
11105 if (DEFAULT_ABI == ABI_AIX
11106 && SYMBOL_REF_P (operands[1])
11107 && XSTR (operands[1], 0)[0] == '.')
11108 {
11109 const char *name = XSTR (operands[1], 0);
11110 rtx new_ref;
11111 while (*name == '.')
11112 name++;
11113 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11114 CONSTANT_POOL_ADDRESS_P (new_ref)
11115 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11116 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11117 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11118 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11119 operands[1] = new_ref;
11120 }
11121
11122 if (DEFAULT_ABI == ABI_DARWIN)
11123 {
11124 #if TARGET_MACHO
11125 /* This is not PIC code, but could require the subset of
11126 indirections used by mdynamic-no-pic. */
11127 if (MACHO_DYNAMIC_NO_PIC_P)
11128 {
11129 /* Take care of any required data indirection. */
11130 operands[1] = rs6000_machopic_legitimize_pic_address (
11131 operands[1], mode, operands[0]);
11132 if (operands[0] != operands[1])
11133 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11134 return;
11135 }
11136 #endif
11137 emit_insn (gen_macho_high (Pmode, target, operands[1]));
11138 emit_insn (gen_macho_low (Pmode, operands[0],
11139 target, operands[1]));
11140 return;
11141 }
11142
11143 emit_insn (gen_elf_high (target, operands[1]));
11144 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11145 return;
11146 }
11147
11148 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11149 and we have put it in the TOC, we just need to make a TOC-relative
11150 reference to it. */
11151 if (TARGET_TOC
11152 && SYMBOL_REF_P (operands[1])
11153 && use_toc_relative_ref (operands[1], mode))
11154 operands[1] = create_TOC_reference (operands[1], operands[0]);
11155 else if (mode == Pmode
11156 && CONSTANT_P (operands[1])
11157 && GET_CODE (operands[1]) != HIGH
11158 && ((REG_P (operands[0])
11159 && FP_REGNO_P (REGNO (operands[0])))
11160 || !CONST_INT_P (operands[1])
11161 || (num_insns_constant (operands[1], mode)
11162 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11163 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
11164 && (TARGET_CMODEL == CMODEL_SMALL
11165 || can_create_pseudo_p ()
11166 || (REG_P (operands[0])
11167 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11168 {
11169
11170 #if TARGET_MACHO
11171 /* Darwin uses a special PIC legitimizer. */
11172 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11173 {
11174 operands[1] =
11175 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11176 operands[0]);
11177 if (operands[0] != operands[1])
11178 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11179 return;
11180 }
11181 #endif
11182
11183 /* If we are to limit the number of things we put in the TOC and
11184 this is a symbol plus a constant we can add in one insn,
11185 just put the symbol in the TOC and add the constant. */
11186 if (GET_CODE (operands[1]) == CONST
11187 && TARGET_NO_SUM_IN_TOC
11188 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11189 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11190 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11191 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
11192 && ! side_effects_p (operands[0]))
11193 {
11194 rtx sym =
11195 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11196 rtx other = XEXP (XEXP (operands[1], 0), 1);
11197
11198 sym = force_reg (mode, sym);
11199 emit_insn (gen_add3_insn (operands[0], sym, other));
11200 return;
11201 }
11202
11203 operands[1] = force_const_mem (mode, operands[1]);
11204
11205 if (TARGET_TOC
11206 && SYMBOL_REF_P (XEXP (operands[1], 0))
11207 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11208 {
11209 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11210 operands[0]);
11211 operands[1] = gen_const_mem (mode, tocref);
11212 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11213 }
11214 }
11215 break;
11216
11217 case E_TImode:
11218 if (!VECTOR_MEM_VSX_P (TImode))
11219 rs6000_eliminate_indexed_memrefs (operands);
11220 break;
11221
11222 case E_PTImode:
11223 rs6000_eliminate_indexed_memrefs (operands);
11224 break;
11225
11226 default:
11227 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11228 }
11229
11230 /* Above, we may have called force_const_mem which may have returned
11231 an invalid address. If we can, fix this up; otherwise, reload will
11232 have to deal with it. */
11233 if (MEM_P (operands[1]))
11234 operands[1] = validize_mem (operands[1]);
11235
11236 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11237 }
11238 \f
11239
11240 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11241 static void
11242 init_float128_ibm (machine_mode mode)
11243 {
11244 if (!TARGET_XL_COMPAT)
11245 {
11246 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
11247 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
11248 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
11249 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
11250
11251 if (!TARGET_HARD_FLOAT)
11252 {
11253 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
11254 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
11255 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
11256 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
11257 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
11258 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
11259 set_optab_libfunc (le_optab, mode, "__gcc_qle");
11260 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
11261
11262 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
11263 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
11264 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
11265 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
11266 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
11267 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
11268 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
11269 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
11270 }
11271 }
11272 else
11273 {
11274 set_optab_libfunc (add_optab, mode, "_xlqadd");
11275 set_optab_libfunc (sub_optab, mode, "_xlqsub");
11276 set_optab_libfunc (smul_optab, mode, "_xlqmul");
11277 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11278 }
11279
11280 /* Add various conversions for IFmode to use the traditional TFmode
11281 names. */
11282 if (mode == IFmode)
11283 {
11284 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11285 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11286 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11287 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11288 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11289 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11290
11291 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
11292 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
11293
11294 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
11295 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
11296
11297 if (TARGET_POWERPC64)
11298 {
11299 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11300 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11301 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11302 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11303 }
11304 }
11305 }
11306
11307 /* Set up IEEE 128-bit floating point routines. Use different names if the
11308 arguments can be passed in a vector register. The historical PowerPC
11309 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11310 continue to use that if we aren't using vector registers to pass IEEE
11311 128-bit floating point. */
11312
11313 static void
11314 init_float128_ieee (machine_mode mode)
11315 {
11316 if (FLOAT128_VECTOR_P (mode))
11317 {
11318 set_optab_libfunc (add_optab, mode, "__addkf3");
11319 set_optab_libfunc (sub_optab, mode, "__subkf3");
11320 set_optab_libfunc (neg_optab, mode, "__negkf2");
11321 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11322 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11323 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11324 set_optab_libfunc (abs_optab, mode, "__abskf2");
11325 set_optab_libfunc (powi_optab, mode, "__powikf2");
11326
11327 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11328 set_optab_libfunc (ne_optab, mode, "__nekf2");
11329 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11330 set_optab_libfunc (ge_optab, mode, "__gekf2");
11331 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11332 set_optab_libfunc (le_optab, mode, "__lekf2");
11333 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11334
11335 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11336 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11337 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11338 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11339
11340 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11341 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11342 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11343
11344 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11345 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11346 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11347
11348 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11349 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11350 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11351 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11352 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11353 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11354
11355 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11356 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11357 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11358 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11359
11360 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11361 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11362 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11363 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11364
11365 if (TARGET_POWERPC64)
11366 {
11367 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11368 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11369 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11370 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11371 }
11372 }
11373
11374 else
11375 {
11376 set_optab_libfunc (add_optab, mode, "_q_add");
11377 set_optab_libfunc (sub_optab, mode, "_q_sub");
11378 set_optab_libfunc (neg_optab, mode, "_q_neg");
11379 set_optab_libfunc (smul_optab, mode, "_q_mul");
11380 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11381 if (TARGET_PPC_GPOPT)
11382 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11383
11384 set_optab_libfunc (eq_optab, mode, "_q_feq");
11385 set_optab_libfunc (ne_optab, mode, "_q_fne");
11386 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11387 set_optab_libfunc (ge_optab, mode, "_q_fge");
11388 set_optab_libfunc (lt_optab, mode, "_q_flt");
11389 set_optab_libfunc (le_optab, mode, "_q_fle");
11390
11391 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11392 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11393 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11394 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11395 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11396 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11397 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11398 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11399 }
11400 }
11401
11402 static void
11403 rs6000_init_libfuncs (void)
11404 {
11405 /* __float128 support. */
11406 if (TARGET_FLOAT128_TYPE)
11407 {
11408 init_float128_ibm (IFmode);
11409 init_float128_ieee (KFmode);
11410 }
11411
11412 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11413 if (TARGET_LONG_DOUBLE_128)
11414 {
11415 if (!TARGET_IEEEQUAD)
11416 init_float128_ibm (TFmode);
11417
11418 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11419 else
11420 init_float128_ieee (TFmode);
11421 }
11422 }
11423
11424 /* Emit a potentially record-form instruction, setting DST from SRC.
11425 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11426 signed comparison of DST with zero. If DOT is 1, the generated RTL
11427 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11428 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11429 a separate COMPARE. */
11430
11431 void
11432 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11433 {
11434 if (dot == 0)
11435 {
11436 emit_move_insn (dst, src);
11437 return;
11438 }
11439
11440 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11441 {
11442 emit_move_insn (dst, src);
11443 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11444 return;
11445 }
11446
11447 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11448 if (dot == 1)
11449 {
11450 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11451 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11452 }
11453 else
11454 {
11455 rtx set = gen_rtx_SET (dst, src);
11456 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11457 }
11458 }
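/* For example, with DOT == 1 and CCREG == CR0 this emits

	(parallel [(set cr0 (compare:CC src (const_int 0)))
		   (clobber dst)])

   which matches record-form patterns such as "and. rD,rA,rB" when only
   the CR0 result is needed; DOT == 2 uses (set dst src) instead of the
   clobber, so the GPR result is kept as well.  */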
11459
11460 \f
11461 /* A validation routine: say whether CODE, a condition code, and MODE
11462 match. The other alternatives either don't make sense or should
11463 never be generated. */
11464
11465 void
11466 validate_condition_mode (enum rtx_code code, machine_mode mode)
11467 {
11468 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11469 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11470 && GET_MODE_CLASS (mode) == MODE_CC);
11471
11472 /* These don't make sense. */
11473 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11474 || mode != CCUNSmode);
11475
11476 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11477 || mode == CCUNSmode);
11478
11479 gcc_assert (mode == CCFPmode
11480 || (code != ORDERED && code != UNORDERED
11481 && code != UNEQ && code != LTGT
11482 && code != UNGT && code != UNLT
11483 && code != UNGE && code != UNLE));
11484
11485 /* These are invalid; the information is not there. */
11486 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11487 }
11488
11489 \f
11490 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11491 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11492 not zero, store there the bit offset (counted from the right) where
11493 the single stretch of 1 bits begins; and similarly for B, the bit
11494 offset where it ends. */
11495
11496 bool
11497 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11498 {
11499 unsigned HOST_WIDE_INT val = INTVAL (mask);
11500 unsigned HOST_WIDE_INT bit;
11501 int nb, ne;
11502 int n = GET_MODE_PRECISION (mode);
11503
11504 if (mode != DImode && mode != SImode)
11505 return false;
11506
11507 if (INTVAL (mask) >= 0)
11508 {
11509 bit = val & -val;
11510 ne = exact_log2 (bit);
11511 nb = exact_log2 (val + bit);
11512 }
11513 else if (val + 1 == 0)
11514 {
11515 nb = n;
11516 ne = 0;
11517 }
11518 else if (val & 1)
11519 {
11520 val = ~val;
11521 bit = val & -val;
11522 nb = exact_log2 (bit);
11523 ne = exact_log2 (val + bit);
11524 }
11525 else
11526 {
11527 bit = val & -val;
11528 ne = exact_log2 (bit);
11529 if (val + bit == 0)
11530 nb = n;
11531 else
11532 nb = 0;
11533 }
11534
11535 nb--;
11536
11537 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11538 return false;
11539
11540 if (b)
11541 *b = nb;
11542 if (e)
11543 *e = ne;
11544
11545 return true;
11546 }
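/* Worked examples: SImode MASK = 0x00ffff00 has its single stretch of
   ones from bit 8 through bit 23 (counted from the right), so *E = 8 and
   *B = 23.  The wrap-around mask 0xff0000ff is also accepted, giving
   *E = 24 and *B = 7; callers decide whether a wrapping mask is usable
   for the insn at hand.  */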
11547
11548 bool
11549 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11550 {
11551 int nb, ne;
11552 return rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0;
11553 }
11554
11555 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11556 or rldicr instruction, to implement an AND with it in mode MODE. */
11557
11558 bool
11559 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11560 {
11561 int nb, ne;
11562
11563 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11564 return false;
11565
11566 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11567 does not wrap. */
11568 if (mode == DImode)
11569 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11570
11571 /* For SImode, rlwinm can do everything. */
11572 if (mode == SImode)
11573 return (nb < 32 && ne < 32);
11574
11575 return false;
11576 }
11577
11578 /* Return the instruction template for an AND with mask in mode MODE, with
11579 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11580
11581 const char *
11582 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11583 {
11584 int nb, ne;
11585
11586 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11587 gcc_unreachable ();
11588
11589 if (mode == DImode && ne == 0)
11590 {
11591 operands[3] = GEN_INT (63 - nb);
11592 if (dot)
11593 return "rldicl. %0,%1,0,%3";
11594 return "rldicl %0,%1,0,%3";
11595 }
11596
11597 if (mode == DImode && nb == 63)
11598 {
11599 operands[3] = GEN_INT (63 - ne);
11600 if (dot)
11601 return "rldicr. %0,%1,0,%3";
11602 return "rldicr %0,%1,0,%3";
11603 }
11604
11605 if (nb < 32 && ne < 32)
11606 {
11607 operands[3] = GEN_INT (31 - nb);
11608 operands[4] = GEN_INT (31 - ne);
11609 if (dot)
11610 return "rlwinm. %0,%1,0,%3,%4";
11611 return "rlwinm %0,%1,0,%3,%4";
11612 }
11613
11614 gcc_unreachable ();
11615 }
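/* For example, DImode operands[2] = 0xffffffff has ne == 0 and nb == 31,
   so this returns "rldicl %0,%1,0,%3" with %3 = 32 -- the canonical
   clrldi rD,rS,32 clearing the upper 32 bits.  */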
11616
11617 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11618 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11619 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11620
11621 bool
11622 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11623 {
11624 int nb, ne;
11625
11626 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11627 return false;
11628
11629 int n = GET_MODE_PRECISION (mode);
11630 int sh = -1;
11631
11632 if (CONST_INT_P (XEXP (shift, 1)))
11633 {
11634 sh = INTVAL (XEXP (shift, 1));
11635 if (sh < 0 || sh >= n)
11636 return false;
11637 }
11638
11639 rtx_code code = GET_CODE (shift);
11640
11641 /* Convert any shift by 0 to a rotate, to simplify below code. */
11642 if (sh == 0)
11643 code = ROTATE;
11644
11645 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11646 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11647 code = ASHIFT;
11648 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11649 {
11650 code = LSHIFTRT;
11651 sh = n - sh;
11652 }
11653
11654 /* DImode rotates need rld*. */
11655 if (mode == DImode && code == ROTATE)
11656 return (nb == 63 || ne == 0 || ne == sh);
11657
11658 /* SImode rotates need rlw*. */
11659 if (mode == SImode && code == ROTATE)
11660 return (nb < 32 && ne < 32 && sh < 32);
11661
11662 /* Wrap-around masks are only okay for rotates. */
11663 if (ne > nb)
11664 return false;
11665
11666 /* Variable shifts are only okay for rotates. */
11667 if (sh < 0)
11668 return false;
11669
11670 /* Don't allow ASHIFT if the mask is wrong for that. */
11671 if (code == ASHIFT && ne < sh)
11672 return false;
11673
11674 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11675 if the mask is wrong for that. */
11676 if (nb < 32 && ne < 32 && sh < 32
11677 && !(code == LSHIFTRT && nb >= 32 - sh))
11678 return true;
11679
11680 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11681 if the mask is wrong for that. */
11682 if (code == LSHIFTRT)
11683 sh = 64 - sh;
11684 if (nb == 63 || ne == 0 || ne == sh)
11685 return !(code == LSHIFTRT && nb >= sh);
11686
11687 return false;
11688 }
11689
11690 /* Return the instruction template for a shift with mask in mode MODE, with
11691 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11692
11693 const char *
11694 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11695 {
11696 int nb, ne;
11697
11698 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11699 gcc_unreachable ();
11700
11701 if (mode == DImode && ne == 0)
11702 {
11703 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11704 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11705 operands[3] = GEN_INT (63 - nb);
11706 if (dot)
11707 return "rld%I2cl. %0,%1,%2,%3";
11708 return "rld%I2cl %0,%1,%2,%3";
11709 }
11710
11711 if (mode == DImode && nb == 63)
11712 {
11713 operands[3] = GEN_INT (63 - ne);
11714 if (dot)
11715 return "rld%I2cr. %0,%1,%2,%3";
11716 return "rld%I2cr %0,%1,%2,%3";
11717 }
11718
11719 if (mode == DImode
11720 && GET_CODE (operands[4]) != LSHIFTRT
11721 && CONST_INT_P (operands[2])
11722 && ne == INTVAL (operands[2]))
11723 {
11724 operands[3] = GEN_INT (63 - nb);
11725 if (dot)
11726 return "rld%I2c. %0,%1,%2,%3";
11727 return "rld%I2c %0,%1,%2,%3";
11728 }
11729
11730 if (nb < 32 && ne < 32)
11731 {
11732 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11733 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11734 operands[3] = GEN_INT (31 - nb);
11735 operands[4] = GEN_INT (31 - ne);
11736 /* This insn can also be a 64-bit rotate with mask that really makes
11737 it just a shift right (with mask); the %h below are to adjust for
11738 that situation (shift count is >= 32 in that case). */
11739 if (dot)
11740 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11741 return "rlw%I2nm %0,%1,%h2,%3,%4";
11742 }
11743
11744 gcc_unreachable ();
11745 }
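/* For example, (x >> 3) & 0x1fffffffffffffff in DImode has ne == 0 and
   nb == 60; the LSHIFTRT count 3 is rewritten as a left rotate by 61,
   giving "rldicl %0,%1,61,3", the standard encoding of srdi rD,rS,3.  */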
11746
11747 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11748 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11749 ASHIFT, or LSHIFTRT) in mode MODE. */
11750
11751 bool
11752 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11753 {
11754 int nb, ne;
11755
11756 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11757 return false;
11758
11759 int n = GET_MODE_PRECISION (mode);
11760
11761 int sh = INTVAL (XEXP (shift, 1));
11762 if (sh < 0 || sh >= n)
11763 return false;
11764
11765 rtx_code code = GET_CODE (shift);
11766
11767 /* Convert any shift by 0 to a rotate, to simplify below code. */
11768 if (sh == 0)
11769 code = ROTATE;
11770
11771 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11772 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11773 code = ASHIFT;
11774 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11775 {
11776 code = LSHIFTRT;
11777 sh = n - sh;
11778 }
11779
11780 /* DImode rotates need rldimi. */
11781 if (mode == DImode && code == ROTATE)
11782 return (ne == sh);
11783
11784 /* SImode rotates need rlwimi. */
11785 if (mode == SImode && code == ROTATE)
11786 return (nb < 32 && ne < 32 && sh < 32);
11787
11788 /* Wrap-around masks are only okay for rotates. */
11789 if (ne > nb)
11790 return false;
11791
11792 /* Don't allow ASHIFT if the mask is wrong for that. */
11793 if (code == ASHIFT && ne < sh)
11794 return false;
11795
11796 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11797 if the mask is wrong for that. */
11798 if (nb < 32 && ne < 32 && sh < 32
11799 && !(code == LSHIFTRT && nb >= 32 - sh))
11800 return true;
11801
11802 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11803 if the mask is wrong for that. */
11804 if (code == LSHIFTRT)
11805 sh = 64 - sh;
11806 if (ne == sh)
11807 return !(code == LSHIFTRT && nb >= sh);
11808
11809 return false;
11810 }
11811
11812 /* Return the instruction template for an insert with mask in mode MODE, with
11813 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11814
11815 const char *
11816 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11817 {
11818 int nb, ne;
11819
11820 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11821 gcc_unreachable ();
11822
11823 /* Prefer rldimi because rlwimi is cracked. */
11824 if (TARGET_POWERPC64
11825 && (!dot || mode == DImode)
11826 && GET_CODE (operands[4]) != LSHIFTRT
11827 && ne == INTVAL (operands[2]))
11828 {
11829 operands[3] = GEN_INT (63 - nb);
11830 if (dot)
11831 return "rldimi. %0,%1,%2,%3";
11832 return "rldimi %0,%1,%2,%3";
11833 }
11834
11835 if (nb < 32 && ne < 32)
11836 {
11837 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11838 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11839 operands[3] = GEN_INT (31 - nb);
11840 operands[4] = GEN_INT (31 - ne);
11841 if (dot)
11842 return "rlwimi. %0,%1,%2,%3,%4";
11843 return "rlwimi %0,%1,%2,%3,%4";
11844 }
11845
11846 gcc_unreachable ();
11847 }
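/* For example, inserting a 16-bit field at bits 16..31 of a 64-bit
   register (mask 0xffff0000, ASHIFT by 16) has ne == 16 and nb == 31, so
   on 64-bit targets this returns "rldimi %0,%1,%2,%3" with %3 = 32,
   i.e. rldimi rD,rS,16,32.  */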
11848
11849 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11850 using two machine instructions. */
11851
11852 bool
11853 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11854 {
11855 /* There are two kinds of AND we can handle with two insns:
11856 1) those we can do with two rl* insns;
11857 2) ori[s];xori[s].
11858
11859 We do not handle that last case yet. */
11860
11861 /* If there is just one stretch of ones, we can do it. */
11862 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11863 return true;
11864
11865 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11866 one insn, we can do the whole thing with two. */
11867 unsigned HOST_WIDE_INT val = INTVAL (c);
11868 unsigned HOST_WIDE_INT bit1 = val & -val;
11869 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11870 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11871 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11872 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11873 }
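/* Worked example: VAL = 0xf0f0 has two stretches of ones separated by a
   hole at bits 8..11.  Then bit1 = 0x10, bit2 = 0x100, bit3 = 0x1000,
   and the filled mask val + bit3 - bit2 = 0xfff0 is a single stretch,
   hence a valid one-insn AND mask; x & 0xf0f0 can then be computed as
   (x & ~0x0f00) & 0xfff0.  */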
11874
11875 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11876 If EXPAND is true, split rotate-and-mask instructions we generate to
11877 their constituent parts as well (this is used during expand); if DOT
11878 is 1, make the last insn a record-form instruction clobbering the
11879 destination GPR and setting the CC reg (from operands[3]); if 2, set
11880 that GPR as well as the CC reg. */
11881
11882 void
11883 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11884 {
11885 gcc_assert (!(expand && dot));
11886
11887 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11888
11889 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11890 shift right. This generates better code than doing the masks without
11891 shifts, or shifting first right and then left. */
11892 int nb, ne;
11893 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11894 {
11895 gcc_assert (mode == DImode);
11896
11897 int shift = 63 - nb;
11898 if (expand)
11899 {
11900 rtx tmp1 = gen_reg_rtx (DImode);
11901 rtx tmp2 = gen_reg_rtx (DImode);
11902 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11903 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11904 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11905 }
11906 else
11907 {
11908 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11909 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11910 emit_move_insn (operands[0], tmp);
11911 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11912 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11913 }
11914 return;
11915 }
11916
11917 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11918 that does the rest. */
11919 unsigned HOST_WIDE_INT bit1 = val & -val;
11920 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11921 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11922 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11923
11924 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11925 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
11926
11927 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11928
11929 /* Two "no-rotate"-and-mask instructions, for SImode. */
11930 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11931 {
11932 gcc_assert (mode == SImode);
11933
11934 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11935 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11936 emit_move_insn (reg, tmp);
11937 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11938 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11939 return;
11940 }
11941
11942 gcc_assert (mode == DImode);
11943
11944 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11945 insns; we have to do the first in SImode, because it wraps. */
11946 if (mask2 <= 0xffffffff
11947 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11948 {
11949 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11950 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11951 GEN_INT (mask1));
11952 rtx reg_low = gen_lowpart (SImode, reg);
11953 emit_move_insn (reg_low, tmp);
11954 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11955 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11956 return;
11957 }
11958
11959 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11960 at the top end), rotate back and clear the other hole. */
11961 int right = exact_log2 (bit3);
11962 int left = 64 - right;
11963
11964 /* Rotate the mask too. */
11965 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11966
11967 if (expand)
11968 {
11969 rtx tmp1 = gen_reg_rtx (DImode);
11970 rtx tmp2 = gen_reg_rtx (DImode);
11971 rtx tmp3 = gen_reg_rtx (DImode);
11972 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11973 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11974 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11975 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11976 }
11977 else
11978 {
11979 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11980 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11981 emit_move_insn (operands[0], tmp);
11982 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11983 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11984 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11985 }
11986 }
11987 \f
11988 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
11989 for lfq and stfq insns iff the registers are hard registers. */
11990
11991 int
11992 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11993 {
11994 /* We might have been passed a SUBREG. */
11995 if (!REG_P (reg1) || !REG_P (reg2))
11996 return 0;
11997
11998 /* We might have been passed non-floating-point registers. */
11999 if (!FP_REGNO_P (REGNO (reg1))
12000 || !FP_REGNO_P (REGNO (reg2)))
12001 return 0;
12002
12003 return (REGNO (reg1) == REGNO (reg2) - 1);
12004 }
12005
12006 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
12007 addr1 and addr2 must be in consecutive memory locations
12008 (addr2 == addr1 + 8). */
12009
12010 int
12011 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
12012 {
12013 rtx addr1, addr2;
12014 unsigned int reg1, reg2;
12015 int offset1, offset2;
12016
12017 /* The mems cannot be volatile. */
12018 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
12019 return 0;
12020
12021 addr1 = XEXP (mem1, 0);
12022 addr2 = XEXP (mem2, 0);
12023
12024 /* Extract an offset (if used) from the first addr. */
12025 if (GET_CODE (addr1) == PLUS)
12026 {
12027 /* If not a REG, return zero. */
12028 if (!REG_P (XEXP (addr1, 0)))
12029 return 0;
12030 else
12031 {
12032 reg1 = REGNO (XEXP (addr1, 0));
12033 /* The offset must be constant! */
12034 if (!CONST_INT_P (XEXP (addr1, 1)))
12035 return 0;
12036 offset1 = INTVAL (XEXP (addr1, 1));
12037 }
12038 }
12039 else if (!REG_P (addr1))
12040 return 0;
12041 else
12042 {
12043 reg1 = REGNO (addr1);
12044 /* This was a simple (mem (reg)) expression. Offset is 0. */
12045 offset1 = 0;
12046 }
12047
12048 /* And now for the second addr. */
12049 if (GET_CODE (addr2) == PLUS)
12050 {
12051 /* If not a REG, return zero. */
12052 if (!REG_P (XEXP (addr2, 0)))
12053 return 0;
12054 else
12055 {
12056 reg2 = REGNO (XEXP (addr2, 0));
12057 /* The offset must be constant. */
12058 if (!CONST_INT_P (XEXP (addr2, 1)))
12059 return 0;
12060 offset2 = INTVAL (XEXP (addr2, 1));
12061 }
12062 }
12063 else if (!REG_P (addr2))
12064 return 0;
12065 else
12066 {
12067 reg2 = REGNO (addr2);
12068 /* This was a simple (mem (reg)) expression. Offset is 0. */
12069 offset2 = 0;
12070 }
12071
12072 /* Both of these must have the same base register. */
12073 if (reg1 != reg2)
12074 return 0;
12075
12076 /* The offset for the second addr must be 8 more than the first addr. */
12077 if (offset2 != offset1 + 8)
12078 return 0;
12079
12080 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
12081 instructions. */
12082 return 1;
12083 }
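/* For example, (mem (plus r9 16)) and (mem (plus r9 24)) qualify: both
   use base register r9 and the second offset is the first plus 8, so the
   pair can become a single lfq or stfq.  */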
12084 \f
12085 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
12086 need to use DDmode; in all other cases we can use the same mode. */
12087 static machine_mode
12088 rs6000_secondary_memory_needed_mode (machine_mode mode)
12089 {
12090 if (lra_in_progress && mode == SDmode)
12091 return DDmode;
12092 return mode;
12093 }
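
/* For example (illustrative): an SDmode pseudo spilled during LRA gets an
   8-byte DDmode stack slot, because SDmode data is kept in the FPRs in
   DDmode format and has no 4-byte FPR load/store of its own.  */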
12094
12095 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
12096 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
12097 only work on the traditional altivec registers, note if an altivec register
12098 was chosen. */
12099
12100 static enum rs6000_reg_type
12101 register_to_reg_type (rtx reg, bool *is_altivec)
12102 {
12103 HOST_WIDE_INT regno;
12104 enum reg_class rclass;
12105
12106 if (SUBREG_P (reg))
12107 reg = SUBREG_REG (reg);
12108
12109 if (!REG_P (reg))
12110 return NO_REG_TYPE;
12111
12112 regno = REGNO (reg);
12113 if (!HARD_REGISTER_NUM_P (regno))
12114 {
12115 if (!lra_in_progress && !reload_completed)
12116 return PSEUDO_REG_TYPE;
12117
12118 regno = true_regnum (reg);
12119 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
12120 return PSEUDO_REG_TYPE;
12121 }
12122
12123 gcc_assert (regno >= 0);
12124
12125 if (is_altivec && ALTIVEC_REGNO_P (regno))
12126 *is_altivec = true;
12127
12128 rclass = rs6000_regno_regclass[regno];
12129 return reg_class_to_reg_type[(int)rclass];
12130 }
12131
12132 /* Helper function to return the cost of adding a TOC entry address. */
12133
12134 static inline int
12135 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
12136 {
12137 int ret;
12138
12139 if (TARGET_CMODEL != CMODEL_SMALL)
12140 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
12141
12142 else
12143 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
12144
12145 return ret;
12146 }
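
/* For example (illustrative): with -mcmodel=medium a TOC address is formed
   with an ADDIS high part plus either an ADDI or a D-form memory access,
   so the cost above is 1 or 2 depending on whether the register class
   supports offset addressing; with -mcmodel=small -mminimal-toc the
   address must instead be fetched out of the TOC, which is costed
   higher.  */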
12147
12148 /* Helper function for rs6000_secondary_reload to determine whether the memory
12149 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12150 needs reloading. Return negative if the memory is not handled by the memory
12151 helper functions and to try a different reload method, 0 if no additional
12152    instructions are needed, and positive to give the extra cost for the
12153 memory. */
12154
12155 static int
12156 rs6000_secondary_reload_memory (rtx addr,
12157 enum reg_class rclass,
12158 machine_mode mode)
12159 {
12160 int extra_cost = 0;
12161 rtx reg, and_arg, plus_arg0, plus_arg1;
12162 addr_mask_type addr_mask;
12163 const char *type = NULL;
12164 const char *fail_msg = NULL;
12165
12166 if (GPR_REG_CLASS_P (rclass))
12167 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12168
12169 else if (rclass == FLOAT_REGS)
12170 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12171
12172 else if (rclass == ALTIVEC_REGS)
12173 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12174
12175 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12176 else if (rclass == VSX_REGS)
12177 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
12178 & ~RELOAD_REG_AND_M16);
12179
12180   /* DMR registers have no loads or stores; the data must go through the
12181      VSX registers, which requires some extra instructions.  */
12182 else if (rclass == DM_REGS)
12183 return 2;
12184
12185   /* If the register allocator hasn't made up its mind yet on the register
12186      class to use, settle on sensible defaults.  */
12187 else if (rclass == NO_REGS)
12188 {
12189 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
12190 & ~RELOAD_REG_AND_M16);
12191
12192 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
12193 addr_mask &= ~(RELOAD_REG_INDEXED
12194 | RELOAD_REG_PRE_INCDEC
12195 | RELOAD_REG_PRE_MODIFY);
12196 }
12197
12198 else
12199 addr_mask = 0;
12200
12201 /* If the register isn't valid in this register class, just return now. */
12202 if ((addr_mask & RELOAD_REG_VALID) == 0)
12203 {
12204 if (TARGET_DEBUG_ADDR)
12205 {
12206 fprintf (stderr,
12207 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12208 "not valid in class\n",
12209 GET_MODE_NAME (mode), reg_class_names[rclass]);
12210 debug_rtx (addr);
12211 }
12212
12213 return -1;
12214 }
12215
12216 switch (GET_CODE (addr))
12217 {
12218     /* Does the register class support auto update forms for this mode?  We
12219 don't need a scratch register, since the powerpc only supports
12220 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12221 case PRE_INC:
12222 case PRE_DEC:
12223 reg = XEXP (addr, 0);
12224       if (!base_reg_operand (reg, GET_MODE (reg)))
12225 {
12226 fail_msg = "no base register #1";
12227 extra_cost = -1;
12228 }
12229
12230 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12231 {
12232 extra_cost = 1;
12233 type = "update";
12234 }
12235 break;
12236
12237 case PRE_MODIFY:
12238 reg = XEXP (addr, 0);
12239 plus_arg1 = XEXP (addr, 1);
12240 if (!base_reg_operand (reg, GET_MODE (reg))
12241 || GET_CODE (plus_arg1) != PLUS
12242 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
12243 {
12244 fail_msg = "bad PRE_MODIFY";
12245 extra_cost = -1;
12246 }
12247
12248 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12249 {
12250 extra_cost = 1;
12251 type = "update";
12252 }
12253 break;
12254
12255 /* Do we need to simulate AND -16 to clear the bottom address bits used
12256 in VMX load/stores? Only allow the AND for vector sizes. */
12257 case AND:
12258 and_arg = XEXP (addr, 0);
12259 if (GET_MODE_SIZE (mode) != 16
12260 || !CONST_INT_P (XEXP (addr, 1))
12261 || INTVAL (XEXP (addr, 1)) != -16)
12262 {
12263 fail_msg = "bad Altivec AND #1";
12264 extra_cost = -1;
12265 }
12266
12267 if (rclass != ALTIVEC_REGS)
12268 {
12269 if (legitimate_indirect_address_p (and_arg, false))
12270 extra_cost = 1;
12271
12272 else if (legitimate_indexed_address_p (and_arg, false))
12273 extra_cost = 2;
12274
12275 else
12276 {
12277 fail_msg = "bad Altivec AND #2";
12278 extra_cost = -1;
12279 }
12280
12281 type = "and";
12282 }
12283 break;
12284
12285 /* If this is an indirect address, make sure it is a base register. */
12286 case REG:
12287 case SUBREG:
12288 if (!legitimate_indirect_address_p (addr, false))
12289 {
12290 extra_cost = 1;
12291 type = "move";
12292 }
12293 break;
12294
12295 /* If this is an indexed address, make sure the register class can handle
12296 indexed addresses for this mode. */
12297 case PLUS:
12298 plus_arg0 = XEXP (addr, 0);
12299 plus_arg1 = XEXP (addr, 1);
12300
12301 /* (plus (plus (reg) (constant)) (constant)) is generated during
12302 push_reload processing, so handle it now. */
12303 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12304 {
12305 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12306 {
12307 extra_cost = 1;
12308 type = "offset";
12309 }
12310 }
12311
12312 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12313 push_reload processing, so handle it now. */
12314 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12315 {
12316 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12317 {
12318 extra_cost = 1;
12319 type = "indexed #2";
12320 }
12321 }
12322
12323 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12324 {
12325 fail_msg = "no base register #2";
12326 extra_cost = -1;
12327 }
12328
12329 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12330 {
12331 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12332 || !legitimate_indexed_address_p (addr, false))
12333 {
12334 extra_cost = 1;
12335 type = "indexed";
12336 }
12337 }
12338
12339 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12340 && CONST_INT_P (plus_arg1))
12341 {
12342 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12343 {
12344 extra_cost = 1;
12345 type = "vector d-form offset";
12346 }
12347 }
12348
12349 /* Make sure the register class can handle offset addresses. */
12350 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12351 {
12352 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12353 {
12354 extra_cost = 1;
12355 type = "offset #2";
12356 }
12357 }
12358
12359 else
12360 {
12361 fail_msg = "bad PLUS";
12362 extra_cost = -1;
12363 }
12364
12365 break;
12366
12367 case LO_SUM:
12368 /* Quad offsets are restricted and can't handle normal addresses. */
12369 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12370 {
12371 extra_cost = -1;
12372 type = "vector d-form lo_sum";
12373 }
12374
12375 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12376 {
12377 fail_msg = "bad LO_SUM";
12378 extra_cost = -1;
12379 }
12380
12381 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12382 {
12383 extra_cost = 1;
12384 type = "lo_sum";
12385 }
12386 break;
12387
12388 /* Static addresses need to create a TOC entry. */
12389 case CONST:
12390 case SYMBOL_REF:
12391 case LABEL_REF:
12392 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12393 {
12394 extra_cost = -1;
12395 type = "vector d-form lo_sum #2";
12396 }
12397
12398 else
12399 {
12400 type = "address";
12401 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12402 }
12403 break;
12404
12405     /* TOC references look like offsettable memory.  */
12406 case UNSPEC:
12407 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12408 {
12409 fail_msg = "bad UNSPEC";
12410 extra_cost = -1;
12411 }
12412
12413 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12414 {
12415 extra_cost = -1;
12416 type = "vector d-form lo_sum #3";
12417 }
12418
12419 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12420 {
12421 extra_cost = 1;
12422 type = "toc reference";
12423 }
12424 break;
12425
12426 default:
12427 {
12428 fail_msg = "bad address";
12429 extra_cost = -1;
12430 }
12431 }
12432
12433 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12434 {
12435 if (extra_cost < 0)
12436 fprintf (stderr,
12437 "rs6000_secondary_reload_memory error: mode = %s, "
12438 "class = %s, addr_mask = '%s', %s\n",
12439 GET_MODE_NAME (mode),
12440 reg_class_names[rclass],
12441 rs6000_debug_addr_mask (addr_mask, false),
12442 (fail_msg != NULL) ? fail_msg : "<bad address>");
12443
12444 else
12445 fprintf (stderr,
12446 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12447 "addr_mask = '%s', extra cost = %d, %s\n",
12448 GET_MODE_NAME (mode),
12449 reg_class_names[rclass],
12450 rs6000_debug_addr_mask (addr_mask, false),
12451 extra_cost,
12452 (type) ? type : "<none>");
12453
12454 debug_rtx (addr);
12455 }
12456
12457 return extra_cost;
12458 }
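
/* Worked example (illustrative): reloading a DFmode value into
   ALTIVEC_REGS from

	(mem:DF (plus (reg 1) (const_int 32)))

   falls into the PLUS case above; on, say, power8 the traditional Altivec
   registers have no D-form (reg+offset) addressing for DFmode, so
   extra_cost is 1: one instruction to move the address into a base
   register so that an indexed load such as lxsdx can be used.  */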
12459
12460 /* Helper function for rs6000_secondary_reload to return true if a move to a
12461    different register class is really a simple move.  */
12462
12463 static bool
12464 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12465 enum rs6000_reg_type from_type,
12466 machine_mode mode)
12467 {
12468 int size = GET_MODE_SIZE (mode);
12469
12470 /* Add support for various direct moves available. In this function, we only
12471 look at cases where we don't need any extra registers, and one or more
12472 simple move insns are issued. Originally small integers are not allowed
12473 in FPR/VSX registers. Single precision binary floating is not a simple
12474 move because we need to convert to the single precision memory layout.
12475 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12476 need special direct move handling, which we do not support yet. */
12477 if (TARGET_DIRECT_MOVE
12478 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12479 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12480 {
12481 if (TARGET_POWERPC64)
12482 {
12483 	  /* ISA 2.07: MTVSRD or MFVSRD.  */
12484 if (size == 8)
12485 return true;
12486
12487 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12488 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12489 return true;
12490 }
12491
12492 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12493 if (TARGET_P8_VECTOR)
12494 {
12495 if (mode == SImode)
12496 return true;
12497
12498 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12499 return true;
12500 }
12501
12502 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12503 if (mode == SDmode)
12504 return true;
12505 }
12506
12507 /* Move to/from SPR. */
12508 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12509 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12510 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12511 return true;
12512
12513 /* We can transfer between VSX registers and DMR registers without needing
12514 extra registers. */
12515 if (TARGET_DENSE_MATH && (mode == XOmode || mode == TDOmode)
12516 && ((to_type == DMR_REG_TYPE && from_type == VSX_REG_TYPE)
12517 || (to_type == VSX_REG_TYPE && from_type == DMR_REG_TYPE)))
12518 return true;
12519
12520 return false;
12521 }
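
/* For illustration: on a 64-bit ISA 2.07 target the 8-byte direct moves
   are single instructions, e.g.

	mtvsrd 0,3	# GPR r3 -> VSX register vs0
	mfvsrd 3,0	# VSX register vs0 -> GPR r3

   which is why these moves need no extra scratch register here.  */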
12522
12523 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12524    special direct moves that involve allocating an extra register.  Return
12525    true if there is such a helper, filling in SRI with its insn code and
12526    extra cost, and false if not.  */
12527
12528 static bool
12529 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12530 enum rs6000_reg_type from_type,
12531 machine_mode mode,
12532 secondary_reload_info *sri,
12533 bool altivec_p)
12534 {
12535 bool ret = false;
12536 enum insn_code icode = CODE_FOR_nothing;
12537 int cost = 0;
12538 int size = GET_MODE_SIZE (mode);
12539
12540 if (TARGET_POWERPC64 && size == 16)
12541 {
12542       /* Handle moving 128-bit values from GPRs to VSX registers on
12543 ISA 2.07 (power8, power9) when running in 64-bit mode using
12544 XXPERMDI to glue the two 64-bit values back together. */
12545 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12546 {
12547 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12548 icode = reg_addr[mode].reload_vsx_gpr;
12549 }
12550
12551       /* Handle moving 128-bit values from VSX registers to GPRs on
12552 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12553 bottom 64-bit value. */
12554 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12555 {
12556 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12557 icode = reg_addr[mode].reload_gpr_vsx;
12558 }
12559 }
12560
12561 else if (TARGET_POWERPC64 && mode == SFmode)
12562 {
12563 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12564 {
12565 cost = 3; /* xscvdpspn, mfvsrd, and. */
12566 icode = reg_addr[mode].reload_gpr_vsx;
12567 }
12568
12569 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12570 {
12571 	  cost = 2;			/* mtvsrwz, xscvspdpn.  */
12572 icode = reg_addr[mode].reload_vsx_gpr;
12573 }
12574 }
12575
12576 else if (!TARGET_POWERPC64 && size == 8)
12577 {
12578 /* Handle moving 64-bit values from GPRs to floating point registers on
12579 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12580 32-bit values back together. Altivec register classes must be handled
12581 specially since a different instruction is used, and the secondary
12582 	 reload support requires a single register class in the scratch
12583 register constraint. However, right now TFmode is not allowed in
12584 Altivec registers, so the pattern will never match. */
12585 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12586 {
12587 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12588 icode = reg_addr[mode].reload_fpr_gpr;
12589 }
12590 }
12591
12592 if (icode != CODE_FOR_nothing)
12593 {
12594 ret = true;
12595 if (sri)
12596 {
12597 sri->icode = icode;
12598 sri->extra_cost = cost;
12599 }
12600 }
12601
12602 return ret;
12603 }
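
/* For illustration (a sketch; the exact operand order depends on
   endianness): the power8 TImode GPR-to-VSX helper expands to roughly

	mtvsrd <tmp1>,3
	mtvsrd <tmp2>,4
	xxpermdi <dest>,<tmp1>,<tmp2>,0

   i.e. the "2 mtvsrd's, 1 xxpermdi" costed above, and it is the need for
   the extra VSX scratch register that keeps this case out of
   rs6000_secondary_reload_simple_move.  */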
12604
12605 /* Return whether a move between two register classes can be done either
12606 directly (simple move) or via a pattern that uses a single extra temporary
12607    (using ISA 2.07's direct move in this case).  */
12608
12609 static bool
12610 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12611 enum rs6000_reg_type from_type,
12612 machine_mode mode,
12613 secondary_reload_info *sri,
12614 bool altivec_p)
12615 {
12616 /* Fall back to load/store reloads if either type is not a register. */
12617 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12618 return false;
12619
12620 /* If we haven't allocated registers yet, assume the move can be done for the
12621 standard register types. */
12622 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12623 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12624 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12625 return true;
12626
12627   /* A move within the same set of registers is a simple move for
12628      non-specialized registers.  */
12629 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12630 return true;
12631
12632 /* Check whether a simple move can be done directly. */
12633 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12634 {
12635 if (sri)
12636 {
12637 sri->icode = CODE_FOR_nothing;
12638 sri->extra_cost = 0;
12639 }
12640 return true;
12641 }
12642
12643 /* Now check if we can do it in a few steps. */
12644 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12645 altivec_p);
12646 }
12647
12648 /* Inform reload about cases where moving X with a mode MODE to a register in
12649 RCLASS requires an extra scratch or immediate register. Return the class
12650 needed for the immediate register.
12651
12652 For VSX and Altivec, we may need a register to convert sp+offset into
12653 reg+sp.
12654
12655 For misaligned 64-bit gpr loads and stores we need a register to
12656 convert an offset address to indirect. */
12657
12658 static reg_class_t
12659 rs6000_secondary_reload (bool in_p,
12660 rtx x,
12661 reg_class_t rclass_i,
12662 machine_mode mode,
12663 secondary_reload_info *sri)
12664 {
12665 enum reg_class rclass = (enum reg_class) rclass_i;
12666 reg_class_t ret = ALL_REGS;
12667 enum insn_code icode;
12668 bool default_p = false;
12669 bool done_p = false;
12670
12671 /* Allow subreg of memory before/during reload. */
12672 bool memory_p = (MEM_P (x)
12673 || (!reload_completed && SUBREG_P (x)
12674 && MEM_P (SUBREG_REG (x))));
12675
12676 sri->icode = CODE_FOR_nothing;
12677 sri->t_icode = CODE_FOR_nothing;
12678 sri->extra_cost = 0;
12679 icode = ((in_p)
12680 ? reg_addr[mode].reload_load
12681 : reg_addr[mode].reload_store);
12682
12683 if (REG_P (x) || register_operand (x, mode))
12684 {
12685 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12686 bool altivec_p = (rclass == ALTIVEC_REGS);
12687 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12688
12689 if (!in_p)
12690 std::swap (to_type, from_type);
12691
12692 /* Can we do a direct move of some sort? */
12693 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12694 altivec_p))
12695 {
12696 icode = (enum insn_code)sri->icode;
12697 default_p = false;
12698 done_p = true;
12699 ret = NO_REGS;
12700 }
12701 }
12702
12703 /* Make sure 0.0 is not reloaded or forced into memory. */
12704 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12705 {
12706 ret = NO_REGS;
12707 default_p = false;
12708 done_p = true;
12709 }
12710
12711 /* If this is a scalar floating point value and we want to load it into the
12712      traditional Altivec registers, move it via a traditional floating
12713      point register, unless we have D-form addressing.  Also make sure that
12714 non-zero constants use a FPR. */
12715 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12716 && !mode_supports_vmx_dform (mode)
12717 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12718 && (memory_p || CONST_DOUBLE_P (x)))
12719 {
12720 ret = FLOAT_REGS;
12721 default_p = false;
12722 done_p = true;
12723 }
12724
12725 /* Handle reload of load/stores if we have reload helper functions. */
12726 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12727 {
12728 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12729 mode);
12730
12731 if (extra_cost >= 0)
12732 {
12733 done_p = true;
12734 ret = NO_REGS;
12735 if (extra_cost > 0)
12736 {
12737 sri->extra_cost = extra_cost;
12738 sri->icode = icode;
12739 }
12740 }
12741 }
12742
12743 /* Handle unaligned loads and stores of integer registers. */
12744 if (!done_p && TARGET_POWERPC64
12745 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12746 && memory_p
12747 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12748 {
12749 rtx addr = XEXP (x, 0);
12750 rtx off = address_offset (addr);
12751
12752 if (off != NULL_RTX)
12753 {
12754 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12755 unsigned HOST_WIDE_INT offset = INTVAL (off);
12756
12757 /* We need a secondary reload when our legitimate_address_p
12758 says the address is good (as otherwise the entire address
12759 will be reloaded), and the offset is not a multiple of
12760 four or we have an address wrap. Address wrap will only
12761 occur for LO_SUMs since legitimate_offset_address_p
12762 rejects addresses for 16-byte mems that will wrap. */
12763 if (GET_CODE (addr) == LO_SUM
12764 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12765 && ((offset & 3) != 0
12766 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12767 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12768 && (offset & 3) != 0))
12769 {
12770 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12771 if (in_p)
12772 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12773 : CODE_FOR_reload_di_load);
12774 else
12775 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12776 : CODE_FOR_reload_di_store);
12777 sri->extra_cost = 2;
12778 ret = NO_REGS;
12779 done_p = true;
12780 }
12781 else
12782 default_p = true;
12783 }
12784 else
12785 default_p = true;
12786 }
12787
12788 if (!done_p && !TARGET_POWERPC64
12789 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12790 && memory_p
12791 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12792 {
12793 rtx addr = XEXP (x, 0);
12794 rtx off = address_offset (addr);
12795
12796 if (off != NULL_RTX)
12797 {
12798 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12799 unsigned HOST_WIDE_INT offset = INTVAL (off);
12800
12801 /* We need a secondary reload when our legitimate_address_p
12802 says the address is good (as otherwise the entire address
12803 will be reloaded), and we have a wrap.
12804
12805 legitimate_lo_sum_address_p allows LO_SUM addresses to
12806 have any offset so test for wrap in the low 16 bits.
12807
12808 legitimate_offset_address_p checks for the range
12809 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12810 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12811 [0x7ff4,0x7fff] respectively, so test for the
12812 intersection of these ranges, [0x7ffc,0x7fff] and
12813 [0x7ff4,0x7ff7] respectively.
12814
12815 Note that the address we see here may have been
12816 manipulated by legitimize_reload_address. */
12817 if (GET_CODE (addr) == LO_SUM
12818 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12819 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12820 {
12821 if (in_p)
12822 sri->icode = CODE_FOR_reload_si_load;
12823 else
12824 sri->icode = CODE_FOR_reload_si_store;
12825 sri->extra_cost = 2;
12826 ret = NO_REGS;
12827 done_p = true;
12828 }
12829 else
12830 default_p = true;
12831 }
12832 else
12833 default_p = true;
12834 }
12835
12836 if (!done_p)
12837 default_p = true;
12838
12839 if (default_p)
12840 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12841
12842 gcc_assert (ret != ALL_REGS);
12843
12844 if (TARGET_DEBUG_ADDR)
12845 {
12846 fprintf (stderr,
12847 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12848 "mode = %s",
12849 reg_class_names[ret],
12850 in_p ? "true" : "false",
12851 reg_class_names[rclass],
12852 GET_MODE_NAME (mode));
12853
12854 if (reload_completed)
12855 fputs (", after reload", stderr);
12856
12857 if (!done_p)
12858 fputs (", done_p not set", stderr);
12859
12860 if (default_p)
12861 fputs (", default secondary reload", stderr);
12862
12863 if (sri->icode != CODE_FOR_nothing)
12864 fprintf (stderr, ", reload func = %s, extra cost = %d",
12865 insn_data[sri->icode].name, sri->extra_cost);
12866
12867 else if (sri->extra_cost > 0)
12868 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12869
12870 fputs ("\n", stderr);
12871 debug_rtx (x);
12872 }
12873
12874 return ret;
12875 }
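
/* Worked example (illustrative): a 64-bit GPR load from

	(mem:DI (plus (reg 1) (const_int 10)))

   has an address that legitimate_address_p accepts (an FPR lfd could use
   it directly), but the GPR DS-form ld needs an offset that is a multiple
   of 4; (10 & 3) != 0, so the code above requests CODE_FOR_reload_di_load
   with extra_cost = 2 to rebuild the address in a scratch register.  */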
12876
12877 /* Better tracing for rs6000_secondary_reload_inner. */
12878
12879 static void
12880 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12881 bool store_p)
12882 {
12883 rtx set, clobber;
12884
12885 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12886
12887 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12888 store_p ? "store" : "load");
12889
12890 if (store_p)
12891 set = gen_rtx_SET (mem, reg);
12892 else
12893 set = gen_rtx_SET (reg, mem);
12894
12895 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12896 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12897 }
12898
12899 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12900 ATTRIBUTE_NORETURN;
12901
12902 static void
12903 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12904 bool store_p)
12905 {
12906 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12907 gcc_unreachable ();
12908 }
12909
12910 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12911 reload helper functions. These were identified in
12912 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12913 reload, it calls the insns:
12914 reload_<RELOAD:mode>_<P:mptrsize>_store
12915 reload_<RELOAD:mode>_<P:mptrsize>_load
12916
12917 which in turn calls this function, to do whatever is necessary to create
12918 valid addresses. */
12919
12920 void
12921 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12922 {
12923 int regno = true_regnum (reg);
12924 machine_mode mode = GET_MODE (reg);
12925 addr_mask_type addr_mask;
12926 rtx addr;
12927 rtx new_addr;
12928 rtx op_reg, op0, op1;
12929 rtx and_op;
12930 rtx cc_clobber;
12931 rtvec rv;
12932
12933 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12934 || !base_reg_operand (scratch, GET_MODE (scratch)))
12935 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12936
12937 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12938 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12939
12940 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12941 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12942
12943 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12944 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12945
12946 else
12947 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12948
12949 /* Make sure the mode is valid in this register class. */
12950 if ((addr_mask & RELOAD_REG_VALID) == 0)
12951 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12952
12953 if (TARGET_DEBUG_ADDR)
12954 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12955
12956 new_addr = addr = XEXP (mem, 0);
12957 switch (GET_CODE (addr))
12958 {
12959 /* Does the register class support auto update forms for this mode? If
12960 not, do the update now. We don't need a scratch register, since the
12961 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12962 case PRE_INC:
12963 case PRE_DEC:
12964 op_reg = XEXP (addr, 0);
12965 if (!base_reg_operand (op_reg, Pmode))
12966 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12967
12968 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12969 {
12970 int delta = GET_MODE_SIZE (mode);
12971 if (GET_CODE (addr) == PRE_DEC)
12972 delta = -delta;
12973 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12974 new_addr = op_reg;
12975 }
12976 break;
12977
12978 case PRE_MODIFY:
12979 op0 = XEXP (addr, 0);
12980 op1 = XEXP (addr, 1);
12981 if (!base_reg_operand (op0, Pmode)
12982 || GET_CODE (op1) != PLUS
12983 || !rtx_equal_p (op0, XEXP (op1, 0)))
12984 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12985
12986 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12987 {
12988 emit_insn (gen_rtx_SET (op0, op1));
12989 	  new_addr = op0;
12990 }
12991 break;
12992
12993 /* Do we need to simulate AND -16 to clear the bottom address bits used
12994 in VMX load/stores? */
12995 case AND:
12996 op0 = XEXP (addr, 0);
12997 op1 = XEXP (addr, 1);
12998 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12999 {
13000 if (REG_P (op0) || SUBREG_P (op0))
13001 op_reg = op0;
13002
13003 	  else if (GET_CODE (op0) == PLUS)
13004 	    {
13005 	      emit_insn (gen_rtx_SET (scratch, op0));
13006 op_reg = scratch;
13007 }
13008
13009 else
13010 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13011
13012 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
13013 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
13014 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
13015 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
13016 new_addr = scratch;
13017 }
13018 break;
13019
13020 /* If this is an indirect address, make sure it is a base register. */
13021 case REG:
13022 case SUBREG:
13023 if (!base_reg_operand (addr, GET_MODE (addr)))
13024 {
13025 emit_insn (gen_rtx_SET (scratch, addr));
13026 new_addr = scratch;
13027 }
13028 break;
13029
13030 /* If this is an indexed address, make sure the register class can handle
13031 indexed addresses for this mode. */
13032 case PLUS:
13033 op0 = XEXP (addr, 0);
13034 op1 = XEXP (addr, 1);
13035 if (!base_reg_operand (op0, Pmode))
13036 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13037
13038 else if (int_reg_operand (op1, Pmode))
13039 {
13040 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13041 {
13042 emit_insn (gen_rtx_SET (scratch, addr));
13043 new_addr = scratch;
13044 }
13045 }
13046
13047 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
13048 {
13049 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
13050 || !quad_address_p (addr, mode, false))
13051 {
13052 emit_insn (gen_rtx_SET (scratch, addr));
13053 new_addr = scratch;
13054 }
13055 }
13056
13057 /* Make sure the register class can handle offset addresses. */
13058 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
13059 {
13060 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13061 {
13062 emit_insn (gen_rtx_SET (scratch, addr));
13063 new_addr = scratch;
13064 }
13065 }
13066
13067 else
13068 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13069
13070 break;
13071
13072 case LO_SUM:
13073 op0 = XEXP (addr, 0);
13074 op1 = XEXP (addr, 1);
13075 if (!base_reg_operand (op0, Pmode))
13076 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13077
13078 else if (int_reg_operand (op1, Pmode))
13079 {
13080 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13081 {
13082 emit_insn (gen_rtx_SET (scratch, addr));
13083 new_addr = scratch;
13084 }
13085 }
13086
13087 /* Quad offsets are restricted and can't handle normal addresses. */
13088 else if (mode_supports_dq_form (mode))
13089 {
13090 emit_insn (gen_rtx_SET (scratch, addr));
13091 new_addr = scratch;
13092 }
13093
13094 /* Make sure the register class can handle offset addresses. */
13095 else if (legitimate_lo_sum_address_p (mode, addr, false))
13096 {
13097 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13098 {
13099 emit_insn (gen_rtx_SET (scratch, addr));
13100 new_addr = scratch;
13101 }
13102 }
13103
13104 else
13105 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13106
13107 break;
13108
13109 case SYMBOL_REF:
13110 case CONST:
13111 case LABEL_REF:
13112 rs6000_emit_move (scratch, addr, Pmode);
13113 new_addr = scratch;
13114 break;
13115
13116 default:
13117 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13118 }
13119
13120 /* Adjust the address if it changed. */
13121 if (addr != new_addr)
13122 {
13123 mem = replace_equiv_address_nv (mem, new_addr);
13124 if (TARGET_DEBUG_ADDR)
13125 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13126 }
13127
13128 /* Now create the move. */
13129 if (store_p)
13130 emit_insn (gen_rtx_SET (mem, reg));
13131 else
13132 emit_insn (gen_rtx_SET (reg, mem));
13133
13134 return;
13135 }
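
/* For illustration: given the Altivec-style address

	(and:DI (plus:DI (reg 3) (reg 4)) (const_int -16))

   for a register class without the AND -16 form, the code above adds
   r3 and r4 into the scratch register, ANDs the result with -16 (with a
   CC scratch clobber, matching the rs6000 AND patterns), and performs
   the access through (mem (scratch)).  */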
13136
13137 /* Convert reloads involving 64-bit gprs and misaligned offset
13138 addressing, or multiple 32-bit gprs and offsets that are too large,
13139 to use indirect addressing. */
13140
13141 void
13142 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
13143 {
13144 int regno = true_regnum (reg);
13145 enum reg_class rclass;
13146 rtx addr;
13147 rtx scratch_or_premodify = scratch;
13148
13149 if (TARGET_DEBUG_ADDR)
13150 {
13151 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
13152 store_p ? "store" : "load");
13153 fprintf (stderr, "reg:\n");
13154 debug_rtx (reg);
13155 fprintf (stderr, "mem:\n");
13156 debug_rtx (mem);
13157 fprintf (stderr, "scratch:\n");
13158 debug_rtx (scratch);
13159 }
13160
13161 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
13162 gcc_assert (MEM_P (mem));
13163 rclass = REGNO_REG_CLASS (regno);
13164 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
13165 addr = XEXP (mem, 0);
13166
13167 if (GET_CODE (addr) == PRE_MODIFY)
13168 {
13169 gcc_assert (REG_P (XEXP (addr, 0))
13170 && GET_CODE (XEXP (addr, 1)) == PLUS
13171 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
13172 scratch_or_premodify = XEXP (addr, 0);
13173 addr = XEXP (addr, 1);
13174 }
13175 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
13176
13177 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
13178
13179 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
13180
13181 /* Now create the move. */
13182 if (store_p)
13183 emit_insn (gen_rtx_SET (mem, reg));
13184 else
13185 emit_insn (gen_rtx_SET (reg, mem));
13186
13187 return;
13188 }
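
/* For example (illustrative): a -m32 -mpowerpc64 style DImode access whose
   second word would wrap past the 16-bit displacement, say

	(mem:DI (plus (reg 9) (const_int 32764)))

   is handled by copying the PLUS expression into the scratch register and
   performing the access through (mem:DI (reg scratch)).  */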
13189
13190 /* Given an rtx X being reloaded into a reg required to be
13191 in class CLASS, return the class of reg to actually use.
13192 In general this is just CLASS; but on some machines
13193 in some cases it is preferable to use a more restrictive class.
13194
13195 On the RS/6000, we have to return NO_REGS when we want to reload a
13196 floating-point CONST_DOUBLE to force it to be copied to memory.
13197
13198 We also don't want to reload integer values into floating-point
13199 registers if we can at all help it. In fact, this can
13200 cause reload to die, if it tries to generate a reload of CTR
13201 into a FP register and discovers it doesn't have the memory location
13202 required.
13203
13204 ??? Would it be a good idea to have reload do the converse, that is
13205 try to reload floating modes into FP registers if possible?
13206 */
13207
13208 static enum reg_class
13209 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
13210 {
13211 machine_mode mode = GET_MODE (x);
13212 bool is_constant = CONSTANT_P (x);
13213
13214 /* DMR registers can't be loaded or stored. */
13215 if (rclass == DM_REGS)
13216 return NO_REGS;
13217
13218 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13219 reload class for it. */
13220 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13221 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
13222 return NO_REGS;
13223
13224 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
13225 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
13226 return NO_REGS;
13227
13228 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13229 the reloading of address expressions using PLUS into floating point
13230 registers. */
13231 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
13232 {
13233 if (is_constant)
13234 {
13235 /* Zero is always allowed in all VSX registers. */
13236 if (x == CONST0_RTX (mode))
13237 return rclass;
13238
13239 /* If this is a vector constant that can be formed with a few Altivec
13240 instructions, we want altivec registers. */
13241 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
13242 return ALTIVEC_REGS;
13243
13244 /* If this is an integer constant that can easily be loaded into
13245 vector registers, allow it. */
13246 if (CONST_INT_P (x))
13247 {
13248 HOST_WIDE_INT value = INTVAL (x);
13249
13250 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13251 2.06 can generate it in the Altivec registers with
13252 VSPLTI<x>. */
13253 if (value == -1)
13254 {
13255 if (TARGET_P8_VECTOR)
13256 return rclass;
13257 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13258 return ALTIVEC_REGS;
13259 else
13260 return NO_REGS;
13261 }
13262
13263 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13264 a sign extend in the Altivec registers. */
13265 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
13266 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
13267 return ALTIVEC_REGS;
13268 }
13269
13270 /* Force constant to memory. */
13271 return NO_REGS;
13272 }
13273
13274 /* D-form addressing can easily reload the value. */
13275 if (mode_supports_vmx_dform (mode)
13276 || mode_supports_dq_form (mode))
13277 return rclass;
13278
13279 /* If this is a scalar floating point value and we don't have D-form
13280 addressing, prefer the traditional floating point registers so that we
13281 can use D-form (register+offset) addressing. */
13282 if (rclass == VSX_REGS
13283 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
13284 return FLOAT_REGS;
13285
13286 /* Prefer the Altivec registers if Altivec is handling the vector
13287 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13288 loads. */
13289 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13290 || mode == V1TImode)
13291 return ALTIVEC_REGS;
13292
13293 return rclass;
13294 }
13295
13296 if (is_constant || GET_CODE (x) == PLUS)
13297 {
13298 if (reg_class_subset_p (GENERAL_REGS, rclass))
13299 return GENERAL_REGS;
13300 if (reg_class_subset_p (BASE_REGS, rclass))
13301 return BASE_REGS;
13302 return NO_REGS;
13303 }
13304
13305 /* For the vector pair and vector quad modes, prefer their natural register
13306 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13307 the GPR registers. */
13308 if (rclass == GEN_OR_FLOAT_REGS)
13309 {
13310 if (mode == OOmode)
13311 return VSX_REGS;
13312
13313 if (mode == XOmode)
13314 return TARGET_DENSE_MATH ? VSX_REGS : FLOAT_REGS;
13315
13316 if (mode == TDOmode)
13317 return VSX_REGS;
13318
13319 if (GET_MODE_CLASS (mode) == MODE_INT)
13320 return GENERAL_REGS;
13321 }
13322
13323 return rclass;
13324 }
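
/* For illustration: asked for a preferred class to reload the scalar
   (const_int -1) into VSX_REGS, this returns VSX_REGS on an ISA 2.07
   target (xxlorc can form all-ones in any VSX register) but ALTIVEC_REGS
   on ISA 2.06 (only vspltis<x> can); with ISA 3.0, values in [-128, 127]
   also prefer ALTIVEC_REGS via xxspltib.  Constants that cannot be formed
   cheaply get NO_REGS and are forced to memory.  */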
13325
13326 /* Debug version of rs6000_preferred_reload_class. */
13327 static enum reg_class
13328 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13329 {
13330 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13331
13332 fprintf (stderr,
13333 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13334 "mode = %s, x:\n",
13335 reg_class_names[ret], reg_class_names[rclass],
13336 GET_MODE_NAME (GET_MODE (x)));
13337 debug_rtx (x);
13338
13339 return ret;
13340 }
13341
13342 /* If we are copying between FP or AltiVec registers and anything else, we need
13343    a memory location.  The exception is when we are targeting ppc64 and the
13344    direct moves between FPRs and GPRs are available.  Also, under VSX, you
13345 can copy vector registers from the FP register set to the Altivec register
13346 set and vice versa. */
13347
13348 static bool
13349 rs6000_secondary_memory_needed (machine_mode mode,
13350 reg_class_t from_class,
13351 reg_class_t to_class)
13352 {
13353 enum rs6000_reg_type from_type, to_type;
13354 bool altivec_p = ((from_class == ALTIVEC_REGS)
13355 || (to_class == ALTIVEC_REGS));
13356
13357   /* If a simple/direct move is available, we don't need secondary memory.  */
13358 from_type = reg_class_to_reg_type[(int)from_class];
13359 to_type = reg_class_to_reg_type[(int)to_class];
13360
13361 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13362 (secondary_reload_info *)0, altivec_p))
13363 return false;
13364
13365 /* If we have a floating point or vector register class, we need to use
13366 memory to transfer the data. */
13367 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13368 return true;
13369
13370 return false;
13371 }
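
/* For example (illustrative): without direct moves, copying a DImode value
   from FLOAT_REGS to GENERAL_REGS has to bounce through a stack slot
   (stfd then ld), so this returns true; on a 64-bit -mcpu=power8 target
   the copy is a single mfvsrd and no secondary memory is needed.  */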
13372
13373 /* Debug version of rs6000_secondary_memory_needed. */
13374 static bool
13375 rs6000_debug_secondary_memory_needed (machine_mode mode,
13376 reg_class_t from_class,
13377 reg_class_t to_class)
13378 {
13379 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13380
13381 fprintf (stderr,
13382 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13383 "to_class = %s, mode = %s\n",
13384 ret ? "true" : "false",
13385 reg_class_names[from_class],
13386 reg_class_names[to_class],
13387 GET_MODE_NAME (mode));
13388
13389 return ret;
13390 }
13391
13392 /* Return the register class of a scratch register needed to copy IN into
13393 or out of a register in RCLASS in MODE. If it can be done directly,
13394 NO_REGS is returned. */
13395
13396 static enum reg_class
13397 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13398 rtx in)
13399 {
13400 int regno;
13401
13402 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13403 #if TARGET_MACHO
13404 && MACHOPIC_INDIRECT
13405 #endif
13406 ))
13407 {
13408 /* We cannot copy a symbolic operand directly into anything
13409 other than BASE_REGS for TARGET_ELF. So indicate that a
13410 register from BASE_REGS is needed as an intermediate
13411 register.
13412
13413 On Darwin, pic addresses require a load from memory, which
13414 needs a base register. */
13415 if (rclass != BASE_REGS
13416 && (SYMBOL_REF_P (in)
13417 || GET_CODE (in) == HIGH
13418 || GET_CODE (in) == LABEL_REF
13419 || GET_CODE (in) == CONST))
13420 return BASE_REGS;
13421 }
13422
13423 if (REG_P (in))
13424 {
13425 regno = REGNO (in);
13426 if (!HARD_REGISTER_NUM_P (regno))
13427 {
13428 regno = true_regnum (in);
13429 if (!HARD_REGISTER_NUM_P (regno))
13430 regno = -1;
13431 }
13432 }
13433 else if (SUBREG_P (in))
13434 {
13435 regno = true_regnum (in);
13436 if (!HARD_REGISTER_NUM_P (regno))
13437 regno = -1;
13438 }
13439 else
13440 regno = -1;
13441
13442 /* Dense math registers don't have loads or stores. We have to go through
13443    the VSX registers to load XOmode (vector quad) and TDOmode (DMR
13444    1024-bit).  */
13445 if (TARGET_DENSE_MATH && rclass == DM_REGS)
13446 return VSX_REGS;
13447
13448 /* If we have VSX register moves, prefer moving scalar values between
13449 Altivec registers and GPR by going via an FPR (and then via memory)
13450 instead of reloading the secondary memory address for Altivec moves. */
13451 if (TARGET_VSX
13452 && GET_MODE_SIZE (mode) < 16
13453 && !mode_supports_vmx_dform (mode)
13454 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13455 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13456 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13457 && (regno >= 0 && INT_REGNO_P (regno)))))
13458 return FLOAT_REGS;
13459
13460 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13461 into anything. */
13462 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13463 || (regno >= 0 && INT_REGNO_P (regno)))
13464 return NO_REGS;
13465
13466 /* Constants, memory, and VSX registers can go into VSX registers (both the
13467 traditional floating point and the altivec registers). */
13468 if (rclass == VSX_REGS
13469 && (regno == -1 || VSX_REGNO_P (regno)))
13470 return NO_REGS;
13471
13472 /* Constants, memory, and FP registers can go into FP registers. */
13473 if ((regno == -1 || FP_REGNO_P (regno))
13474 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13475 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13476
13477 /* Memory, and AltiVec registers can go into AltiVec registers. */
13478 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13479 && rclass == ALTIVEC_REGS)
13480 return NO_REGS;
13481
13482 /* We can copy among the CR registers. */
13483 if ((rclass == CR_REGS || rclass == CR0_REGS)
13484 && regno >= 0 && CR_REGNO_P (regno))
13485 return NO_REGS;
13486
13487 /* Otherwise, we need GENERAL_REGS. */
13488 return GENERAL_REGS;
13489 }
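
/* For illustration: copying a DFmode value held in an Altivec register
   into GENERAL_REGS without D-form addressing yields FLOAT_REGS here, so
   the value is staged through a traditional FPR instead of reloading a
   secondary memory address for the Altivec register.  */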
13490
13491 /* Debug version of rs6000_secondary_reload_class. */
13492 static enum reg_class
13493 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13494 machine_mode mode, rtx in)
13495 {
13496 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13497 fprintf (stderr,
13498 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13499 "mode = %s, input rtx:\n",
13500 reg_class_names[ret], reg_class_names[rclass],
13501 GET_MODE_NAME (mode));
13502 debug_rtx (in);
13503
13504 return ret;
13505 }
13506
13507 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13508
13509 static bool
13510 rs6000_can_change_mode_class (machine_mode from,
13511 machine_mode to,
13512 reg_class_t rclass)
13513 {
13514 unsigned from_size = GET_MODE_SIZE (from);
13515 unsigned to_size = GET_MODE_SIZE (to);
13516
13517 if (from_size != to_size)
13518 {
13519 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13520
13521 if (reg_classes_intersect_p (xclass, rclass))
13522 {
13523 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13524 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13525 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13526 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13527
13528 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13529 single register under VSX because the scalar part of the register
13530 is in the upper 64-bits, and not the lower 64-bits. Types like
13531 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13532 IEEE floating point can't overlap, and neither can small
13533 values. */
13534
13535 if (to_float128_vector_p && from_float128_vector_p)
13536 return true;
13537
13538 else if (to_float128_vector_p || from_float128_vector_p)
13539 return false;
13540
13541 /* TDmode in floating-mode registers must always go into a register
13542 pair with the most significant word in the even-numbered register
13543 to match ISA requirements. In little-endian mode, this does not
13544 match subreg numbering, so we cannot allow subregs. */
13545 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13546 return false;
13547
13548 /* Allow SD<->DD changes, since SDmode values are stored in
13549 the low half of the DDmode, just like target-independent
13550 code expects. We need to allow at least SD->DD since
13551 rs6000_secondary_memory_needed_mode asks for that change
13552 to be made for SD reloads. */
13553 if ((to == DDmode && from == SDmode)
13554 || (to == SDmode && from == DDmode))
13555 return true;
13556
13557 if (from_size < 8 || to_size < 8)
13558 return false;
13559
13560 if (from_size == 8 && (8 * to_nregs) != to_size)
13561 return false;
13562
13563 if (to_size == 8 && (8 * from_nregs) != from_size)
13564 return false;
13565
13566 return true;
13567 }
13568 else
13569 return true;
13570 }
13571
13572 /* Since the VSX register set includes traditional floating point registers
13573 and altivec registers, just check for the size being different instead of
13574 trying to check whether the modes are vector modes. Otherwise it won't
13575    allow, say, DF and DI to change classes.  For types like TFmode and TDmode
13576 that take 2 64-bit registers, rather than a single 128-bit register, don't
13577 allow subregs of those types to other 128 bit types. */
13578 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13579 {
13580 unsigned num_regs = (from_size + 15) / 16;
13581 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13582 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13583 return false;
13584
13585 return (from_size == 8 || from_size == 16);
13586 }
13587
13588 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13589 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13590 return false;
13591
13592 return true;
13593 }
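
/* For illustration: under VSX, (subreg:DI (reg:DF ...)) is fine in the
   FPR/VSX classes (both modes are 8 bytes) and SD<->DD changes are
   allowed explicitly, but taking a DFmode subreg of a V2DFmode register
   is rejected: the scalar lives in the upper 64 bits of the 128-bit
   register, which is not where subreg numbering would place it.  */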
13594
13595 /* Debug version of rs6000_can_change_mode_class. */
13596 static bool
13597 rs6000_debug_can_change_mode_class (machine_mode from,
13598 machine_mode to,
13599 reg_class_t rclass)
13600 {
13601 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13602
13603 fprintf (stderr,
13604 "rs6000_can_change_mode_class, return %s, from = %s, "
13605 "to = %s, rclass = %s\n",
13606 ret ? "true" : "false",
13607 GET_MODE_NAME (from), GET_MODE_NAME (to),
13608 reg_class_names[rclass]);
13609
13610 return ret;
13611 }
13612 \f
13613 /* Return a string to do a move operation of 128 bits of data. */
13614
13615 const char *
13616 rs6000_output_move_128bit (rtx operands[])
13617 {
13618 rtx dest = operands[0];
13619 rtx src = operands[1];
13620 machine_mode mode = GET_MODE (dest);
13621 int dest_regno;
13622 int src_regno;
13623 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13624 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13625
13626 if (REG_P (dest))
13627 {
13628 dest_regno = REGNO (dest);
13629 dest_gpr_p = INT_REGNO_P (dest_regno);
13630 dest_fp_p = FP_REGNO_P (dest_regno);
13631 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13632 dest_vsx_p = dest_fp_p | dest_vmx_p;
13633 }
13634 else
13635 {
13636 dest_regno = -1;
13637 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13638 }
13639
13640 if (REG_P (src))
13641 {
13642 src_regno = REGNO (src);
13643 src_gpr_p = INT_REGNO_P (src_regno);
13644 src_fp_p = FP_REGNO_P (src_regno);
13645 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13646 src_vsx_p = src_fp_p | src_vmx_p;
13647 }
13648 else
13649 {
13650 src_regno = -1;
13651 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13652 }
13653
13654 /* Register moves. */
13655 if (dest_regno >= 0 && src_regno >= 0)
13656 {
13657 if (dest_gpr_p)
13658 {
13659 if (src_gpr_p)
13660 return "#";
13661
13662 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13663 return (WORDS_BIG_ENDIAN
13664 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13665 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13666
13667 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13668 return "#";
13669 }
13670
13671 else if (TARGET_VSX && dest_vsx_p)
13672 {
13673 if (src_vsx_p)
13674 return "xxlor %x0,%x1,%x1";
13675
13676 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13677 return (WORDS_BIG_ENDIAN
13678 ? "mtvsrdd %x0,%1,%L1"
13679 : "mtvsrdd %x0,%L1,%1");
13680
13681 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13682 return "#";
13683 }
13684
13685 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13686 return "vor %0,%1,%1";
13687
13688 else if (dest_fp_p && src_fp_p)
13689 return "#";
13690 }
13691
13692 /* Loads. */
13693 else if (dest_regno >= 0 && MEM_P (src))
13694 {
13695 if (dest_gpr_p)
13696 {
13697 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13698 return "lq %0,%1";
13699 else
13700 return "#";
13701 }
13702
13703 else if (TARGET_ALTIVEC && dest_vmx_p
13704 && altivec_indexed_or_indirect_operand (src, mode))
13705 return "lvx %0,%y1";
13706
13707 else if (TARGET_VSX && dest_vsx_p)
13708 {
13709 if (mode_supports_dq_form (mode)
13710 && quad_address_p (XEXP (src, 0), mode, true))
13711 return "lxv %x0,%1";
13712
13713 else if (TARGET_P9_VECTOR)
13714 return "lxvx %x0,%y1";
13715
13716 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13717 return "lxvw4x %x0,%y1";
13718
13719 else
13720 return "lxvd2x %x0,%y1";
13721 }
13722
13723 else if (TARGET_ALTIVEC && dest_vmx_p)
13724 return "lvx %0,%y1";
13725
13726 else if (dest_fp_p)
13727 return "#";
13728 }
13729
13730 /* Stores. */
13731 else if (src_regno >= 0 && MEM_P (dest))
13732 {
13733 if (src_gpr_p)
13734 {
13735 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13736 return "stq %1,%0";
13737 else
13738 return "#";
13739 }
13740
13741 else if (TARGET_ALTIVEC && src_vmx_p
13742 && altivec_indexed_or_indirect_operand (dest, mode))
13743 return "stvx %1,%y0";
13744
13745 else if (TARGET_VSX && src_vsx_p)
13746 {
13747 if (mode_supports_dq_form (mode)
13748 && quad_address_p (XEXP (dest, 0), mode, true))
13749 return "stxv %x1,%0";
13750
13751 else if (TARGET_P9_VECTOR)
13752 return "stxvx %x1,%y0";
13753
13754 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13755 return "stxvw4x %x1,%y0";
13756
13757 else
13758 return "stxvd2x %x1,%y0";
13759 }
13760
13761 else if (TARGET_ALTIVEC && src_vmx_p)
13762 return "stvx %1,%y0";
13763
13764 else if (src_fp_p)
13765 return "#";
13766 }
13767
13768 /* Constants. */
13769 else if (dest_regno >= 0
13770 && (CONST_INT_P (src)
13771 || CONST_WIDE_INT_P (src)
13772 || CONST_DOUBLE_P (src)
13773 || GET_CODE (src) == CONST_VECTOR))
13774 {
13775 if (dest_gpr_p)
13776 return "#";
13777
13778 else if ((dest_vmx_p && TARGET_ALTIVEC)
13779 || (dest_vsx_p && TARGET_VSX))
13780 return output_vec_const_move (operands);
13781 }
13782
13783 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13784 }
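
/* For illustration: for a V2DImode load this returns "lxv %x0,%1" or
   "lxvx %x0,%y1" on power9 (depending on whether the address fits the
   DQ form), "lxvd2x %x0,%y1" on power8 VSX, or "lvx %0,%y1" on a plain
   Altivec target; a GPR-to-GPR 128-bit move returns "#" so that the
   post-reload splitter can break it into word-sized moves.  */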
13785
13786 /* Validate a 128-bit move. */
13787 bool
13788 rs6000_move_128bit_ok_p (rtx operands[])
13789 {
13790 machine_mode mode = GET_MODE (operands[0]);
13791 return (gpc_reg_operand (operands[0], mode)
13792 || gpc_reg_operand (operands[1], mode));
13793 }
13794
13795 /* Return true if a 128-bit move needs to be split. */
13796 bool
13797 rs6000_split_128bit_ok_p (rtx operands[])
13798 {
13799 if (!reload_completed)
13800 return false;
13801
13802 if (!gpr_or_gpr_p (operands[0], operands[1]))
13803 return false;
13804
13805 if (quad_load_store_p (operands[0], operands[1]))
13806 return false;
13807
13808 return true;
13809 }
13810
13811 \f
13812 /* Given a comparison operation, return the bit number in CCR to test. We
13813 know this is a valid comparison.
13814
13815 SCC_P is 1 if this is for an scc. That means that %D will have been
13816 used instead of %C, so the bits will be in different places.
13817
13818 Return -1 if OP isn't a valid comparison for some reason. */
13819
13820 int
13821 ccr_bit (rtx op, int scc_p)
13822 {
13823 enum rtx_code code = GET_CODE (op);
13824 machine_mode cc_mode;
13825 int cc_regnum;
13826 int base_bit;
13827 rtx reg;
13828
13829 if (!COMPARISON_P (op))
13830 return -1;
13831
13832 reg = XEXP (op, 0);
13833
13834 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13835 return -1;
13836
13837 cc_mode = GET_MODE (reg);
13838 cc_regnum = REGNO (reg);
13839 base_bit = 4 * (cc_regnum - CR0_REGNO);
13840
13841 validate_condition_mode (code, cc_mode);
13842
13843 /* When generating a sCOND operation, only positive conditions are
13844 allowed. */
13845 if (scc_p)
13846 switch (code)
13847 {
13848 case EQ:
13849 case GT:
13850 case LT:
13851 case UNORDERED:
13852 case GTU:
13853 case LTU:
13854 break;
13855 default:
13856 return -1;
13857 }
13858
13859 switch (code)
13860 {
13861 case NE:
13862 return scc_p ? base_bit + 3 : base_bit + 2;
13863 case EQ:
13864 return base_bit + 2;
13865 case GT: case GTU: case UNLE:
13866 return base_bit + 1;
13867 case LT: case LTU: case UNGE:
13868 return base_bit;
13869 case ORDERED: case UNORDERED:
13870 return base_bit + 3;
13871
13872 case GE: case GEU:
13873 /* If scc, we will have done a cror to put the bit in the
13874 unordered position. So test that bit. For integer, this is ! LT
13875 unless this is an scc insn. */
13876 return scc_p ? base_bit + 3 : base_bit;
13877
13878 case LE: case LEU:
13879 return scc_p ? base_bit + 3 : base_bit + 1;
13880
13881 default:
13882 return -1;
13883 }
13884 }
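
/* Worked example (illustrative): for a comparison (gt (reg:CCFP cr2)
   (const_int 0)) with SCC_P zero, base_bit is 4 * (cr2 - cr0) = 8 and GT
   is bit 1 within the field, so ccr_bit returns 9; with SCC_P nonzero the
   GE/LE/NE cases instead test the "unordered" bit that a preceding cror
   has set.  */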
13885 \f
13886 /* Return the GOT register. */
13887
13888 rtx
13889 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13890 {
13891 /* The second flow pass currently (June 1999) can't update
13892 regs_ever_live without disturbing other parts of the compiler, so
13893 update it here to make the prolog/epilogue code happy. */
13894 if (!can_create_pseudo_p ()
13895 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13896 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13897
13898 crtl->uses_pic_offset_table = 1;
13899
13900 return pic_offset_table_rtx;
13901 }
13902 \f
13903 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13904
13905 /* Write out a function code label. */
13906
13907 void
13908 rs6000_output_function_entry (FILE *file, const char *fname)
13909 {
13910 if (fname[0] != '.')
13911 {
13912 switch (DEFAULT_ABI)
13913 {
13914 default:
13915 gcc_unreachable ();
13916
13917 case ABI_AIX:
13918 if (DOT_SYMBOLS)
13919 putc ('.', file);
13920 else
13921 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13922 break;
13923
13924 case ABI_ELFv2:
13925 case ABI_V4:
13926 case ABI_DARWIN:
13927 break;
13928 }
13929 }
13930
13931 RS6000_OUTPUT_BASENAME (file, fname);
13932 }
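
/* For example (illustrative): under the AIX ABI with dot-symbols this
   prints ".foo" for function foo, naming the code entry point that goes
   with the "foo" function descriptor, while the ELFv2, V4 and Darwin
   ABIs print plain "foo".  */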

/* Print an operand.  Recognize special options, documented below.  */

#if TARGET_ELF
/* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
   only introduced by the linker, when applying the sda21
   relocation.  */
#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
#else
#define SMALL_DATA_RELOC "sda21"
#define SMALL_DATA_REG 0
#endif

void
print_operand (FILE *file, rtx x, int code)
{
  int i;
  unsigned HOST_WIDE_INT uval;

  switch (code)
    {
      /* %a is output_address.  */

      /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
         output_operand.  */

    case 'A':
      /* Write the MMA accumulator number associated with VSX register X.  On
         dense math systems, only allow DMR accumulators, not accumulators
         overlapping with the FPR registers.  */
      if (!REG_P (x))
        output_operand_lossage ("invalid %%A value");
      else if (TARGET_DENSE_MATH)
        {
          if (DMR_REGNO_P (REGNO (x)))
            fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO);
          else
            output_operand_lossage ("%%A operand is not a DMR");
        }
      else if (!FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
        output_operand_lossage ("invalid %%A value");
      else
        fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
      return;

    case 'D':
      /* Like 'J' but get to the GT bit only.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
        {
          output_operand_lossage ("invalid %%D value");
          return;
        }

      /* Bit 1 is GT bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 1;

      /* Add one for shift count in rlinm for scc.  */
      fprintf (file, "%d", i + 1);
      return;

    case 'e':
      /* If the low 16 bits are 0, but some other bit is set, write 's'.  */
      if (! INT_P (x))
        {
          output_operand_lossage ("invalid %%e value");
          return;
        }

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0 && uval != 0)
        putc ('s', file);
      return;

    case 'E':
      /* X is a CR register.  Print the number of the EQ bit of the CR.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
        output_operand_lossage ("invalid %%E value");
      else
        fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
      return;

    case 'f':
      /* X is a CR register.  Print the shift count needed to move it
         to the high-order four bits.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
        output_operand_lossage ("invalid %%f value");
      else
        fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'F':
      /* Similar, but print the count for the rotate in the opposite
         direction.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
        output_operand_lossage ("invalid %%F value");
      else
        fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'G':
      /* X is a constant integer.  If it is negative, print "m",
         otherwise print "z".  This is to make an aze or ame insn.  */
      if (!CONST_INT_P (x))
        output_operand_lossage ("invalid %%G value");
      else if (INTVAL (x) >= 0)
        putc ('z', file);
      else
        putc ('m', file);
      return;

    case 'h':
      /* If constant, output low-order five bits.  Otherwise, write
         normally.  */
      if (INT_P (x))
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
      else
        print_operand (file, x, 0);
      return;

    case 'H':
      /* If constant, output low-order six bits.  Otherwise, write
         normally.  */
      if (INT_P (x))
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
      else
        print_operand (file, x, 0);
      return;

    case 'I':
      /* Print `i' if this is a constant, else nothing.  */
      if (INT_P (x))
        putc ('i', file);
      return;

    case 'j':
      /* Write the bit number in CCR for jump.  */
      i = ccr_bit (x, 0);
      if (i == -1)
        output_operand_lossage ("invalid %%j code");
      else
        fprintf (file, "%d", i);
      return;

    case 'J':
      /* Similar, but add one for shift count in rlinm for scc and pass
         scc flag to `ccr_bit'.  */
      i = ccr_bit (x, 1);
      if (i == -1)
        output_operand_lossage ("invalid %%J code");
      else
        /* If we want bit 31, write a shift count of zero, not 32.  */
        fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'k':
      /* X must be a constant.  Write the 1's complement of the
         constant.  */
      if (! INT_P (x))
        output_operand_lossage ("invalid %%k value");
      else
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
      return;

    case 'K':
      /* X must be a symbolic constant on ELF.  Write an
         expression suitable for an 'addi' that adds in the low 16
         bits of the MEM.  */
      if (GET_CODE (x) == CONST)
        {
          if (GET_CODE (XEXP (x, 0)) != PLUS
              || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
                  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
              || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
            output_operand_lossage ("invalid %%K value");
        }
      print_operand_address (file, x);
      fputs ("@l", file);
      return;

      /* %l is output_asm_label.  */

    case 'L':
      /* Write second word of DImode or DFmode reference.  Works on register
         or non-indexed memory only.  */
      if (REG_P (x))
        fputs (reg_names[REGNO (x) + 1], file);
      else if (MEM_P (x))
        {
          machine_mode mode = GET_MODE (x);
          /* Handle possible auto-increment.  Since it is pre-increment and
             we have already done it, we can just use an offset of word.  */
          if (GET_CODE (XEXP (x, 0)) == PRE_INC
              || GET_CODE (XEXP (x, 0)) == PRE_DEC)
            output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
                                                 UNITS_PER_WORD));
          else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
            output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
                                                 UNITS_PER_WORD));
          else
            output_address (mode, XEXP (adjust_address_nv (x, SImode,
                                                           UNITS_PER_WORD),
                                        0));

          if (small_data_operand (x, GET_MODE (x)))
            fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
                     reg_names[SMALL_DATA_REG]);
        }
      return;

    case 'N':  /* Unused */
      /* Write the number of elements in the vector times 4.  */
      if (GET_CODE (x) != PARALLEL)
        output_operand_lossage ("invalid %%N value");
      else
        fprintf (file, "%d", XVECLEN (x, 0) * 4);
      return;

    case 'O':  /* Unused */
      /* Similar, but subtract 1 first.  */
      if (GET_CODE (x) != PARALLEL)
        output_operand_lossage ("invalid %%O value");
      else
        fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
      return;

    case 'p':
      /* X is a CONST_INT that is a power of two.  Output the logarithm.  */
      if (! INT_P (x)
          || INTVAL (x) < 0
          || (i = exact_log2 (INTVAL (x))) < 0)
        output_operand_lossage ("invalid %%p value");
      else
        fprintf (file, "%d", i);
      return;

    case 'P':
      /* The operand must be an indirect memory reference.  The result
         is the register name.  */
      if (!MEM_P (x) || !REG_P (XEXP (x, 0))
          || REGNO (XEXP (x, 0)) >= 32)
        output_operand_lossage ("invalid %%P value");
      else
        fputs (reg_names[REGNO (XEXP (x, 0))], file);
      return;

    case 'q':
      /* This outputs the logical code corresponding to a boolean
         expression.  The expression may have one or both operands
         negated (if one, only the first one).  For condition register
         logical operations, it will also treat the negated
         CR codes as NOTs, but not handle NOTs of them.  */
      {
        const char *const *t = 0;
        const char *s;
        enum rtx_code code = GET_CODE (x);
        static const char * const tbl[3][3] = {
          { "and", "andc", "nor" },
          { "or", "orc", "nand" },
          { "xor", "eqv", "xor" } };

        if (code == AND)
          t = tbl[0];
        else if (code == IOR)
          t = tbl[1];
        else if (code == XOR)
          t = tbl[2];
        else
          output_operand_lossage ("invalid %%q value");

        if (GET_CODE (XEXP (x, 0)) != NOT)
          s = t[0];
        else
          {
            if (GET_CODE (XEXP (x, 1)) == NOT)
              s = t[2];
            else
              s = t[1];
          }

        fputs (s, file);
      }
      return;

    case 'Q':
      if (! TARGET_MFCRF)
        return;
      fputc (',', file);
      /* FALLTHRU */

    case 'R':
      /* X is a CR register.  Print the mask for `mtcrf'.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
        output_operand_lossage ("invalid %%R value");
      else
        fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
      return;

    case 's':
      /* Low 5 bits of 32 - value.  */
      if (! INT_P (x))
        output_operand_lossage ("invalid %%s value");
      else
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
      return;

    case 't':
      /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
        {
          output_operand_lossage ("invalid %%t value");
          return;
        }

      /* Bit 3 is OV bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 3;

      /* If we want bit 31, write a shift count of zero, not 32.  */
      fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'T':
      /* Print the symbolic name of a branch target register.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
        x = XVECEXP (x, 0, 0);
      if (!REG_P (x) || (REGNO (x) != LR_REGNO
                         && REGNO (x) != CTR_REGNO))
        output_operand_lossage ("invalid %%T value");
      else if (REGNO (x) == LR_REGNO)
        fputs ("lr", file);
      else
        fputs ("ctr", file);
      return;

    case 'u':
      /* High-order or low-order 16 bits of constant, whichever is non-zero,
         for use in unsigned operand.  */
      if (! INT_P (x))
        {
          output_operand_lossage ("invalid %%u value");
          return;
        }

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0)
        uval >>= 16;

      fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
      return;

    case 'v':
      /* High-order 16 bits of constant for use in signed operand.  */
      if (! INT_P (x))
        output_operand_lossage ("invalid %%v value");
      else
        fprintf (file, HOST_WIDE_INT_PRINT_HEX,
                 (INTVAL (x) >> 16) & 0xffff);
      return;

    case 'U':
      /* Print `u' if this has an auto-increment or auto-decrement.  */
      if (MEM_P (x)
          && (GET_CODE (XEXP (x, 0)) == PRE_INC
              || GET_CODE (XEXP (x, 0)) == PRE_DEC
              || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
        putc ('u', file);
      return;

    case 'V':
      /* Print the trap code for this operand.  */
      switch (GET_CODE (x))
        {
        case EQ:
          fputs ("eq", file);   /* 4 */
          break;
        case NE:
          fputs ("ne", file);   /* 24 */
          break;
        case LT:
          fputs ("lt", file);   /* 16 */
          break;
        case LE:
          fputs ("le", file);   /* 20 */
          break;
        case GT:
          fputs ("gt", file);   /* 8 */
          break;
        case GE:
          fputs ("ge", file);   /* 12 */
          break;
        case LTU:
          fputs ("llt", file);  /* 2 */
          break;
        case LEU:
          fputs ("lle", file);  /* 6 */
          break;
        case GTU:
          fputs ("lgt", file);  /* 1 */
          break;
        case GEU:
          fputs ("lge", file);  /* 5 */
          break;
        default:
          output_operand_lossage ("invalid %%V value");
        }
      break;

    case 'w':
      /* If constant, low-order 16 bits of constant, signed.  Otherwise, write
         normally.  */
      if (INT_P (x))
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, sext_hwi (INTVAL (x), 16));
      else
        print_operand (file, x, 0);
      return;

    case 'x':
      /* X is a FPR or Altivec register used in a VSX context.  */
      if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
        output_operand_lossage ("invalid %%x value");
      else
        {
          int reg = REGNO (x);
          int vsx_reg = (FP_REGNO_P (reg)
                         ? reg - 32
                         : reg - FIRST_ALTIVEC_REGNO + 32);

#ifdef TARGET_REGNAMES
          if (TARGET_REGNAMES)
            fprintf (file, "%%vs%d", vsx_reg);
          else
#endif
            fprintf (file, "%d", vsx_reg);
        }
      return;

    case 'X':
      if (MEM_P (x)
          && (legitimate_indexed_address_p (XEXP (x, 0), 0)
              || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
                  && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
        putc ('x', file);
      return;

    case 'Y':
      /* Like 'L', for third word of TImode/PTImode.  */
      if (REG_P (x))
        fputs (reg_names[REGNO (x) + 2], file);
      else if (MEM_P (x))
        {
          machine_mode mode = GET_MODE (x);
          if (GET_CODE (XEXP (x, 0)) == PRE_INC
              || GET_CODE (XEXP (x, 0)) == PRE_DEC)
            output_address (mode, plus_constant (Pmode,
                                                 XEXP (XEXP (x, 0), 0), 8));
          else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
            output_address (mode, plus_constant (Pmode,
                                                 XEXP (XEXP (x, 0), 0), 8));
          else
            output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
          if (small_data_operand (x, GET_MODE (x)))
            fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
                     reg_names[SMALL_DATA_REG]);
        }
      return;

    case 'z':
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
        x = XVECEXP (x, 0, 1);
      /* X is a SYMBOL_REF.  Write out the name preceded by a
         period and without any trailing data in brackets.  Used for function
         names.  If we are configured for System V (or the embedded ABI) on
         the PowerPC, do not emit the period, since those systems do not use
         TOCs and the like.  */
      if (!SYMBOL_REF_P (x))
        {
          output_operand_lossage ("invalid %%z value");
          return;
        }

      /* For macho, check to see if we need a stub.  */
      if (TARGET_MACHO)
        {
          const char *name = XSTR (x, 0);
#if TARGET_MACHO
          if (darwin_symbol_stubs
              && MACHOPIC_INDIRECT
              && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
            name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
          assemble_name (file, name);
        }
      else if (!DOT_SYMBOLS)
        assemble_name (file, XSTR (x, 0));
      else
        rs6000_output_function_entry (file, XSTR (x, 0));
      return;

    case 'Z':
      /* Like 'L', for last word of TImode/PTImode.  */
      if (REG_P (x))
        fputs (reg_names[REGNO (x) + 3], file);
      else if (MEM_P (x))
        {
          machine_mode mode = GET_MODE (x);
          if (GET_CODE (XEXP (x, 0)) == PRE_INC
              || GET_CODE (XEXP (x, 0)) == PRE_DEC)
            output_address (mode, plus_constant (Pmode,
                                                 XEXP (XEXP (x, 0), 0), 12));
          else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
            output_address (mode, plus_constant (Pmode,
                                                 XEXP (XEXP (x, 0), 0), 12));
          else
            output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
          if (small_data_operand (x, GET_MODE (x)))
            fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
                     reg_names[SMALL_DATA_REG]);
        }
      return;

      /* Print AltiVec memory operand.  */
    case 'y':
      {
        rtx tmp;

        gcc_assert (MEM_P (x));

        tmp = XEXP (x, 0);

        if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
            && GET_CODE (tmp) == AND
            && CONST_INT_P (XEXP (tmp, 1))
            && INTVAL (XEXP (tmp, 1)) == -16)
          tmp = XEXP (tmp, 0);
        else if (VECTOR_MEM_VSX_P (GET_MODE (x))
                 && GET_CODE (tmp) == PRE_MODIFY)
          tmp = XEXP (tmp, 1);
        if (REG_P (tmp))
          fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
        else
          {
            if (GET_CODE (tmp) != PLUS
                || !REG_P (XEXP (tmp, 0))
                || !REG_P (XEXP (tmp, 1)))
              {
                output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
                break;
              }

            if (REGNO (XEXP (tmp, 0)) == 0)
              fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
                       reg_names[ REGNO (XEXP (tmp, 0)) ]);
            else
              fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
                       reg_names[ REGNO (XEXP (tmp, 1)) ]);
          }
        break;
      }

    case 0:
      if (REG_P (x))
        fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (MEM_P (x))
        {
          /* We need to handle PRE_INC and PRE_DEC here, since we need to
             know the width from the mode.  */
          if (GET_CODE (XEXP (x, 0)) == PRE_INC)
            fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
                     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
          else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
            fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
                     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
          else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
            output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
          else
            output_address (GET_MODE (x), XEXP (x, 0));
        }
      else if (toc_relative_expr_p (x, false,
                                    &tocrel_base_oac, &tocrel_offset_oac))
        /* This hack along with a corresponding hack in
           rs6000_output_addr_const_extra arranges to output addends
           where the assembler expects to find them.  E.g.
           (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
           without this hack would be output as "x@toc+4".  We
           want "x+4@toc".  */
        output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
        output_addr_const (file, XVECEXP (x, 0, 0));
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
        output_addr_const (file, XVECEXP (x, 0, 1));
      else
        output_addr_const (file, x);
      return;

    case '&':
      if (const char *name = get_some_local_dynamic_name ())
        assemble_name (file, name);
      else
        output_operand_lossage ("'%%&' used without any "
                                "local dynamic TLS references");
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
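
/* Usage sketch (hypothetical insn template, not quoted from rs6000.md):
   in a template such as "addi %0,%1,%w2", operand 2 prints as its
   low-order 16 bits, sign-extended; "%L1" names the second register of
   a multi-word operand; and "%U1%X1" appends the update and indexed
   suffixes for a memory operand.  */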
\f
/* Print the address of an operand.  */

void
print_operand_address (FILE *file, rtx x)
{
  if (REG_P (x))
    fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);

  /* Is it a PC-relative address?  */
  else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
    {
      HOST_WIDE_INT offset;

      if (GET_CODE (x) == CONST)
        x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          offset = INTVAL (XEXP (x, 1));
          x = XEXP (x, 0);
        }
      else
        offset = 0;

      output_addr_const (file, x);

      if (offset)
        fprintf (file, "%+" PRId64, offset);

      if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
        fprintf (file, "@got");

      fprintf (file, "@pcrel");
    }
  else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
           || GET_CODE (x) == LABEL_REF)
    {
      output_addr_const (file, x);
      if (small_data_operand (x, GET_MODE (x)))
        fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
                 reg_names[SMALL_DATA_REG]);
      else
        gcc_assert (!TARGET_TOC);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
           && REG_P (XEXP (x, 1)))
    {
      if (REGNO (XEXP (x, 0)) == 0)
        fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
                 reg_names[ REGNO (XEXP (x, 0)) ]);
      else
        fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
                 reg_names[ REGNO (XEXP (x, 1)) ]);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
           && CONST_INT_P (XEXP (x, 1)))
    fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
             INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
#if TARGET_MACHO
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
           && CONSTANT_P (XEXP (x, 1)))
    {
      fprintf (file, "lo16(");
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
#if TARGET_ELF
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
           && CONSTANT_P (XEXP (x, 1)))
    {
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
  else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
    {
      /* This hack along with a corresponding hack in
         rs6000_output_addr_const_extra arranges to output addends
         where the assembler expects to find them.  E.g.
         (lo_sum (reg 9)
         .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
         without this hack would be output as "x@toc+8@l(9)".  We
         want "x+8@toc@l(9)".  */
      output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
      if (GET_CODE (x) == LO_SUM)
        fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
      else
        fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
    }
  else
    output_addr_const (file, x);
}
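
/* For illustration with the default register names: (reg 9) prints as
   "0(9)", (plus (reg 9) (const_int 16)) as "16(9)", and a pc-relative
   symbol as "sym@pcrel", with "@got" inserted first when the symbol is
   not local.  */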
\f
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

bool
rs6000_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      {
      case UNSPEC_TOCREL:
        gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
                             && REG_P (XVECEXP (x, 0, 1))
                             && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
        output_addr_const (file, XVECEXP (x, 0, 0));
        if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
          {
            if (INTVAL (tocrel_offset_oac) >= 0)
              fprintf (file, "+");
            output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
          }
        if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
          {
            putc ('-', file);
            assemble_name (file, toc_label_name);
            need_toc_init = 1;
          }
        else if (TARGET_ELF)
          fputs ("@toc", file);
        return true;

#if TARGET_MACHO
      case UNSPEC_MACHOPIC_OFFSET:
        output_addr_const (file, XVECEXP (x, 0, 0));
        putc ('-', file);
        machopic_output_function_base_name (file);
        return true;
#endif
      }
  return false;
}
\f
/* Target hook for assembling integer objects.  The PowerPC version has
   to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
   is defined.  It also needs to handle DI-mode objects on 64-bit
   targets.  */

static bool
rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
#ifdef RELOCATABLE_NEEDS_FIXUP
  /* Special handling for SI values.  */
  if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
    {
      static int recurse = 0;

      /* For -mrelocatable, we mark all addresses that need to be fixed up in
         the .fixup section.  Since the TOC section is already relocated, we
         don't need to mark it here.  We used to skip the text section, but it
         should never be valid for relocated addresses to be placed in the text
         section.  */
      if (DEFAULT_ABI == ABI_V4
          && (TARGET_RELOCATABLE || flag_pic > 1)
          && in_section != toc_section
          && !recurse
          && !CONST_SCALAR_INT_P (x)
          && CONSTANT_P (x))
        {
          char buf[256];

          recurse = 1;
          ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
          fixuplabelno++;
          ASM_OUTPUT_LABEL (asm_out_file, buf);
          fprintf (asm_out_file, "\t.long\t(");
          output_addr_const (asm_out_file, x);
          fprintf (asm_out_file, ")@fixup\n");
          fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
          ASM_OUTPUT_ALIGN (asm_out_file, 2);
          fprintf (asm_out_file, "\t.long\t");
          assemble_name (asm_out_file, buf);
          fprintf (asm_out_file, "\n\t.previous\n");
          recurse = 0;
          return true;
        }
      /* Remove initial .'s to turn a -mcall-aixdesc function
         address into the address of the descriptor, not the function
         itself.  */
      else if (SYMBOL_REF_P (x)
               && XSTR (x, 0)[0] == '.'
               && DEFAULT_ABI == ABI_AIX)
        {
          const char *name = XSTR (x, 0);
          while (*name == '.')
            name++;

          fprintf (asm_out_file, "\t.long\t%s\n", name);
          return true;
        }
    }
#endif /* RELOCATABLE_NEEDS_FIXUP */
  return default_assemble_integer (x, size, aligned_p);
}

/* Return a template string for assembly to emit when making an
   external call.  FUNOP is the call mem argument operand number.  */

static const char *
rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
{
  /* -Wformat-overflow workaround, without which gcc thinks that %u
     might produce 10 digits.  */
  gcc_assert (funop <= MAX_RECOG_OPERANDS);

  char arg[12];
  arg[0] = 0;
  if (GET_CODE (operands[funop + 1]) == UNSPEC)
    {
      if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
        sprintf (arg, "(%%%u@tlsgd)", funop + 1);
      else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
        sprintf (arg, "(%%&@tlsld)");
    }

  /* The magic 32768 offset here corresponds to the offset of
     r30 in .got2, as given by LCTOC1.  See sysv4.h:toc_section.  */
  char z[11];
  sprintf (z, "%%z%u%s", funop,
           (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
            ? "+32768" : ""));

  static char str[32];  /* 1 spare */
  if (rs6000_pcrel_p ())
    sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
             sibcall ? "" : "\n\tnop");
  else if (DEFAULT_ABI == ABI_V4)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
             flag_pic ? "@plt" : "");
#if TARGET_MACHO
  /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case.  */
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      /* The cookie is in operand func+2.  */
      gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
      int cookie = INTVAL (operands[funop + 2]);
      if (cookie & CALL_LONG)
        {
          tree funname = get_identifier (XSTR (operands[funop], 0));
          tree labelname = get_prev_label (funname);
          gcc_checking_assert (labelname && !sibcall);

          /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
             instruction will reach 'foo', otherwise link as 'bl L42'".
             "L42" should be a 'branch island', that will do a far jump to
             'foo'.  Branch islands are generated in
             macho_branch_islands().  */
          sprintf (str, "jbsr %%z%u,%.10s", funop,
                   IDENTIFIER_POINTER (labelname));
        }
      else
        /* Same as AIX or ELFv2, except to keep backwards compat, no nop
           after the call.  */
        sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
    }
#endif
  else
    gcc_unreachable ();
  return str;
}

const char *
rs6000_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, false);
}

const char *
rs6000_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, true);
}
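
/* Sample expansions (abridged, operand numbering assumed to start at the
   call mem, i.e. FUNOP == 0): pc-relative code yields "bl %z0@notoc";
   AIX/ELFv2 TOC-based calls yield "bl %z0\n\tnop", leaving room for the
   linker to insert the TOC restore; and SysV PIC calls yield
   "bl %z0@plt".  */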

/* As above, for indirect calls.  */

static const char *
rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
                                 bool sibcall)
{
  /* -Wformat-overflow workaround, without which gcc thinks that %u
     might produce 10 digits.  Note that -Wformat-overflow will not
     currently warn here for str[], so do not rely on a warning to
     ensure str[] is correctly sized.  */
  gcc_assert (funop <= MAX_RECOG_OPERANDS);

  /* Currently, funop is either 0 or 1.  The maximum string is always
     a !speculate 64-bit __tls_get_addr call.

     ABI_ELFv2, pcrel:
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 35	.reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
     . 9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 36	.reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
     . 8	beq%T1l-
     .---
     .142

     ABI_AIX:
     . 9	ld 2,%3\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
     . 9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
     . 10	beq%T1l-\n\t
     . 10	ld 2,%4(1)
     .---
     .151

     ABI_ELFv2:
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
     . 9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
     . 10	beq%T1l-\n\t
     . 10	ld 2,%3(1)
     .---
     .142

     ABI_V4:
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 35	.reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
     . 9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 36	.reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
     . 8	beq%T1l-
     .---
     .141  */
  static char str[160];  /* 8 spare */
  char *s = str;
  const char *ptrload = TARGET_64BIT ? "d" : "wz";

  if (DEFAULT_ABI == ABI_AIX)
    s += sprintf (s,
                  "l%s 2,%%%u\n\t",
                  ptrload, funop + 3);

  /* We don't need the extra code to stop indirect call speculation if
     calling via LR.  */
  bool speculate = (TARGET_MACHO
                    || rs6000_speculate_indirect_jumps
                    || (REG_P (operands[funop])
                        && REGNO (operands[funop]) == LR_REGNO));

  if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
    {
      const char *rel64 = TARGET_64BIT ? "64" : "";
      char tls[29];
      tls[0] = 0;
      if (GET_CODE (operands[funop + 1]) == UNSPEC)
        {
          if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
            sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
                     rel64, funop + 1);
          else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
            sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
                     rel64);
        }

      const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
      const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
                            && flag_pic == 2 ? "+32768" : "");
      if (!speculate)
        {
          s += sprintf (s,
                        "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
                        tls, rel64, notoc, funop, addend);
          s += sprintf (s, "crset 2\n\t");
        }
      s += sprintf (s,
                    "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
                    tls, rel64, notoc, funop, addend);
    }
  else if (!speculate)
    s += sprintf (s, "crset 2\n\t");

  if (rs6000_pcrel_p ())
    {
      if (speculate)
        sprintf (s, "b%%T%ul", funop);
      else
        sprintf (s, "beq%%T%ul-", funop);
    }
  else if (DEFAULT_ABI == ABI_AIX)
    {
      if (speculate)
        sprintf (s,
                 "b%%T%ul\n\t"
                 "l%s 2,%%%u(1)",
                 funop, ptrload, funop + 4);
      else
        sprintf (s,
                 "beq%%T%ul-\n\t"
                 "l%s 2,%%%u(1)",
                 funop, ptrload, funop + 4);
    }
  else if (DEFAULT_ABI == ABI_ELFv2)
    {
      if (speculate)
        sprintf (s,
                 "b%%T%ul\n\t"
                 "l%s 2,%%%u(1)",
                 funop, ptrload, funop + 3);
      else
        sprintf (s,
                 "beq%%T%ul-\n\t"
                 "l%s 2,%%%u(1)",
                 funop, ptrload, funop + 3);
    }
  else
    {
      if (speculate)
        sprintf (s,
                 "b%%T%u%s",
                 funop, sibcall ? "" : "l");
      else
        sprintf (s,
                 "beq%%T%u%s-%s",
                 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
    }
  return str;
}

const char *
rs6000_indirect_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, false);
}

const char *
rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, true);
}

#if HAVE_AS_PLTSEQ
/* Output indirect call insns.  WHICH identifies the type of sequence.  */
const char *
rs6000_pltseq_template (rtx *operands, int which)
{
  const char *rel64 = TARGET_64BIT ? "64" : "";
  char tls[30];
  tls[0] = 0;
  if (GET_CODE (operands[3]) == UNSPEC)
    {
      char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
      if (XINT (operands[3], 1) == UNSPEC_TLSGD)
        sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
                 off, rel64);
      else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
        sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
                 off, rel64);
    }

  gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
  static char str[96];  /* 10 spare */
  char off = WORDS_BIG_ENDIAN ? '2' : '4';
  const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
                        && flag_pic == 2 ? "+32768" : "");
  switch (which)
    {
    case RS6000_PLTSEQ_TOCSAVE:
      sprintf (str,
               "st%s\n\t"
               "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
               TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
               tls, rel64);
      break;
    case RS6000_PLTSEQ_PLT16_HA:
      if (DEFAULT_ABI == ABI_V4 && !flag_pic)
        sprintf (str,
                 "lis %%0,0\n\t"
                 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
                 tls, off, rel64);
      else
        sprintf (str,
                 "addis %%0,%%1,0\n\t"
                 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
                 tls, off, rel64, addend);
      break;
    case RS6000_PLTSEQ_PLT16_LO:
      sprintf (str,
               "l%s %%0,0(%%1)\n\t"
               "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
               TARGET_64BIT ? "d" : "wz",
               tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
      break;
    case RS6000_PLTSEQ_MTCTR:
      sprintf (str,
               "mtctr %%1\n\t"
               "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
               tls, rel64, addend);
      break;
    case RS6000_PLTSEQ_PLT_PCREL34:
      sprintf (str,
               "pl%s %%0,0(0),1\n\t"
               "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
               TARGET_64BIT ? "d" : "wz",
               tls, rel64);
      break;
    default:
      gcc_unreachable ();
    }
  return str;
}
#endif
\f
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */

static void
rs6000_assemble_visibility (tree decl, int vis)
{
  if (TARGET_XCOFF)
    return;

  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && DOT_SYMBOLS
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      static const char * const visibility_types[] = {
        NULL, "protected", "hidden", "internal"
      };

      const char *name, *type;

      name = ((* targetm.strip_name_encoding)
              (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];

      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    }
  else
    default_assemble_visibility (decl, vis);
}
#endif
\f
/* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
   entry.  If RECORD_P is true and the target supports named sections,
   the location of the NOPs will be recorded in a special object section
   called "__patchable_function_entries".  This routine may be called
   twice per function to put NOPs before and after the function
   entry.  */

void
rs6000_print_patchable_function_entry (FILE *file,
                                       unsigned HOST_WIDE_INT patch_area_size,
                                       bool record_p)
{
  bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
  /* For a function which needs a global entry point, we emit the
     patchable area before and after the local entry point under the
     control of cfun->machine->global_entry_emitted; see the handling in
     rs6000_output_function_prologue.  */
  if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
    default_print_patchable_function_entry (file, patch_area_size, record_p);
}
\f
enum rtx_code
rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
{
  /* Reversal of FP compares needs care -- an ordered compare
     becomes an unordered compare and vice versa.  */
  if (mode == CCFPmode
      && (!flag_finite_math_only
          || code == UNLT || code == UNLE || code == UNGT || code == UNGE
          || code == UNEQ || code == LTGT))
    return reverse_condition_maybe_unordered (code);
  else
    return reverse_condition (code);
}
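
/* Example: when NaNs must be honored, reversing GE in CCFPmode yields
   UNLT rather than LT, so that the reversed test is also true when the
   original comparison would have been unordered.  */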

/* Check if C (as a 64-bit integer) can be rotated to a constant that
   contains nonzero bits in the LOWBITS low bits only.

   Return true if C can be rotated to such a constant.  If so, *ROT is
   set to the number of bits by which C is rotated.
   Return false otherwise.  */

bool
can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
{
  int clz = HOST_BITS_PER_WIDE_INT - lowbits;

  /* case a. 0..0xxx: already at least clz zeros.  */
  int lz = clz_hwi (c);
  if (lz >= clz)
    {
      *rot = 0;
      return true;
    }

  /* case b. 0..0xxx0..0: at least clz zeros.  */
  int tz = ctz_hwi (c);
  if (lz + tz >= clz)
    {
      *rot = HOST_BITS_PER_WIDE_INT - tz;
      return true;
    }

  /* case c. xx10.....0xx: rotate 'clz - 1' bits first, then check case b.
     ^bit -> Vbit, then zeros are at head or tail.
     00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
  const int rot_bits = lowbits + 1;
  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
  tz = ctz_hwi (rc);
  if (clz_hwi (rc) + tz >= clz)
    {
      *rot = HOST_BITS_PER_WIDE_INT - (tz + rot_bits);
      return true;
    }

  return false;
}
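
/* Worked example (values editorial): c = 0xF00000 with LOWBITS == 16
   falls into case b: lz == 40, tz == 20, and lz + tz == 60 >= clz == 48,
   so *ROT becomes 64 - 20 == 44; rotating C left by 44 bits gives 0xF,
   which indeed fits in the low 16 bits.  */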

/* Check if C (as a 64-bit integer) can be rotated to a positive 16-bit
   constant, i.e. one with 48 leading zero bits and any value in the low
   16 bits.  */

bool
can_be_rotated_to_positive_16bits (HOST_WIDE_INT c)
{
  int rot = 0;
  bool res = can_be_rotated_to_lowbits (c, 16, &rot);
  return res && rot > 0;
}

/* Check if C (as a 64-bit integer) can be rotated to a negative 15-bit
   constant, i.e. one with 49 leading one bits and any value in the low
   15 bits.  */

bool
can_be_rotated_to_negative_15bits (HOST_WIDE_INT c)
{
  int rot = 0;
  bool res = can_be_rotated_to_lowbits (~c, 15, &rot);
  return res && rot > 0;
}
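
/* Note on usage: both wrappers insist on ROT > 0, i.e. a real rotation.
   A value such as 0x7fff already fits in the low 16 bits (ROT == 0) and
   is rejected here, since it can be loaded directly without a rotate.  */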

/* Generate a compare for CODE.  Return a brand-new rtx that
   represents the result of the compare.  */

static rtx
rs6000_generate_compare (rtx cmp, machine_mode mode)
{
  machine_mode comp_mode;
  rtx compare_result;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);

  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    comp_mode = CCmode;
  else if (FLOAT_MODE_P (mode))
    comp_mode = CCFPmode;
  else if (code == GTU || code == LTU
           || code == GEU || code == LEU)
    comp_mode = CCUNSmode;
  else if ((code == EQ || code == NE)
           && unsigned_reg_p (op0)
           && (unsigned_reg_p (op1)
               || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
    /* These are unsigned values; perhaps there will be a later
       ordering compare that can be shared with this one.  */
    comp_mode = CCUNSmode;
  else
    comp_mode = CCmode;

  /* If we have an unsigned compare, make sure we don't have a signed value as
     an immediate.  */
  if (comp_mode == CCUNSmode && CONST_INT_P (op1)
      && INTVAL (op1) < 0)
    {
      op0 = copy_rtx_if_shared (op0);
      op1 = force_reg (GET_MODE (op0), op1);
      cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
    }

  /* First, the compare.  */
  compare_result = gen_reg_rtx (comp_mode);

  /* IEEE 128-bit support in VSX registers when we do not have hardware
     support.  */
  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    {
      rtx libfunc = NULL_RTX;
      bool check_nan = false;
      rtx dest;

      switch (code)
        {
        case EQ:
        case NE:
          libfunc = optab_libfunc (eq_optab, mode);
          break;

        case GT:
        case GE:
          libfunc = optab_libfunc (ge_optab, mode);
          break;

        case LT:
        case LE:
          libfunc = optab_libfunc (le_optab, mode);
          break;

        case UNORDERED:
        case ORDERED:
          libfunc = optab_libfunc (unord_optab, mode);
          code = (code == UNORDERED) ? NE : EQ;
          break;

        case UNGE:
        case UNGT:
          check_nan = true;
          libfunc = optab_libfunc (ge_optab, mode);
          code = (code == UNGE) ? GE : GT;
          break;

        case UNLE:
        case UNLT:
          check_nan = true;
          libfunc = optab_libfunc (le_optab, mode);
          code = (code == UNLE) ? LE : LT;
          break;

        case UNEQ:
        case LTGT:
          check_nan = true;
          libfunc = optab_libfunc (eq_optab, mode);
          code = (code == UNEQ) ? EQ : NE;
          break;

        default:
          gcc_unreachable ();
        }

      gcc_assert (libfunc);

      if (!check_nan)
        dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        SImode, op0, mode, op1, mode);

      /* The library signals an exception for signalling NaNs, so we need to
         handle isgreater, etc. by first checking isordered.  */
      else
        {
          rtx ne_rtx, normal_dest, unord_dest;
          rtx unord_func = optab_libfunc (unord_optab, mode);
          rtx join_label = gen_label_rtx ();
          rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
          rtx unord_cmp = gen_reg_rtx (comp_mode);

          /* Test for either value being a NaN.  */
          gcc_assert (unord_func);
          unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
                                                SImode, op0, mode, op1, mode);

          /* Set value (1) if either value is a NaN, and jump to the join
             label.  */
          dest = gen_reg_rtx (SImode);
          emit_move_insn (dest, const1_rtx);
          emit_insn (gen_rtx_SET (unord_cmp,
                                  gen_rtx_COMPARE (comp_mode, unord_dest,
                                                   const0_rtx)));

          ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
          emit_jump_insn (gen_rtx_SET (pc_rtx,
                                       gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
                                                             join_ref,
                                                             pc_rtx)));

          /* Do the normal comparison, knowing that the values are not
             NaNs.  */
          normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                                 SImode, op0, mode, op1, mode);

          emit_insn (gen_cstoresi4 (dest,
                                    gen_rtx_fmt_ee (code, SImode, normal_dest,
                                                    const0_rtx),
                                    normal_dest, const0_rtx));

          /* Join NaN and non-NaN paths.  Compare dest against 0.  */
          emit_label (join_label);
          code = NE;
        }

      emit_insn (gen_rtx_SET (compare_result,
                              gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
    }

  else
    {
      /* Generate XLC-compatible TFmode compare as PARALLEL with extra
         CLOBBERs to match cmptf_internal2 pattern.  */
      if (comp_mode == CCFPmode && TARGET_XL_COMPAT
          && FLOAT128_IBM_P (GET_MODE (op0))
          && TARGET_HARD_FLOAT)
        emit_insn (gen_rtx_PARALLEL (VOIDmode,
          gen_rtvec (10,
                     gen_rtx_SET (compare_result,
                                  gen_rtx_COMPARE (comp_mode, op0, op1)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
      else if (GET_CODE (op1) == UNSPEC
               && XINT (op1, 1) == UNSPEC_SP_TEST)
        {
          rtx op1b = XVECEXP (op1, 0, 0);
          comp_mode = CCEQmode;
          compare_result = gen_reg_rtx (CCEQmode);
          if (TARGET_64BIT)
            emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
          else
            emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
        }
      else
        emit_insn (gen_rtx_SET (compare_result,
                                gen_rtx_COMPARE (comp_mode, op0, op1)));
    }

  validate_condition_mode (code, GET_MODE (compare_result));

  return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
}
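
/* Example: comparing two unsigned registers with GTU selects CCUNSmode,
   so the rtx returned above has the shape
   (gtu (reg:CCUNS <n>) (const_int 0)), ready to be consumed by a branch
   or scc expander.  */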

\f
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char*
rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
                          const_tree type1,
                          const_tree type2)
{
  machine_mode mode1 = TYPE_MODE (type1);
  machine_mode mode2 = TYPE_MODE (type2);

  /* For complex modes, use the inner type.  */
  if (COMPLEX_MODE_P (mode1))
    mode1 = GET_MODE_INNER (mode1);

  if (COMPLEX_MODE_P (mode2))
    mode2 = GET_MODE_INNER (mode2);

  /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
     double to intermix unless -mfloat128-convert.  */
  if (mode1 == mode2)
    return NULL;

  if (!TARGET_FLOAT128_CVT)
    {
      if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
          || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
        return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
                  "point types");
    }

  return NULL;
}

\f
/* Expand floating point conversion to/from __float128 and __ibm128.  */

void
rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode src_mode = GET_MODE (src);
  convert_optab cvt = unknown_optab;
  bool do_move = false;
  rtx libfunc = NULL_RTX;
  rtx dest2;
  typedef rtx (*rtx_2func_t) (rtx, rtx);
  rtx_2func_t hw_convert = (rtx_2func_t)0;
  size_t kf_or_tf;

  struct hw_conv_t {
    rtx_2func_t from_df;
    rtx_2func_t from_sf;
    rtx_2func_t from_si_sign;
    rtx_2func_t from_si_uns;
    rtx_2func_t from_di_sign;
    rtx_2func_t from_di_uns;
    rtx_2func_t to_df;
    rtx_2func_t to_sf;
    rtx_2func_t to_si_sign;
    rtx_2func_t to_si_uns;
    rtx_2func_t to_di_sign;
    rtx_2func_t to_di_uns;
  } hw_conversions[2] = {
    /* conversions to/from KFmode */
    {
      gen_extenddfkf2_hw,     /* KFmode <- DFmode.  */
      gen_extendsfkf2_hw,     /* KFmode <- SFmode.  */
      gen_float_kfsi2_hw,     /* KFmode <- SImode (signed).  */
      gen_floatuns_kfsi2_hw,  /* KFmode <- SImode (unsigned).  */
      gen_float_kfdi2_hw,     /* KFmode <- DImode (signed).  */
      gen_floatuns_kfdi2_hw,  /* KFmode <- DImode (unsigned).  */
      gen_trunckfdf2_hw,      /* DFmode <- KFmode.  */
      gen_trunckfsf2_hw,      /* SFmode <- KFmode.  */
      gen_fix_kfsi2_hw,       /* SImode <- KFmode (signed).  */
      gen_fixuns_kfsi2_hw,    /* SImode <- KFmode (unsigned).  */
      gen_fix_kfdi2_hw,       /* DImode <- KFmode (signed).  */
      gen_fixuns_kfdi2_hw,    /* DImode <- KFmode (unsigned).  */
    },

    /* conversions to/from TFmode */
    {
      gen_extenddftf2_hw,     /* TFmode <- DFmode.  */
      gen_extendsftf2_hw,     /* TFmode <- SFmode.  */
      gen_float_tfsi2_hw,     /* TFmode <- SImode (signed).  */
      gen_floatuns_tfsi2_hw,  /* TFmode <- SImode (unsigned).  */
      gen_float_tfdi2_hw,     /* TFmode <- DImode (signed).  */
      gen_floatuns_tfdi2_hw,  /* TFmode <- DImode (unsigned).  */
      gen_trunctfdf2_hw,      /* DFmode <- TFmode.  */
      gen_trunctfsf2_hw,      /* SFmode <- TFmode.  */
      gen_fix_tfsi2_hw,       /* SImode <- TFmode (signed).  */
      gen_fixuns_tfsi2_hw,    /* SImode <- TFmode (unsigned).  */
      gen_fix_tfdi2_hw,       /* DImode <- TFmode (signed).  */
      gen_fixuns_tfdi2_hw,    /* DImode <- TFmode (unsigned).  */
    },
  };

  if (dest_mode == src_mode)
    gcc_unreachable ();

  /* Eliminate memory operations.  */
  if (MEM_P (src))
    src = force_reg (src_mode, src);

  if (MEM_P (dest))
    {
      rtx tmp = gen_reg_rtx (dest_mode);
      rs6000_expand_float128_convert (tmp, src, unsigned_p);
      rs6000_emit_move (dest, tmp, dest_mode);
      return;
    }

  /* Convert to IEEE 128-bit floating point.  */
  if (FLOAT128_IEEE_P (dest_mode))
    {
      if (dest_mode == KFmode)
        kf_or_tf = 0;
      else if (dest_mode == TFmode)
        kf_or_tf = 1;
      else
        gcc_unreachable ();

      switch (src_mode)
        {
        case E_DFmode:
          cvt = sext_optab;
          hw_convert = hw_conversions[kf_or_tf].from_df;
          break;

        case E_SFmode:
          cvt = sext_optab;
          hw_convert = hw_conversions[kf_or_tf].from_sf;
          break;

        case E_KFmode:
        case E_IFmode:
        case E_TFmode:
          if (FLOAT128_IBM_P (src_mode))
            cvt = sext_optab;
          else
            do_move = true;
          break;

        case E_SImode:
          if (unsigned_p)
            {
              cvt = ufloat_optab;
              hw_convert = hw_conversions[kf_or_tf].from_si_uns;
            }
          else
            {
              cvt = sfloat_optab;
              hw_convert = hw_conversions[kf_or_tf].from_si_sign;
            }
          break;

        case E_DImode:
          if (unsigned_p)
            {
              cvt = ufloat_optab;
              hw_convert = hw_conversions[kf_or_tf].from_di_uns;
            }
          else
            {
              cvt = sfloat_optab;
              hw_convert = hw_conversions[kf_or_tf].from_di_sign;
            }
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Convert from IEEE 128-bit floating point.  */
  else if (FLOAT128_IEEE_P (src_mode))
    {
      if (src_mode == KFmode)
        kf_or_tf = 0;
      else if (src_mode == TFmode)
        kf_or_tf = 1;
      else
        gcc_unreachable ();

      switch (dest_mode)
        {
        case E_DFmode:
          cvt = trunc_optab;
          hw_convert = hw_conversions[kf_or_tf].to_df;
          break;

        case E_SFmode:
          cvt = trunc_optab;
          hw_convert = hw_conversions[kf_or_tf].to_sf;
          break;

        case E_KFmode:
        case E_IFmode:
        case E_TFmode:
          if (FLOAT128_IBM_P (dest_mode))
            cvt = trunc_optab;
          else
            do_move = true;
          break;

        case E_SImode:
          if (unsigned_p)
            {
              cvt = ufix_optab;
              hw_convert = hw_conversions[kf_or_tf].to_si_uns;
            }
          else
            {
              cvt = sfix_optab;
              hw_convert = hw_conversions[kf_or_tf].to_si_sign;
            }
          break;

        case E_DImode:
          if (unsigned_p)
            {
              cvt = ufix_optab;
              hw_convert = hw_conversions[kf_or_tf].to_di_uns;
            }
          else
            {
              cvt = sfix_optab;
              hw_convert = hw_conversions[kf_or_tf].to_di_sign;
            }
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Both IBM format.  */
  else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
    do_move = true;

  else
    gcc_unreachable ();

  /* Handle conversion between TFmode/KFmode/IFmode.  */
  if (do_move)
    emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));

  /* Handle conversion if we have hardware support.  */
  else if (TARGET_FLOAT128_HW && hw_convert)
    emit_insn ((hw_convert) (dest, src));

  /* Call an external function to do the conversion.  */
  else if (cvt != unknown_optab)
    {
      libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
      gcc_assert (libfunc != NULL_RTX);

      dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
                                       src, src_mode);

      gcc_assert (dest2 != NULL_RTX);
      if (!rtx_equal_p (dest, dest2))
        emit_move_insn (dest, dest2);
    }

  else
    gcc_unreachable ();

  return;
}
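
/* Dispatch sketch (names as in the tables above): widening DFmode to
   KFmode uses gen_extenddfkf2_hw when TARGET_FLOAT128_HW is set, and
   otherwise falls back to the library routine registered for sext_optab
   on that pair of modes.  */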

\f
/* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
   can be used as that dest register.  Return the dest register.  */

rtx
rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
{
  if (op2 == const0_rtx)
    return op1;

  if (GET_CODE (scratch) == SCRATCH)
    scratch = gen_reg_rtx (mode);

  if (logical_operand (op2, mode))
    emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
  else
    emit_insn (gen_rtx_SET (scratch,
                            gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));

  return scratch;
}
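
/* Illustration: for EQ/NE of OP1 and OP2, the scratch register receives
   OP1 ^ OP2 when OP2 is a logical_operand, and OP1 - OP2 otherwise;
   either way the result is zero exactly when OP1 == OP2.  */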

/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
   requires this.  The result is mode MODE.  */
rtx
rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
{
  rtx cond[2];
  int n = 0;
  if (code == LTGT || code == LE || code == UNLT)
    cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
  if (code == LTGT || code == GE || code == UNGT)
    cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
  if (code == LE || code == GE || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
  if (code == UNLT || code == UNGT || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);

  gcc_assert (n == 2);

  rtx cc = gen_reg_rtx (CCEQmode);
  rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
  emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));

  return cc;
}
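
/* Example: CODE == LE is not a single testable FP condition bit, so it
   is computed as LT | EQ: both conditions are formed against the CCFP
   result X and combined with a cror into a CCEQmode register.  */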

void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
  rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
  rtx_code cond_code = GET_CODE (condition_rtx);

  if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
      && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
    ;
  else if (cond_code == NE
           || cond_code == GE || cond_code == LE
           || cond_code == GEU || cond_code == LEU
           || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
    {
      rtx not_result = gen_reg_rtx (CCEQmode);
      rtx not_op, rev_cond_rtx;
      machine_mode cc_mode;

      cc_mode = GET_MODE (XEXP (condition_rtx, 0));

      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
                                     SImode, XEXP (condition_rtx, 0), const0_rtx);
      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
      emit_insn (gen_rtx_SET (not_result, not_op));
      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
    }

  machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (XEXP (operands[1], 1));

  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
    {
      PUT_MODE (condition_rtx, DImode);
      convert_move (operands[0], condition_rtx, 0);
    }
  else
    {
      PUT_MODE (condition_rtx, SImode);
      emit_insn (gen_rtx_SET (operands[0], condition_rtx));
    }
}

/* Emit a conditional branch: compare OPERANDS[0] in MODE and, if it
   holds, jump to the label in OPERANDS[3].  */

void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
  rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
  rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
  emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
}
15752
15753 /* Return the string to output a conditional branch to LABEL, which is
15754 the operand template of the label, or NULL if the branch is really a
15755 conditional return.
15756
15757 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15758 condition code register and its mode specifies what kind of
15759 comparison we made.
15760
15761 REVERSED is nonzero if we should reverse the sense of the comparison.
15762
15763 INSN is the insn. */
15764
15765 char *
15766 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15767 {
15768 static char string[64];
15769 enum rtx_code code = GET_CODE (op);
15770 rtx cc_reg = XEXP (op, 0);
15771 machine_mode mode = GET_MODE (cc_reg);
15772 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15773 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15774 int really_reversed = reversed ^ need_longbranch;
15775 char *s = string;
15776 const char *ccode;
15777 const char *pred;
15778 rtx note;
15779
15780 validate_condition_mode (code, mode);
15781
15782 /* Work out which way this really branches. We could always use
15783 reverse_condition_maybe_unordered here, but distinguishing the
15784 FP case makes the resulting assembler clearer. */
15785 if (really_reversed)
15786 {
15787 /* Reversal of FP compares takes care -- an ordered compare
15788 becomes an unordered compare and vice versa. */
15789 if (mode == CCFPmode)
15790 code = reverse_condition_maybe_unordered (code);
15791 else
15792 code = reverse_condition (code);
15793 }
15794
15795 switch (code)
15796 {
15797 /* Not all of these are actually distinct opcodes, but
15798 we distinguish them for clarity of the resulting assembler. */
15799 case NE: case LTGT:
15800 ccode = "ne"; break;
15801 case EQ: case UNEQ:
15802 ccode = "eq"; break;
15803 case GE: case GEU:
15804 ccode = "ge"; break;
15805 case GT: case GTU: case UNGT:
15806 ccode = "gt"; break;
15807 case LE: case LEU:
15808 ccode = "le"; break;
15809 case LT: case LTU: case UNLT:
15810 ccode = "lt"; break;
15811 case UNORDERED: ccode = "un"; break;
15812 case ORDERED: ccode = "nu"; break;
15813 case UNGE: ccode = "nl"; break;
15814 case UNLE: ccode = "ng"; break;
15815 default:
15816 gcc_unreachable ();
15817 }
15818
15819 /* Maybe we have a guess as to how likely the branch is. */
15820 pred = "";
15821 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15822 if (note != NULL_RTX)
15823 {
15824 /* PROB is the difference from 50%. */
15825 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15826 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15827
15828 /* Only hint for highly probable/improbable branches on newer cpus when
15829 we have real profile data, as static prediction overrides processor
15830 dynamic prediction. For older cpus we may as well always hint, but
15831 assume not taken for branches that are very close to 50% as a
15832 mispredicted taken branch is more expensive than a
15833 mispredicted not-taken branch. */
15834 if (rs6000_always_hint
15835 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15836 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15837 && br_prob_note_reliable_p (note)))
15838 {
15839 if (abs (prob) > REG_BR_PROB_BASE / 20
15840 && ((prob > 0) ^ need_longbranch))
15841 pred = "+";
15842 else
15843 pred = "-";
15844 }
15845 }
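
/* Worked example (editorial; assumes REG_BR_PROB_BASE is 10000, its
   usual value): a reliable profile note recording a 99% taken
   probability gives prob = 9900 - 5000 = 4900, which clears both the
   48% cutoff (4800) and the 5% cutoff (500), so a short forward
   branch gets the "+" (predict taken) hint.  */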
15846
15847 if (label == NULL)
15848 s += sprintf (s, "b%slr%s ", ccode, pred);
15849 else
15850 s += sprintf (s, "b%s%s ", ccode, pred);
15851
15852 /* We need to escape any '%' characters in the reg_names string.
15853 Assume they'd only be the first character.... */
15854 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15855 *s++ = '%';
15856 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15857
15858 if (label != NULL)
15859 {
15860 /* If the branch distance was too far, we may have to use an
15861 unconditional branch to go the distance. */
15862 if (need_longbranch)
15863 s += sprintf (s, ",$+8\n\tb %s", label);
15864 else
15865 s += sprintf (s, ",%s", label);
15866 }
15867
15868 return string;
15869 }
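
/* Editorial example (hypothetical output; assumes the default bare
   register names, where CR0 prints as "0"): a likely EQ branch on cr0
   comes out as

       beq+ 0,.L5

   while the same branch at long range is rewritten through the
   reversed condition and an unconditional hop:

       bne- 0,$+8
       b .L5

   Note how the hint polarity flips along with the condition.  */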
15870
15871 /* Emit a VSX or Altivec comparison and return the mask register, or NULL_RTX if CODE is not handled directly. */
15872
15873 static rtx
15874 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15875 {
15876 rtx mask;
15877 machine_mode mode = GET_MODE (op0);
15878
15879 switch (code)
15880 {
15881 default:
15882 break;
15883
15884 case GE:
15885 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15886 return NULL_RTX;
15887 /* FALLTHRU */
15888
15889 case EQ:
15890 case GT:
15891 case GTU:
15892 case ORDERED:
15893 case UNORDERED:
15894 case UNEQ:
15895 case LTGT:
15896 mask = gen_reg_rtx (mode);
15897 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15898 return mask;
15899 }
15900
15901 return NULL_RTX;
15902 }
15903
15904 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15905 DMODE is expected destination mode. This is a recursive function. */
15906
15907 static rtx
15908 rs6000_emit_vector_compare (enum rtx_code rcode,
15909 rtx op0, rtx op1,
15910 machine_mode dmode)
15911 {
15912 rtx mask;
15913 bool swap_operands = false;
15914 bool try_again = false;
15915
15916 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15917 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15918
15919 /* See if the comparison works as is. */
15920 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15921 if (mask)
15922 return mask;
15923
15924 switch (rcode)
15925 {
15926 case LT:
15927 rcode = GT;
15928 swap_operands = true;
15929 try_again = true;
15930 break;
15931 case LTU:
15932 rcode = GTU;
15933 swap_operands = true;
15934 try_again = true;
15935 break;
15936 case NE:
15937 case UNLE:
15938 case UNLT:
15939 case UNGE:
15940 case UNGT:
15941 /* Invert condition and try again.
15942 e.g., A != B becomes ~(A==B). */
15943 {
15944 enum rtx_code rev_code;
15945 enum insn_code nor_code;
15946 rtx mask2;
15947
15948 rev_code = reverse_condition_maybe_unordered (rcode);
15949 if (rev_code == UNKNOWN)
15950 return NULL_RTX;
15951
15952 nor_code = optab_handler (one_cmpl_optab, dmode);
15953 if (nor_code == CODE_FOR_nothing)
15954 return NULL_RTX;
15955
15956 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15957 if (!mask2)
15958 return NULL_RTX;
15959
15960 mask = gen_reg_rtx (dmode);
15961 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15962 return mask;
15963 }
15964 break;
15965 case GE:
15966 case GEU:
15967 case LE:
15968 case LEU:
15969 /* Try GT/GTU/LT/LTU OR EQ */
15970 {
15971 rtx c_rtx, eq_rtx;
15972 enum insn_code ior_code;
15973 enum rtx_code new_code;
15974
15975 switch (rcode)
15976 {
15977 case GE:
15978 new_code = GT;
15979 break;
15980
15981 case GEU:
15982 new_code = GTU;
15983 break;
15984
15985 case LE:
15986 new_code = LT;
15987 break;
15988
15989 case LEU:
15990 new_code = LTU;
15991 break;
15992
15993 default:
15994 gcc_unreachable ();
15995 }
15996
15997 ior_code = optab_handler (ior_optab, dmode);
15998 if (ior_code == CODE_FOR_nothing)
15999 return NULL_RTX;
16000
16001 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
16002 if (!c_rtx)
16003 return NULL_RTX;
16004
16005 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
16006 if (!eq_rtx)
16007 return NULL_RTX;
16008
16009 mask = gen_reg_rtx (dmode);
16010 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
16011 return mask;
16012 }
16013 break;
16014 default:
16015 return NULL_RTX;
16016 }
16017
16018 if (try_again)
16019 {
16020 if (swap_operands)
16021 std::swap (op0, op1);
16022
16023 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
16024 if (mask)
16025 return mask;
16026 }
16027
16028 /* You only get two chances. */
16029 return NULL_RTX;
16030 }
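
/* Editorial sketch (not part of rs6000.cc; the *_mask helper names are
   ours): a per-element model of the fallback decompositions above,
   using the -1/0 element masks the vector compares produce.  */
#if 0
static int eq_mask (int a, int b) { return a == b ? -1 : 0; }
static int gt_mask (int a, int b) { return a > b ? -1 : 0; }
/* NE and the UN* codes: build the reversed compare, then invert.  */
static int ne_mask (int a, int b) { return ~eq_mask (a, b); }
/* GE/GEU/LE/LEU: OR the strict compare with EQ.  */
static int ge_mask (int a, int b) { return gt_mask (a, b) | eq_mask (a, b); }
/* LT/LTU: swap the operands and use GT/GTU.  */
static int lt_mask (int a, int b) { return gt_mask (b, a); }
#endif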
16031
16032 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
16033 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
16034 operands for the relation operation COND. */
16035
16036 int
16037 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
16038 rtx cond, rtx cc_op0, rtx cc_op1)
16039 {
16040 machine_mode dest_mode = GET_MODE (dest);
16041 machine_mode mask_mode = GET_MODE (cc_op0);
16042 enum rtx_code rcode = GET_CODE (cond);
16043 rtx mask;
16044 bool invert_move = false;
16045
16046 if (VECTOR_UNIT_NONE_P (dest_mode))
16047 return 0;
16048
16049 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
16050 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
16051
16052 switch (rcode)
16053 {
16054 /* Swap operands if we can, and fall back to doing the operation as
16055 specified, and doing a NOR to invert the test. */
16056 case NE:
16057 case UNLE:
16058 case UNLT:
16059 case UNGE:
16060 case UNGT:
16061 /* Invert condition and try again.
16062 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
16063 invert_move = true;
16064 rcode = reverse_condition_maybe_unordered (rcode);
16065 if (rcode == UNKNOWN)
16066 return 0;
16067 break;
16068
16069 case GE:
16070 case LE:
16071 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
16072 {
16073 /* Invert condition to avoid compound test. */
16074 invert_move = true;
16075 rcode = reverse_condition (rcode);
16076 }
16077 break;
16078
16079 case GTU:
16080 case GEU:
16081 case LTU:
16082 case LEU:
16083
16084 /* Invert condition to avoid compound test if necessary. */
16085 if (rcode == GEU || rcode == LEU)
16086 {
16087 invert_move = true;
16088 rcode = reverse_condition (rcode);
16089 }
16090 break;
16091
16092 default:
16093 break;
16094 }
16095
16096 /* Get the vector mask for the given relational operations. */
16097 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
16098
16099 if (!mask)
16100 return 0;
16101
16102 if (mask_mode != dest_mode)
16103 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
16104
16105 if (invert_move)
16106 std::swap (op_true, op_false);
16107
16108 /* The mask elements are known to be -1/0, so selects between constant -1/0 operands can use the mask directly. */
16109 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
16110 && (GET_CODE (op_true) == CONST_VECTOR
16111 || GET_CODE (op_false) == CONST_VECTOR))
16112 {
16113 rtx constant_0 = CONST0_RTX (dest_mode);
16114 rtx constant_m1 = CONSTM1_RTX (dest_mode);
16115
16116 if (op_true == constant_m1 && op_false == constant_0)
16117 {
16118 emit_move_insn (dest, mask);
16119 return 1;
16120 }
16121
16122 else if (op_true == constant_0 && op_false == constant_m1)
16123 {
16124 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
16125 return 1;
16126 }
16127
16128 /* If we can't use the vector comparison directly, perhaps we can use
16129 the mask for the true or false fields, instead of loading up a
16130 constant. */
16131 if (op_true == constant_m1)
16132 op_true = mask;
16133
16134 if (op_false == constant_0)
16135 op_false = mask;
16136 }
16137
16138 if (!REG_P (op_true) && !SUBREG_P (op_true))
16139 op_true = force_reg (dest_mode, op_true);
16140
16141 if (!REG_P (op_false) && !SUBREG_P (op_false))
16142 op_false = force_reg (dest_mode, op_false);
16143
16144 rtx tmp = gen_rtx_IOR (dest_mode,
16145 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
16146 op_false),
16147 gen_rtx_AND (dest_mode, mask, op_true));
16148 emit_insn (gen_rtx_SET (dest, tmp));
16149 return 1;
16150 }
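
/* Editorial sketch (not part of rs6000.cc; xxsel_model is our name):
   the IOR/AND/NOT tree emitted above computes, per element, the usual
   mask-select identity, with MASK all-ones or all-zeros.  */
#if 0
static int
xxsel_model (int mask, int op_true, int op_false)
{
  return (op_false & ~mask) | (op_true & mask);
}
#endif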
16151
16152 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to compute
16153 a maximum or minimum with "C" semantics.
16154
16155 Unless you use -ffast-math, you can't use these instructions to replace
16156 conditions that implicitly reverse the condition because the comparison
16157 might generate a NaN or signed zero.
16158
16159 I.e. the following can be replaced all of the time
16160 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16161 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16162 ret = (op1 < op2) ? op1 : op2 ; generate xsmincdp
16163 ret = (op1 <= op2) ? op1 : op2 ; generate xsmincdp
16164
16165 The following can be replaced only if -ffast-math is used:
16166 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16167 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16168 ret = (op1 > op2) ? op2 : op1 ; generate xsmincdp
16169 ret = (op1 >= op2) ? op2 : op1 ; generate xsmincdp
16170
16171 Move TRUE_COND to DEST if OP applied to the operands of the last comparison
16172 is nonzero/true, FALSE_COND if it is zero/false.
16173
16174 Return false if we can't generate the appropriate minimum or maximum, and
16175 true if we did generate it. */
16176
16177 static bool
16178 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16179 {
16180 enum rtx_code code = GET_CODE (op);
16181 rtx op0 = XEXP (op, 0);
16182 rtx op1 = XEXP (op, 1);
16183 machine_mode compare_mode = GET_MODE (op0);
16184 machine_mode result_mode = GET_MODE (dest);
16185
16186 if (result_mode != compare_mode)
16187 return false;
16188
16189 /* See the comments of this function, it simply expects GE/GT/LE/LT in
16190 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
16191 we need to do the reversions first to make the following checks
16192 support fewer cases, like:
16193
16194 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16195 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16196 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16197 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16198
16199 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16200 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16201 have to check for fast-math or the like. */
16202 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
16203 {
16204 code = reverse_condition_maybe_unordered (code);
16205 std::swap (true_cond, false_cond);
16206 }
16207
16208 bool max_p;
16209 if (code == GE || code == GT)
16210 max_p = true;
16211 else if (code == LE || code == LT)
16212 max_p = false;
16213 else
16214 return false;
16215
16216 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
16217 ;
16218
16219 /* Only when NaNs and signed-zeros are not in effect, smax could be
16220 used for `op0 < op1 ? op1 : op0`, and smin could be used for
16221 `op0 > op1 ? op1 : op0`. */
16222 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
16223 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
16224 max_p = !max_p;
16225
16226 else
16227 return false;
16228
16229 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
16230 return true;
16231 }
16232
16233 /* Possibly emit a floating point conditional move by generating a compare
16234 instruction that sets a mask, together with an XXSEL select instruction.
16235
16236 Move TRUE_COND to DEST if OP applied to the operands of the last comparison
16237 is nonzero/true, FALSE_COND if it is zero/false.
16238
16239 Return false if the operation cannot be generated, and true if we could
16240 generate the instruction. */
16241
16242 static bool
16243 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16244 {
16245 enum rtx_code code = GET_CODE (op);
16246 rtx op0 = XEXP (op, 0);
16247 rtx op1 = XEXP (op, 1);
16248 machine_mode compare_mode = GET_MODE (op0);
16249 machine_mode result_mode = GET_MODE (dest);
16250 rtx compare_rtx;
16251 rtx cmove_rtx;
16252 rtx clobber_rtx;
16253
16254 if (!can_create_pseudo_p ())
16255 return false;
16256
16257 /* We allow the comparison to be either SFmode/DFmode and the true/false
16258 condition to be either SFmode/DFmode. I.e. we allow:
16259
16260 float a, b;
16261 double c, d, r;
16262
16263 r = (a == b) ? c : d;
16264
16265 and:
16266
16267 double a, b;
16268 float c, d, r;
16269
16270 r = (a == b) ? c : d;
16271
16272 but we don't allow intermixing the IEEE 128-bit floating point types with
16273 the 32/64-bit scalar types. */
16274
16275 if (!(compare_mode == result_mode
16276 || (compare_mode == SFmode && result_mode == DFmode)
16277 || (compare_mode == DFmode && result_mode == SFmode)))
16278 return false;
16279
16280 switch (code)
16281 {
16282 case EQ:
16283 case GE:
16284 case GT:
16285 break;
16286
16287 case NE:
16288 case LT:
16289 case LE:
16290 code = swap_condition (code);
16291 std::swap (op0, op1);
16292 break;
16293
16294 default:
16295 return false;
16296 }
16297
16298 /* Generate: [(parallel [(set (dest)
16299 (if_then_else (op (cmp1) (cmp2))
16300 (true)
16301 (false)))
16302 (clobber (scratch))])]. */
16303
16304 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
16305 cmove_rtx = gen_rtx_SET (dest,
16306 gen_rtx_IF_THEN_ELSE (result_mode,
16307 compare_rtx,
16308 true_cond,
16309 false_cond));
16310
16311 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
16312 emit_insn (gen_rtx_PARALLEL (VOIDmode,
16313 gen_rtvec (2, cmove_rtx, clobber_rtx)));
16314
16315 return true;
16316 }
16317
16318 /* Helper function to return true if the target has a compare-and-set-mask
16319 instruction that can be used with XXSEL to implement a conditional move.
16320 Such a target is also assumed to support the "C" minimum and maximum
16321 instructions. */
16322
16323 static bool
16324 have_compare_and_set_mask (machine_mode mode)
16325 {
16326 switch (mode)
16327 {
16328 case E_SFmode:
16329 case E_DFmode:
16330 return TARGET_P9_MINMAX;
16331
16332 case E_KFmode:
16333 case E_TFmode:
16334 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16335
16336 default:
16337 break;
16338 }
16339
16340 return false;
16341 }
16342
16343 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
16344 operands of the last comparison is nonzero/true, FALSE_COND if it
16345 is zero/false. Return false if the hardware has no such operation. */
16346
16347 bool
16348 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16349 {
16350 enum rtx_code code = GET_CODE (op);
16351 rtx op0 = XEXP (op, 0);
16352 rtx op1 = XEXP (op, 1);
16353 machine_mode compare_mode = GET_MODE (op0);
16354 machine_mode result_mode = GET_MODE (dest);
16355 rtx temp;
16356 bool is_against_zero;
16357
16358 /* These modes should always match. */
16359 if (GET_MODE (op1) != compare_mode
16360 /* In the isel case however, we can use a compare immediate, so
16361 op1 may be a small constant. */
16362 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16363 return false;
16364 if (GET_MODE (true_cond) != result_mode)
16365 return false;
16366 if (GET_MODE (false_cond) != result_mode)
16367 return false;
16368
16369 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16370 instructions. */
16371 if (have_compare_and_set_mask (compare_mode)
16372 && have_compare_and_set_mask (result_mode))
16373 {
16374 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16375 return true;
16376
16377 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16378 return true;
16379 }
16380
16381 /* Don't allow using floating point comparisons for integer results for
16382 now. */
16383 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16384 return false;
16385
16386 /* First, work out if the hardware can do this at all, or
16387 if it's too slow.... */
16388 if (!FLOAT_MODE_P (compare_mode))
16389 {
16390 if (TARGET_ISEL)
16391 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16392 return false;
16393 }
16394
16395 is_against_zero = op1 == CONST0_RTX (compare_mode);
16396
16397 /* A floating-point subtract might overflow, underflow, or produce
16398 an inexact result, thus changing the floating-point flags, so it
16399 can't be generated if we care about that. It's safe if one side
16400 of the construct is zero, since then no subtract will be
16401 generated. */
16402 if (SCALAR_FLOAT_MODE_P (compare_mode)
16403 && flag_trapping_math && ! is_against_zero)
16404 return false;
16405
16406 /* Eliminate half of the comparisons by switching operands, this
16407 makes the remaining code simpler. */
16408 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16409 || code == LTGT || code == LT || code == UNLE)
16410 {
16411 code = reverse_condition_maybe_unordered (code);
16412 temp = true_cond;
16413 true_cond = false_cond;
16414 false_cond = temp;
16415 }
16416
16417 /* UNEQ and LTGT take four instructions for a comparison with zero,
16418 so it'll probably be faster to use a branch here too. */
16419 if (code == UNEQ && HONOR_NANS (compare_mode))
16420 return false;
16421
16422 /* We're going to try to implement comparisons by performing
16423 a subtract, then comparing against zero. Unfortunately,
16424 Inf - Inf is NaN which is not zero, and so if we don't
16425 know that the operand is finite and the comparison
16426 would treat EQ differently from UNORDERED, we can't do it. */
16427 if (HONOR_INFINITIES (compare_mode)
16428 && code != GT && code != UNGE
16429 && (!CONST_DOUBLE_P (op1)
16430 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16431 /* Constructs of the form (a OP b ? a : b) are safe. */
16432 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16433 || (! rtx_equal_p (op0, true_cond)
16434 && ! rtx_equal_p (op1, true_cond))))
16435 return false;
16436
16437 /* At this point we know we can use fsel. */
16438
16439 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16440 is no fsel instruction. */
16441 if (compare_mode != SFmode && compare_mode != DFmode)
16442 return false;
16443
16444 /* Reduce the comparison to a comparison against zero. */
16445 if (! is_against_zero)
16446 {
16447 temp = gen_reg_rtx (compare_mode);
16448 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16449 op0 = temp;
16450 op1 = CONST0_RTX (compare_mode);
16451 }
16452
16453 /* If we don't care about NaNs we can reduce some of the comparisons
16454 down to faster ones. */
16455 if (! HONOR_NANS (compare_mode))
16456 switch (code)
16457 {
16458 case GT:
16459 code = LE;
16460 temp = true_cond;
16461 true_cond = false_cond;
16462 false_cond = temp;
16463 break;
16464 case UNGE:
16465 code = GE;
16466 break;
16467 case UNEQ:
16468 code = EQ;
16469 break;
16470 default:
16471 break;
16472 }
16473
16474 /* Now, reduce everything down to a GE. */
16475 switch (code)
16476 {
16477 case GE:
16478 break;
16479
16480 case LE:
16481 temp = gen_reg_rtx (compare_mode);
16482 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16483 op0 = temp;
16484 break;
16485
16486 case ORDERED:
16487 temp = gen_reg_rtx (compare_mode);
16488 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16489 op0 = temp;
16490 break;
16491
16492 case EQ:
16493 temp = gen_reg_rtx (compare_mode);
16494 emit_insn (gen_rtx_SET (temp,
16495 gen_rtx_NEG (compare_mode,
16496 gen_rtx_ABS (compare_mode, op0))));
16497 op0 = temp;
16498 break;
16499
16500 case UNGE:
16501 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16502 temp = gen_reg_rtx (result_mode);
16503 emit_insn (gen_rtx_SET (temp,
16504 gen_rtx_IF_THEN_ELSE (result_mode,
16505 gen_rtx_GE (VOIDmode,
16506 op0, op1),
16507 true_cond, false_cond)));
16508 false_cond = true_cond;
16509 true_cond = temp;
16510
16511 temp = gen_reg_rtx (compare_mode);
16512 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16513 op0 = temp;
16514 break;
16515
16516 case GT:
16517 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16518 temp = gen_reg_rtx (result_mode);
16519 emit_insn (gen_rtx_SET (temp,
16520 gen_rtx_IF_THEN_ELSE (result_mode,
16521 gen_rtx_GE (VOIDmode,
16522 op0, op1),
16523 true_cond, false_cond)));
16524 true_cond = false_cond;
16525 false_cond = temp;
16526
16527 temp = gen_reg_rtx (compare_mode);
16528 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16529 op0 = temp;
16530 break;
16531
16532 default:
16533 gcc_unreachable ();
16534 }
16535
16536 emit_insn (gen_rtx_SET (dest,
16537 gen_rtx_IF_THEN_ELSE (result_mode,
16538 gen_rtx_GE (VOIDmode,
16539 op0, op1),
16540 true_cond, false_cond)));
16541 return true;
16542 }
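
/* Editorial sketch (not part of rs6000.cc; fsel_model is our name):
   everything above funnels into a GE test against zero because of the
   fsel semantics: roughly, "fsel FRT,FRA,FRC,FRB" yields FRC when
   FRA >= 0.0 and FRB otherwise (a NaN in FRA selects FRB).  */
#if 0
static double
fsel_model (double a, double on_ge, double on_lt)
{
  return a >= 0.0 ? on_ge : on_lt;
}
#endif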
16543
16544 /* Same as above, but for ints (isel). */
16545
16546 bool
16547 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16548 {
16549 rtx condition_rtx, cr;
16550 machine_mode mode = GET_MODE (dest);
16551 enum rtx_code cond_code;
16552 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16553 bool signedp;
16554
16555 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16556 return false;
16557
16558 /* PR104335: We now need to expect CC-mode "comparisons"
16559 coming from ifcvt. The following code expects proper
16560 comparisons so better abort here. */
16561 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16562 return false;
16563
16564 /* We still have to do the compare, because isel doesn't do a
16565 compare, it just looks at the CRx bits set by a previous compare
16566 instruction. */
16567 condition_rtx = rs6000_generate_compare (op, mode);
16568 cond_code = GET_CODE (condition_rtx);
16569 cr = XEXP (condition_rtx, 0);
16570 signedp = GET_MODE (cr) == CCmode;
16571
16572 isel_func = (mode == SImode
16573 ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si)
16574 : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di));
16575
16576 switch (cond_code)
16577 {
16578 case LT: case GT: case LTU: case GTU: case EQ:
16579 /* isel handles these directly. */
16580 break;
16581
16582 default:
16583 /* We need to swap the sense of the comparison. */
16584 {
16585 std::swap (false_cond, true_cond);
16586 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16587 }
16588 break;
16589 }
16590
16591 false_cond = force_reg (mode, false_cond);
16592 if (true_cond != const0_rtx)
16593 true_cond = force_reg (mode, true_cond);
16594
16595 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16596
16597 return true;
16598 }
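
/* Editorial sketch (not part of rs6000.cc; isel_model is our name):
   isel copies one of two GPRs according to a single CR bit, which is
   why the compare must be emitted separately above.  */
#if 0
static long
isel_model (int cr_bit_set, long true_cond, long false_cond)
{
  return cr_bit_set ? true_cond : false_cond;
}
#endif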
16599
16600 void
16601 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16602 {
16603 machine_mode mode = GET_MODE (op0);
16604 enum rtx_code c;
16605 rtx target;
16606
16607 /* VSX/altivec have direct min/max insns. */
16608 if ((code == SMAX || code == SMIN)
16609 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16610 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16611 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16612 {
16613 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16614 return;
16615 }
16616
16617 if (code == SMAX || code == SMIN)
16618 c = GE;
16619 else
16620 c = GEU;
16621
16622 if (code == SMAX || code == UMAX)
16623 target = emit_conditional_move (dest, { c, op0, op1, mode },
16624 op0, op1, mode, 0);
16625 else
16626 target = emit_conditional_move (dest, { c, op0, op1, mode },
16627 op1, op0, mode, 0);
16628 gcc_assert (target);
16629 if (target != dest)
16630 emit_move_insn (dest, target);
16631 }
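
/* Editorial note: on targets without a direct min/max instruction the
   fallback above computes SMAX/UMAX as dest = (op0 >= op1) ? op0 : op1
   via a conditional move, and SMIN/UMIN by swapping the move arms.  */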
16632
16633 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16634 COND is true. Mark the jump as unlikely to be taken. */
16635
16636 static void
16637 emit_unlikely_jump (rtx cond, rtx label)
16638 {
16639 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16640 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16641 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16642 }
16643
16644 /* A subroutine of the atomic operation splitters. Emit a load-locked
16645 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16646 the zero_extend operation. */
16647
16648 static void
16649 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16650 {
16651 rtx (*fn) (rtx, rtx) = NULL;
16652
16653 switch (mode)
16654 {
16655 case E_QImode:
16656 fn = gen_load_lockedqi;
16657 break;
16658 case E_HImode:
16659 fn = gen_load_lockedhi;
16660 break;
16661 case E_SImode:
16662 if (GET_MODE (mem) == QImode)
16663 fn = gen_load_lockedqi_si;
16664 else if (GET_MODE (mem) == HImode)
16665 fn = gen_load_lockedhi_si;
16666 else
16667 fn = gen_load_lockedsi;
16668 break;
16669 case E_DImode:
16670 fn = gen_load_lockeddi;
16671 break;
16672 case E_TImode:
16673 fn = gen_load_lockedti;
16674 break;
16675 default:
16676 gcc_unreachable ();
16677 }
16678 emit_insn (fn (reg, mem));
16679 }
16680
16681 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16682 instruction in MODE. */
16683
16684 static void
16685 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16686 {
16687 rtx (*fn) (rtx, rtx, rtx) = NULL;
16688
16689 switch (mode)
16690 {
16691 case E_QImode:
16692 fn = gen_store_conditionalqi;
16693 break;
16694 case E_HImode:
16695 fn = gen_store_conditionalhi;
16696 break;
16697 case E_SImode:
16698 fn = gen_store_conditionalsi;
16699 break;
16700 case E_DImode:
16701 fn = gen_store_conditionaldi;
16702 break;
16703 case E_TImode:
16704 fn = gen_store_conditionalti;
16705 break;
16706 default:
16707 gcc_unreachable ();
16708 }
16709
16710 /* Emit sync before stwcx. to address PPC405 Erratum. */
16711 if (PPC405_ERRATUM77)
16712 emit_insn (gen_hwsync ());
16713
16714 emit_insn (fn (res, mem, val));
16715 }
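
/* Editorial note: these two helpers form the body of the usual
   load-reserve/store-conditional retry loop; for SImode the emitted
   sequence is roughly (hypothetical assembly sketch):

   1:  lwarx   r9,0,r3     # load word and set the reservation
       <compute r10 from r9>
       stwcx.  r10,0,r3    # store iff the reservation still holds
       bne-    0,1b        # reservation lost: retry  */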
16716
16717 /* Expand barriers before and after a load_locked/store_cond sequence. */
16718
16719 static rtx
16720 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16721 {
16722 rtx addr = XEXP (mem, 0);
16723
16724 if (!legitimate_indirect_address_p (addr, reload_completed)
16725 && !legitimate_indexed_address_p (addr, reload_completed))
16726 {
16727 addr = force_reg (Pmode, addr);
16728 mem = replace_equiv_address_nv (mem, addr);
16729 }
16730
16731 switch (model)
16732 {
16733 case MEMMODEL_RELAXED:
16734 case MEMMODEL_CONSUME:
16735 case MEMMODEL_ACQUIRE:
16736 break;
16737 case MEMMODEL_RELEASE:
16738 case MEMMODEL_ACQ_REL:
16739 emit_insn (gen_lwsync ());
16740 break;
16741 case MEMMODEL_SEQ_CST:
16742 emit_insn (gen_hwsync ());
16743 break;
16744 default:
16745 gcc_unreachable ();
16746 }
16747 return mem;
16748 }
16749
16750 static void
16751 rs6000_post_atomic_barrier (enum memmodel model)
16752 {
16753 switch (model)
16754 {
16755 case MEMMODEL_RELAXED:
16756 case MEMMODEL_CONSUME:
16757 case MEMMODEL_RELEASE:
16758 break;
16759 case MEMMODEL_ACQUIRE:
16760 case MEMMODEL_ACQ_REL:
16761 case MEMMODEL_SEQ_CST:
16762 emit_insn (gen_isync ());
16763 break;
16764 default:
16765 gcc_unreachable ();
16766 }
16767 }
16768
16769 /* A subroutine of the various atomic expanders. For sub-word operations,
16770 we must adjust things to operate on SImode. Given the original MEM,
16771 return a new aligned memory. Also build and return the quantities by
16772 which to shift and mask. */
16773
16774 static rtx
16775 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16776 {
16777 rtx addr, align, shift, mask, mem;
16778 HOST_WIDE_INT shift_mask;
16779 machine_mode mode = GET_MODE (orig_mem);
16780
16781 /* For smaller modes, we have to implement this via SImode. */
16782 shift_mask = (mode == QImode ? 0x18 : 0x10);
16783
16784 addr = XEXP (orig_mem, 0);
16785 addr = force_reg (GET_MODE (addr), addr);
16786
16787 /* Aligned memory containing subword. Generate a new memory. We
16788 do not want any of the existing MEM_ATTR data, as we're now
16789 accessing memory outside the original object. */
16790 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16791 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16792 mem = gen_rtx_MEM (SImode, align);
16793 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16794 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16795 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16796
16797 /* Shift amount for subword relative to aligned word. */
16798 shift = gen_reg_rtx (SImode);
16799 addr = gen_lowpart (SImode, addr);
16800 rtx tmp = gen_reg_rtx (SImode);
16801 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16802 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16803 if (BYTES_BIG_ENDIAN)
16804 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16805 shift, 1, OPTAB_LIB_WIDEN);
16806 *pshift = shift;
16807
16808 /* Mask for insertion. */
16809 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16810 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16811 *pmask = mask;
16812
16813 return mem;
16814 }
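
/* Editorial sketch (not part of rs6000.cc; subword_shift_model is our
   name): the shift computed above, modelled for a QImode byte, where
   0x18 is the QImode shift mask from the code.  */
#if 0
static unsigned int
subword_shift_model (unsigned long addr, int bytes_big_endian)
{
  unsigned int shift = (addr << 3) & 0x18;  /* byte offset * 8 */
  if (bytes_big_endian)
    shift ^= 0x18;  /* count bits from the other end of the word */
  return shift;     /* e.g. addr 0x1003: 24 on LE, 0 on BE */
}
#endif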
16815
16816 /* A subroutine of the various atomic expanders. For sub-word operands,
16817 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
16818
16819 static rtx
16820 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16821 {
16822 rtx x;
16823
16824 x = gen_reg_rtx (SImode);
16825 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16826 gen_rtx_NOT (SImode, mask),
16827 oldval)));
16828
16829 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16830
16831 return x;
16832 }
16833
16834 /* A subroutine of the various atomic expanders. For sub-word operands,
16835 extract WIDE to NARROW via SHIFT. */
16836
16837 static void
16838 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16839 {
16840 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16841 wide, 1, OPTAB_LIB_WIDEN);
16842 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16843 }
16844
16845 /* Expand an atomic compare and swap operation. */
16846
16847 void
16848 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16849 {
16850 rtx boolval, retval, mem, oldval, newval, cond;
16851 rtx label1, label2, x, mask, shift;
16852 machine_mode mode, orig_mode;
16853 enum memmodel mod_s, mod_f;
16854 bool is_weak;
16855
16856 boolval = operands[0];
16857 retval = operands[1];
16858 mem = operands[2];
16859 oldval = operands[3];
16860 newval = operands[4];
16861 is_weak = (INTVAL (operands[5]) != 0);
16862 mod_s = memmodel_base (INTVAL (operands[6]));
16863 mod_f = memmodel_base (INTVAL (operands[7]));
16864 orig_mode = mode = GET_MODE (mem);
16865
16866 mask = shift = NULL_RTX;
16867 if (mode == QImode || mode == HImode)
16868 {
16869 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16870 lwarx and shift/mask operations. With power8, we need to do the
16871 comparison in SImode, but the store is still done in QI/HImode. */
16872 oldval = convert_modes (SImode, mode, oldval, 1);
16873
16874 if (!TARGET_SYNC_HI_QI)
16875 {
16876 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16877
16878 /* Shift and mask OLDVAL into position within the word. */
16879 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16880 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16881
16882 /* Shift and mask NEWVAL into position within the word. */
16883 newval = convert_modes (SImode, mode, newval, 1);
16884 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16885 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16886 }
16887
16888 /* Prepare to adjust the return value. */
16889 retval = gen_reg_rtx (SImode);
16890 mode = SImode;
16891 }
16892 else if (reg_overlap_mentioned_p (retval, oldval))
16893 oldval = copy_to_reg (oldval);
16894
16895 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16896 oldval = copy_to_mode_reg (mode, oldval);
16897
16898 if (reg_overlap_mentioned_p (retval, newval))
16899 newval = copy_to_reg (newval);
16900
16901 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16902
16903 label1 = NULL_RTX;
16904 if (!is_weak)
16905 {
16906 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16907 emit_label (XEXP (label1, 0));
16908 }
16909 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16910
16911 emit_load_locked (mode, retval, mem);
16912
16913 x = retval;
16914 if (mask)
16915 x = expand_simple_binop (SImode, AND, retval, mask,
16916 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16917
16918 cond = gen_reg_rtx (CCmode);
16919 /* For TImode there is no compare insn, so synthesize the comparison. */
16920 if (mode != TImode)
16921 x = gen_rtx_COMPARE (CCmode, x, oldval);
16922 else
16923 {
16924 rtx xor1_result = gen_reg_rtx (DImode);
16925 rtx xor2_result = gen_reg_rtx (DImode);
16926 rtx or_result = gen_reg_rtx (DImode);
16927 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16928 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16929 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16930 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16931
16932 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16933 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16934 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16935 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16936 }
16937
16938 emit_insn (gen_rtx_SET (cond, x));
16939
16940 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16941 emit_unlikely_jump (x, label2);
16942
16943 x = newval;
16944 if (mask)
16945 x = rs6000_mask_atomic_subword (retval, newval, mask);
16946
16947 emit_store_conditional (orig_mode, cond, mem, x);
16948
16949 if (!is_weak)
16950 {
16951 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16952 emit_unlikely_jump (x, label1);
16953 }
16954
16955 if (!is_mm_relaxed (mod_f))
16956 emit_label (XEXP (label2, 0));
16957
16958 rs6000_post_atomic_barrier (mod_s);
16959
16960 if (is_mm_relaxed (mod_f))
16961 emit_label (XEXP (label2, 0));
16962
16963 if (shift)
16964 rs6000_finish_atomic_subword (operands[1], retval, shift);
16965 else if (mode != GET_MODE (operands[1]))
16966 convert_move (operands[1], retval, 1);
16967
16968 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16969 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16970 emit_insn (gen_rtx_SET (boolval, x));
16971 }
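
/* Editorial note: for a word-sized, strong, seq_cst compare-and-swap
   the expansion above boils down to roughly (hypothetical sketch):

       hwsync                  # pre-barrier for seq_cst
   1:  lwarx   rRET,0,rMEM
       cmpw    0,rRET,rOLD
       bne-    0,2f            # values differ: fail
       stwcx.  rNEW,0,rMEM
       bne-    0,1b            # reservation lost: retry (strong CAS)
   2:  isync                   # post-barrier for seq_cst

   leaving CR0 with EQ on success and NE on failure, as the final
   SET of BOOLVAL records.  */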
16972
16973 /* Expand an atomic exchange operation. */
16974
16975 void
16976 rs6000_expand_atomic_exchange (rtx operands[])
16977 {
16978 rtx retval, mem, val, cond;
16979 machine_mode mode;
16980 enum memmodel model;
16981 rtx label, x, mask, shift;
16982
16983 retval = operands[0];
16984 mem = operands[1];
16985 val = operands[2];
16986 model = memmodel_base (INTVAL (operands[3]));
16987 mode = GET_MODE (mem);
16988
16989 mask = shift = NULL_RTX;
16990 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16991 {
16992 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16993
16994 /* Shift and mask VAL into position within the word. */
16995 val = convert_modes (SImode, mode, val, 1);
16996 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16997 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16998
16999 /* Prepare to adjust the return value. */
17000 retval = gen_reg_rtx (SImode);
17001 mode = SImode;
17002 }
17003
17004 mem = rs6000_pre_atomic_barrier (mem, model);
17005
17006 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17007 emit_label (XEXP (label, 0));
17008
17009 emit_load_locked (mode, retval, mem);
17010
17011 x = val;
17012 if (mask)
17013 x = rs6000_mask_atomic_subword (retval, val, mask);
17014
17015 cond = gen_reg_rtx (CCmode);
17016 emit_store_conditional (mode, cond, mem, x);
17017
17018 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17019 emit_unlikely_jump (x, label);
17020
17021 rs6000_post_atomic_barrier (model);
17022
17023 if (shift)
17024 rs6000_finish_atomic_subword (operands[0], retval, shift);
17025 }
17026
17027 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
17028 to perform. MEM is the memory on which to operate. VAL is the second
17029 operand of the binary operator. BEFORE and AFTER are optional locations to
17030 return the value of MEM either before or after the operation. MODEL_RTX
17031 is a CONST_INT containing the memory model to use. */
17032
17033 void
17034 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
17035 rtx orig_before, rtx orig_after, rtx model_rtx)
17036 {
17037 enum memmodel model = memmodel_base (INTVAL (model_rtx));
17038 machine_mode mode = GET_MODE (mem);
17039 machine_mode store_mode = mode;
17040 rtx label, x, cond, mask, shift;
17041 rtx before = orig_before, after = orig_after;
17042
17043 mask = shift = NULL_RTX;
17044 /* On power8, we want to use SImode for the operation. On previous systems,
17045 use the operation in a subword and shift/mask to get the proper byte or
17046 halfword. */
17047 if (mode == QImode || mode == HImode)
17048 {
17049 if (TARGET_SYNC_HI_QI)
17050 {
17051 val = convert_modes (SImode, mode, val, 1);
17052
17053 /* Prepare to adjust the return value. */
17054 before = gen_reg_rtx (SImode);
17055 if (after)
17056 after = gen_reg_rtx (SImode);
17057 mode = SImode;
17058 }
17059 else
17060 {
17061 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17062
17063 /* Shift and mask VAL into position within the word. */
17064 val = convert_modes (SImode, mode, val, 1);
17065 val = expand_simple_binop (SImode, ASHIFT, val, shift,
17066 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17067
17068 switch (code)
17069 {
17070 case IOR:
17071 case XOR:
17072 /* We've already zero-extended VAL. That is sufficient to
17073 make certain that it does not affect other bits. */
17074 mask = NULL;
17075 break;
17076
17077 case AND:
17078 /* If we make certain that all of the other bits in VAL are
17079 set, that will be sufficient to not affect other bits. */
17080 x = gen_rtx_NOT (SImode, mask);
17081 x = gen_rtx_IOR (SImode, x, val);
17082 emit_insn (gen_rtx_SET (val, x));
17083 mask = NULL;
17084 break;
17085
17086 case NOT:
17087 case PLUS:
17088 case MINUS:
17089 /* These will all affect bits outside the field and need
17090 adjustment via MASK within the loop. */
17091 break;
17092
17093 default:
17094 gcc_unreachable ();
17095 }
17096
17097 /* Prepare to adjust the return value. */
17098 before = gen_reg_rtx (SImode);
17099 if (after)
17100 after = gen_reg_rtx (SImode);
17101 store_mode = mode = SImode;
17102 }
17103 }
17104
17105 mem = rs6000_pre_atomic_barrier (mem, model);
17106
17107 label = gen_label_rtx ();
17108 emit_label (label);
17109 label = gen_rtx_LABEL_REF (VOIDmode, label);
17110
17111 if (before == NULL_RTX)
17112 before = gen_reg_rtx (mode);
17113
17114 emit_load_locked (mode, before, mem);
17115
17116 if (code == NOT)
17117 {
17118 x = expand_simple_binop (mode, AND, before, val,
17119 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17120 after = expand_simple_unop (mode, NOT, x, after, 1);
17121 }
17122 else
17123 {
17124 after = expand_simple_binop (mode, code, before, val,
17125 after, 1, OPTAB_LIB_WIDEN);
17126 }
17127
17128 x = after;
17129 if (mask)
17130 {
17131 x = expand_simple_binop (SImode, AND, after, mask,
17132 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17133 x = rs6000_mask_atomic_subword (before, x, mask);
17134 }
17135 else if (store_mode != mode)
17136 x = convert_modes (store_mode, mode, x, 1);
17137
17138 cond = gen_reg_rtx (CCmode);
17139 emit_store_conditional (store_mode, cond, mem, x);
17140
17141 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17142 emit_unlikely_jump (x, label);
17143
17144 rs6000_post_atomic_barrier (model);
17145
17146 if (shift)
17147 {
17148 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
17149 then do the calculations in an SImode register. */
17150 if (orig_before)
17151 rs6000_finish_atomic_subword (orig_before, before, shift);
17152 if (orig_after)
17153 rs6000_finish_atomic_subword (orig_after, after, shift);
17154 }
17155 else if (store_mode != mode)
17156 {
17157 /* QImode/HImode on machines with lbarx/lharx where we do the native
17158 operation and then do the calculations in an SImode register. */
17159 if (orig_before)
17160 convert_move (orig_before, before, 1);
17161 if (orig_after)
17162 convert_move (orig_after, after, 1);
17163 }
17164 else if (orig_after && after != orig_after)
17165 emit_move_insn (orig_after, after);
17166 }
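
/* Editorial sketch (not part of rs6000.cc; subword_and_model is our
   name): the subword AND trick above widens VAL with ones outside the
   field so that the full-word AND leaves the neighbouring bytes
   untouched.  */
#if 0
static unsigned int
subword_and_model (unsigned int word, unsigned int val, unsigned int mask)
{
  return word & (val | ~mask);  /* bits outside MASK are preserved */
}
#endif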
17167
17168 static GTY(()) alias_set_type TOC_alias_set = -1;
17169
17170 alias_set_type
17171 get_TOC_alias_set (void)
17172 {
17173 if (TOC_alias_set == -1)
17174 TOC_alias_set = new_alias_set ();
17175 return TOC_alias_set;
17176 }
17177
17178 /* The mode the ABI uses for a word. This is not the same as word_mode
17179 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17180
17181 static scalar_int_mode
17182 rs6000_abi_word_mode (void)
17183 {
17184 return TARGET_32BIT ? SImode : DImode;
17185 }
17186
17187 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17188 static char *
17189 rs6000_offload_options (void)
17190 {
17191 if (TARGET_64BIT)
17192 return xstrdup ("-foffload-abi=lp64");
17193 else
17194 return xstrdup ("-foffload-abi=ilp32");
17195 }
17196
17197 \f
17198 /* A quick summary of the various types of 'constant-pool tables'
17199 under PowerPC:
17200
17201 Target       Flags           Name             One table per
17202 AIX          (none)          AIX TOC          object file
17203 AIX          -mfull-toc      AIX TOC          object file
17204 AIX          -mminimal-toc   AIX minimal TOC  translation unit
17205 SVR4/EABI    (none)          SVR4 SDATA       object file
17206 SVR4/EABI    -fpic           SVR4 pic         object file
17207 SVR4/EABI    -fPIC           SVR4 PIC         translation unit
17208 SVR4/EABI    -mrelocatable   EABI TOC         function
17209 SVR4/EABI    -maix           AIX TOC          object file
17210 SVR4/EABI    -maix -mminimal-toc
17211                              AIX minimal TOC  translation unit
17212
17213 Name             Reg.  Set by  Entries  Entries contain:
17214                                made by  addrs?  fp?      sum?
17215
17216 AIX TOC          2     crt0    as       Y       option   option
17217 AIX minimal TOC  30    prolog  gcc      Y       Y        option
17218 SVR4 SDATA       13    crt0    gcc      N       Y        N
17219 SVR4 pic         30    prolog  ld       Y       not yet  N
17220 SVR4 PIC         30    prolog  gcc      Y       option   option
17221 EABI TOC         30    prolog  gcc      Y       option   option
17222
17223 */
17224
17225 /* Hash functions for the hash table. */
17226
17227 static unsigned
17228 rs6000_hash_constant (rtx k)
17229 {
17230 enum rtx_code code = GET_CODE (k);
17231 machine_mode mode = GET_MODE (k);
17232 unsigned result = (code << 3) ^ mode;
17233 const char *format;
17234 int flen, fidx;
17235
17236 format = GET_RTX_FORMAT (code);
17237 flen = strlen (format);
17238 fidx = 0;
17239
17240 switch (code)
17241 {
17242 case LABEL_REF:
17243 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
17244
17245 case CONST_WIDE_INT:
17246 {
17247 int i;
17248 flen = CONST_WIDE_INT_NUNITS (k);
17249 for (i = 0; i < flen; i++)
17250 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
17251 return result;
17252 }
17253
17254 case CONST_DOUBLE:
17255 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
17256
17257 case CODE_LABEL:
17258 fidx = 3;
17259 break;
17260
17261 default:
17262 break;
17263 }
17264
17265 for (; fidx < flen; fidx++)
17266 switch (format[fidx])
17267 {
17268 case 's':
17269 {
17270 unsigned i, len;
17271 const char *str = XSTR (k, fidx);
17272 len = strlen (str);
17273 result = result * 613 + len;
17274 for (i = 0; i < len; i++)
17275 result = result * 613 + (unsigned) str[i];
17276 break;
17277 }
17278 case 'u':
17279 case 'e':
17280 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
17281 break;
17282 case 'i':
17283 case 'n':
17284 result = result * 613 + (unsigned) XINT (k, fidx);
17285 break;
17286 case 'w':
17287 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
17288 result = result * 613 + (unsigned) XWINT (k, fidx);
17289 else
17290 {
17291 size_t i;
17292 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
17293 result = result * 613 + (unsigned) (XWINT (k, fidx)
17294 >> CHAR_BIT * i);
17295 }
17296 break;
17297 case '0':
17298 break;
17299 default:
17300 gcc_unreachable ();
17301 }
17302
17303 return result;
17304 }
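
/* Editorial note: 613 and 1231 are small primes, used above as
   multiplicative mixing constants for this ad-hoc hash.  */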
17305
17306 hashval_t
17307 toc_hasher::hash (toc_hash_struct *thc)
17308 {
17309 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
17310 }
17311
17312 /* Compare H1 and H2 for equivalence. */
17313
17314 bool
17315 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
17316 {
17317 rtx r1 = h1->key;
17318 rtx r2 = h2->key;
17319
17320 if (h1->key_mode != h2->key_mode)
17321 return 0;
17322
17323 return rtx_equal_p (r1, r2);
17324 }
17325
17326 /* These are the names given by the C++ front-end to vtables, and
17327 vtable-like objects. Ideally, this logic should not be here;
17328 instead, there should be some programmatic way of inquiring as
17329 to whether or not an object is a vtable. */
17330
17331 #define VTABLE_NAME_P(NAME) \
17332 (startswith (name, "_vt.") \
17333 || startswith (name, "_ZTV") \
17334 || startswith (name, "_ZTT") \
17335 || startswith (name, "_ZTI") \
17336 || startswith (name, "_ZTC"))
17337
17338 #ifdef NO_DOLLAR_IN_LABEL
17339 /* Return a GGC-allocated character string translating dollar signs in
17340 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17341
17342 const char *
17343 rs6000_xcoff_strip_dollar (const char *name)
17344 {
17345 char *strip, *p;
17346 const char *q;
17347 size_t len;
17348
17349 q = (const char *) strchr (name, '$');
17350
17351 if (q == 0 || q == name)
17352 return name;
17353
17354 len = strlen (name);
17355 strip = XALLOCAVEC (char, len + 1);
17356 strcpy (strip, name);
17357 p = strip + (q - name);
17358 while (p)
17359 {
17360 *p = '_';
17361 p = strchr (p + 1, '$');
17362 }
17363
17364 return ggc_alloc_string (strip, len);
17365 }
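
/* Editorial example: "func$local" becomes "func_local"; a name with no
   '$' at all, or one whose first character is '$', is returned
   unchanged because the q == 0 || q == name test short-circuits.  */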
17366 #endif
17367
17368 void
17369 rs6000_output_symbol_ref (FILE *file, rtx x)
17370 {
17371 const char *name = XSTR (x, 0);
17372
17373 /* Currently C++ toc references to vtables can be emitted before it
17374 is decided whether the vtable is public or private. If this is
17375 the case, then the linker will eventually complain that there is
17376 a reference to an unknown section. Thus, for vtables only,
17377 we emit the TOC reference to reference the identifier and not the
17378 symbol. */
17379 if (VTABLE_NAME_P (name))
17380 {
17381 RS6000_OUTPUT_BASENAME (file, name);
17382 }
17383 else
17384 assemble_name (file, name);
17385 }
17386
17387 /* Output a TOC entry. We derive the entry name from what is being
17388 written. */
17389
17390 void
17391 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17392 {
17393 char buf[256];
17394 const char *name = buf;
17395 rtx base = x;
17396 HOST_WIDE_INT offset = 0;
17397
17398 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17399
17400 /* When the linker won't eliminate them, don't output duplicate
17401 TOC entries (this happens on AIX if there is any kind of TOC,
17402 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17403 CODE_LABELs. */
17404 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17405 {
17406 struct toc_hash_struct *h;
17407
17408 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17409 time because GGC is not initialized at that point. */
17410 if (toc_hash_table == NULL)
17411 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17412
17413 h = ggc_alloc<toc_hash_struct> ();
17414 h->key = x;
17415 h->key_mode = mode;
17416 h->labelno = labelno;
17417
17418 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17419 if (*found == NULL)
17420 *found = h;
17421 else /* This is indeed a duplicate.
17422 Set this label equal to that label. */
17423 {
17424 fputs ("\t.set ", file);
17425 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17426 fprintf (file, "%d,", labelno);
17427 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17428 fprintf (file, "%d\n", ((*found)->labelno));
17429
17430 #ifdef HAVE_AS_TLS
17431 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17432 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17433 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17434 {
17435 fputs ("\t.set ", file);
17436 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17437 fprintf (file, "%d,", labelno);
17438 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17439 fprintf (file, "%d\n", ((*found)->labelno));
17440 }
17441 #endif
17442 return;
17443 }
17444 }
17445
17446 /* If we're going to put a double constant in the TOC, make sure it's
17447 aligned properly when strict alignment is on. */
17448 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17449 && STRICT_ALIGNMENT
17450 && GET_MODE_BITSIZE (mode) >= 64
16451 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16452 ASM_OUTPUT_ALIGN (file, 3);
16453
17454
17455 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17456
17457 /* Handle FP constants specially. Note that if we have a minimal
17458 TOC, things we put here aren't actually in the TOC, so we can allow
17459 FP constants. */
17460 if (CONST_DOUBLE_P (x)
17461 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17462 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17463 {
17464 long k[4];
17465
17466 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17467 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17468 else
17469 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17470
17471 if (TARGET_64BIT)
17472 {
17473 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17474 fputs (DOUBLE_INT_ASM_OP, file);
17475 else
17476 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17477 k[0] & 0xffffffff, k[1] & 0xffffffff,
17478 k[2] & 0xffffffff, k[3] & 0xffffffff);
17479 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17480 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17481 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17482 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17483 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17484 return;
17485 }
17486 else
17487 {
17488 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17489 fputs ("\t.long ", file);
17490 else
17491 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17492 k[0] & 0xffffffff, k[1] & 0xffffffff,
17493 k[2] & 0xffffffff, k[3] & 0xffffffff);
17494 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17495 k[0] & 0xffffffff, k[1] & 0xffffffff,
17496 k[2] & 0xffffffff, k[3] & 0xffffffff);
17497 return;
17498 }
17499 }
17500 else if (CONST_DOUBLE_P (x)
17501 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17502 {
17503 long k[2];
17504
17505 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17506 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17507 else
17508 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17509
17510 if (TARGET_64BIT)
17511 {
17512 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17513 fputs (DOUBLE_INT_ASM_OP, file);
17514 else
17515 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17516 k[0] & 0xffffffff, k[1] & 0xffffffff);
17517 fprintf (file, "0x%lx%08lx\n",
17518 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17519 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17520 return;
17521 }
17522 else
17523 {
17524 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17525 fputs ("\t.long ", file);
17526 else
17527 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17528 k[0] & 0xffffffff, k[1] & 0xffffffff);
17529 fprintf (file, "0x%lx,0x%lx\n",
17530 k[0] & 0xffffffff, k[1] & 0xffffffff);
17531 return;
17532 }
17533 }
17534 else if (CONST_DOUBLE_P (x)
17535 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17536 {
17537 long l;
17538
17539 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17540 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17541 else
17542 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17543
17544 if (TARGET_64BIT)
17545 {
17546 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17547 fputs (DOUBLE_INT_ASM_OP, file);
17548 else
17549 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17550 if (WORDS_BIG_ENDIAN)
17551 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17552 else
17553 fprintf (file, "0x%lx\n", l & 0xffffffff);
17554 return;
17555 }
17556 else
17557 {
17558 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17559 fputs ("\t.long ", file);
17560 else
17561 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17562 fprintf (file, "0x%lx\n", l & 0xffffffff);
17563 return;
17564 }
17565 }
17566 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17567 {
17568 unsigned HOST_WIDE_INT low;
17569 HOST_WIDE_INT high;
17570
17571 low = INTVAL (x) & 0xffffffff;
17572 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17573
17574 /* TOC entries are always Pmode-sized, so when big-endian
17575 smaller integer constants in the TOC need to be padded.
17576 (This is still a win over putting the constants in
17577 a separate constant pool, because then we'd have
17578 to have both a TOC entry _and_ the actual constant.)
17579
17580 For a 32-bit target, CONST_INT values are loaded and shifted
17581 entirely within `low' and can be stored in one TOC entry. */
17582
17583 /* It would be easy to make this work, but it doesn't now. */
17584 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17585
17586 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17587 {
17588 low |= high << 32;
17589 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17590 high = (HOST_WIDE_INT) low >> 32;
17591 low &= 0xffffffff;
17592 }
17593
17594 if (TARGET_64BIT)
17595 {
17596 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17597 fputs (DOUBLE_INT_ASM_OP, file);
17598 else
17599 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17600 (long) high & 0xffffffff, (long) low & 0xffffffff);
17601 fprintf (file, "0x%lx%08lx\n",
17602 (long) high & 0xffffffff, (long) low & 0xffffffff);
17603 return;
17604 }
17605 else
17606 {
17607 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17608 {
17609 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17610 fputs ("\t.long ", file);
17611 else
17612 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17613 (long) high & 0xffffffff, (long) low & 0xffffffff);
17614 fprintf (file, "0x%lx,0x%lx\n",
17615 (long) high & 0xffffffff, (long) low & 0xffffffff);
17616 }
17617 else
17618 {
17619 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17620 fputs ("\t.long ", file);
17621 else
17622 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17623 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17624 }
17625 return;
17626 }
17627 }
17628
17629 if (GET_CODE (x) == CONST)
17630 {
17631 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17632 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17633
17634 base = XEXP (XEXP (x, 0), 0);
17635 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17636 }
17637
17638 switch (GET_CODE (base))
17639 {
17640 case SYMBOL_REF:
17641 name = XSTR (base, 0);
17642 break;
17643
17644 case LABEL_REF:
17645 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17646 CODE_LABEL_NUMBER (XEXP (base, 0)));
17647 break;
17648
17649 case CODE_LABEL:
17650 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17651 break;
17652
17653 default:
17654 gcc_unreachable ();
17655 }
17656
17657 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17658 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17659 else
17660 {
17661 fputs ("\t.tc ", file);
17662 RS6000_OUTPUT_BASENAME (file, name);
17663
17664 if (offset < 0)
17665 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17666 else if (offset)
17667 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17668
17669 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17670 after other TOC symbols, reducing overflow of small TOC access
17671 to [TC] symbols. */
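/* For example (sketch): with a non-small code model on XCOFF the
entry is written as ".tc name[TE]," rather than ".tc name[TC],".  */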
17672 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17673 ? "[TE]," : "[TC],", file);
17674 }
17675
17676 /* Currently C++ TOC references to vtables can be emitted before it
17677 is decided whether the vtable is public or private. If this is
17678 the case, then the linker will eventually complain that there is
17679 a TOC reference to an unknown section. Thus, for vtables only,
17680 we emit the TOC reference against the symbol rather than the
17681 section. */
17682 if (VTABLE_NAME_P (name))
17683 {
17684 RS6000_OUTPUT_BASENAME (file, name);
17685 if (offset < 0)
17686 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17687 else if (offset > 0)
17688 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17689 }
17690 else
17691 output_addr_const (file, x);
17692
17693 #if HAVE_AS_TLS
17694 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17695 {
17696 switch (SYMBOL_REF_TLS_MODEL (base))
17697 {
17698 case 0:
17699 break;
17700 case TLS_MODEL_LOCAL_EXEC:
17701 fputs ("@le", file);
17702 break;
17703 case TLS_MODEL_INITIAL_EXEC:
17704 fputs ("@ie", file);
17705 break;
17706 /* Use global-dynamic for local-dynamic. */
17707 case TLS_MODEL_GLOBAL_DYNAMIC:
17708 case TLS_MODEL_LOCAL_DYNAMIC:
17709 putc ('\n', file);
17710 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17711 fputs ("\t.tc .", file);
17712 RS6000_OUTPUT_BASENAME (file, name);
17713 fputs ("[TC],", file);
17714 output_addr_const (file, x);
17715 fputs ("@m", file);
17716 break;
17717 default:
17718 gcc_unreachable ();
17719 }
17720 }
17721 #endif
17722
17723 putc ('\n', file);
17724 }
17725 \f
17726 /* Output an assembler pseudo-op to write an ASCII string of N characters
17727 starting at P to FILE.
17728
17729 On the RS/6000, we have to do this using the .byte operation and
17730 write out special characters outside the quoted string.
17731 Also, the assembler is broken; very long strings are truncated,
17732 so we must artificially break them up early. */
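/* For example (illustrative): the five input characters
a  b  "  c  <newline>
are emitted by the loop below as
.byte "ab""c"
.byte 10
where the doubled quote stands for one literal '"'.  */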
17733
17734 void
17735 output_ascii (FILE *file, const char *p, int n)
17736 {
17737 char c;
17738 int i, count_string;
17739 const char *for_string = "\t.byte \"";
17740 const char *for_decimal = "\t.byte ";
17741 const char *to_close = NULL;
17742
17743 count_string = 0;
17744 for (i = 0; i < n; i++)
17745 {
17746 c = *p++;
17747 if (c >= ' ' && c < 0177)
17748 {
17749 if (for_string)
17750 fputs (for_string, file);
17751 putc (c, file);
17752
17753 /* Write two quotes to get one. */
17754 if (c == '"')
17755 {
17756 putc (c, file);
17757 ++count_string;
17758 }
17759
17760 for_string = NULL;
17761 for_decimal = "\"\n\t.byte ";
17762 to_close = "\"\n";
17763 ++count_string;
17764
17765 if (count_string >= 512)
17766 {
17767 fputs (to_close, file);
17768
17769 for_string = "\t.byte \"";
17770 for_decimal = "\t.byte ";
17771 to_close = NULL;
17772 count_string = 0;
17773 }
17774 }
17775 else
17776 {
17777 if (for_decimal)
17778 fputs (for_decimal, file);
17779 fprintf (file, "%d", c);
17780
17781 for_string = "\n\t.byte \"";
17782 for_decimal = ", ";
17783 to_close = "\n";
17784 count_string = 0;
17785 }
17786 }
17787
17788 /* Now close the string if we have written one. Then end the line. */
17789 if (to_close)
17790 fputs (to_close, file);
17791 }
17792 \f
17793 /* Generate a unique section name for FILENAME for a section type
17794 represented by SECTION_DESC. Output goes into BUF.
17795
17796 SECTION_DESC can be any string, as long as it is different for each
17797 possible section type.
17798
17799 We name the section in the same manner as xlc. The name begins with an
17800 underscore followed by the filename (after stripping any leading directory
17801 names) with the last period replaced by the string SECTION_DESC. If
17802 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17803 the name. */
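/* For example (hypothetical inputs): FILENAME "src/foo.c" with
SECTION_DESC "data" produces "_foodata" (directory prefix dropped,
final period replaced); "README", which has no period, would
produce "_READMEdata".  */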
17804
17805 void
17806 rs6000_gen_section_name (char **buf, const char *filename,
17807 const char *section_desc)
17808 {
17809 const char *q, *after_last_slash, *last_period = 0;
17810 char *p;
17811 int len;
17812
17813 after_last_slash = filename;
17814 for (q = filename; *q; q++)
17815 {
17816 if (*q == '/')
17817 after_last_slash = q + 1;
17818 else if (*q == '.')
17819 last_period = q;
17820 }
17821
17822 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17823 *buf = (char *) xmalloc (len);
17824
17825 p = *buf;
17826 *p++ = '_';
17827
17828 for (q = after_last_slash; *q; q++)
17829 {
17830 if (q == last_period)
17831 {
17832 strcpy (p, section_desc);
17833 p += strlen (section_desc);
17834 break;
17835 }
17836
17837 else if (ISALNUM (*q))
17838 *p++ = *q;
17839 }
17840
17841 if (last_period == 0)
17842 strcpy (p, section_desc);
17843 else
17844 *p = '\0';
17845 }
17846 \f
17847 /* Emit profile function. */
17848
17849 void
17850 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17851 {
17852 /* Non-standard profiling for kernels, which just saves LR then calls
17853 _mcount without worrying about arg saves. The idea is to change
17854 the function prologue as little as possible as it isn't easy to
17855 account for arg save/restore code added just for _mcount. */
17856 if (TARGET_PROFILE_KERNEL)
17857 return;
17858
17859 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17860 {
17861 #ifndef NO_PROFILE_COUNTERS
17862 # define NO_PROFILE_COUNTERS 0
17863 #endif
17864 if (NO_PROFILE_COUNTERS)
17865 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17866 LCT_NORMAL, VOIDmode);
17867 else
17868 {
17869 char buf[30];
17870 const char *label_name;
17871 rtx fun;
17872
17873 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17874 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17875 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17876
17877 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17878 LCT_NORMAL, VOIDmode, fun, Pmode);
17879 }
17880 }
17881 else if (DEFAULT_ABI == ABI_DARWIN)
17882 {
17883 const char *mcount_name = RS6000_MCOUNT;
17884 int caller_addr_regno = LR_REGNO;
17885
17886 /* Be conservative and always set this, at least for now. */
17887 crtl->uses_pic_offset_table = 1;
17888
17889 #if TARGET_MACHO
17890 /* For PIC code, set up a stub and collect the caller's address
17891 from r0, which is where the prologue puts it. */
17892 if (MACHOPIC_INDIRECT
17893 && crtl->uses_pic_offset_table)
17894 caller_addr_regno = 0;
17895 #endif
17896 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17897 LCT_NORMAL, VOIDmode,
17898 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17899 }
17900 }
17901
17902 /* Write function profiler code. */
17903
17904 void
17905 output_function_profiler (FILE *file, int labelno)
17906 {
17907 char buf[100];
17908
17909 switch (DEFAULT_ABI)
17910 {
17911 default:
17912 gcc_unreachable ();
17913
17914 case ABI_V4:
17915 if (!TARGET_32BIT)
17916 {
17917 warning (0, "no profiling of 64-bit code for this ABI");
17918 return;
17919 }
17920 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17921 fprintf (file, "\tmflr %s\n", reg_names[0]);
17922 if (NO_PROFILE_COUNTERS)
17923 {
17924 asm_fprintf (file, "\tstw %s,4(%s)\n",
17925 reg_names[0], reg_names[1]);
17926 }
17927 else if (TARGET_SECURE_PLT && flag_pic)
17928 {
17929 if (TARGET_LINK_STACK)
17930 {
17931 char name[32];
17932 get_ppc476_thunk_name (name);
17933 asm_fprintf (file, "\tbl %s\n", name);
17934 }
17935 else
17936 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17937 asm_fprintf (file, "\tstw %s,4(%s)\n",
17938 reg_names[0], reg_names[1]);
17939 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17940 asm_fprintf (file, "\taddis %s,%s,",
17941 reg_names[12], reg_names[12]);
17942 assemble_name (file, buf);
17943 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17944 assemble_name (file, buf);
17945 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17946 }
17947 else if (flag_pic == 1)
17948 {
17949 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17950 asm_fprintf (file, "\tstw %s,4(%s)\n",
17951 reg_names[0], reg_names[1]);
17952 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17953 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17954 assemble_name (file, buf);
17955 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17956 }
17957 else if (flag_pic > 1)
17958 {
17959 asm_fprintf (file, "\tstw %s,4(%s)\n",
17960 reg_names[0], reg_names[1]);
17961 /* Now, we need to get the address of the label. */
17962 if (TARGET_LINK_STACK)
17963 {
17964 char name[32];
17965 get_ppc476_thunk_name (name);
17966 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17967 assemble_name (file, buf);
17968 fputs ("-.\n1:", file);
17969 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17970 asm_fprintf (file, "\taddi %s,%s,4\n",
17971 reg_names[11], reg_names[11]);
17972 }
17973 else
17974 {
17975 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17976 assemble_name (file, buf);
17977 fputs ("-.\n1:", file);
17978 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17979 }
17980 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17981 reg_names[0], reg_names[11]);
17982 asm_fprintf (file, "\tadd %s,%s,%s\n",
17983 reg_names[0], reg_names[0], reg_names[11]);
17984 }
17985 else
17986 {
17987 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17988 assemble_name (file, buf);
17989 fputs ("@ha\n", file);
17990 asm_fprintf (file, "\tstw %s,4(%s)\n",
17991 reg_names[0], reg_names[1]);
17992 asm_fprintf (file, "\tla %s,", reg_names[0]);
17993 assemble_name (file, buf);
17994 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17995 }
17996
17997 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17998 fprintf (file, "\tbl %s%s\n",
17999 RS6000_MCOUNT, flag_pic ? "@plt" : "");
18000 break;
18001
18002 case ABI_AIX:
18003 case ABI_ELFv2:
18004 case ABI_DARWIN:
18005 /* Don't do anything, done in output_profile_hook (). */
18006 break;
18007 }
18008 }
18009
18010 \f
18011
18012 /* The following variable holds the last issued insn. */
18013
18014 static rtx_insn *last_scheduled_insn;
18015
18016 /* The following variable helps to balance issuing of load and
18017 store instructions. */
18018
18019 static int load_store_pendulum;
18020
18021 /* The following variable helps pair divide insns during scheduling. */
18022 static int divide_cnt;
18023 /* The following variable helps pair and alternate vector and vector load
18024 insns during scheduling. */
18025 static int vec_pairing;
18026
18027
18028 /* Power4 load update and store update instructions are cracked into a
18029 load or store and an integer insn which are executed in the same cycle.
18030 Branches have their own dispatch slot which does not count against the
18031 GCC issue rate, but it changes the program flow so there are no other
18032 instructions to issue in this cycle. */
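/* A worked example (sketch, for group-based scheduling): with an
issue budget MORE of 4, a cracked insn leaves 2 slots, a
microcoded insn ends the group and leaves 0, and any other
recognized insn leaves 3; see rs6000_variable_issue_1 below.  */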
18033
18034 static int
18035 rs6000_variable_issue_1 (rtx_insn *insn, int more)
18036 {
18037 last_scheduled_insn = insn;
18038 if (GET_CODE (PATTERN (insn)) == USE
18039 || GET_CODE (PATTERN (insn)) == CLOBBER)
18040 {
18041 cached_can_issue_more = more;
18042 return cached_can_issue_more;
18043 }
18044
18045 if (insn_terminates_group_p (insn, current_group))
18046 {
18047 cached_can_issue_more = 0;
18048 return cached_can_issue_more;
18049 }
18050
18051 /* If the insn has no reservation but we reach here, return MORE unchanged. */
18052 if (recog_memoized (insn) < 0)
18053 return more;
18054
18055 if (rs6000_sched_groups)
18056 {
18057 if (is_microcoded_insn (insn))
18058 cached_can_issue_more = 0;
18059 else if (is_cracked_insn (insn))
18060 cached_can_issue_more = more > 2 ? more - 2 : 0;
18061 else
18062 cached_can_issue_more = more - 1;
18063
18064 return cached_can_issue_more;
18065 }
18066
18067 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
18068 return 0;
18069
18070 cached_can_issue_more = more - 1;
18071 return cached_can_issue_more;
18072 }
18073
18074 static int
18075 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
18076 {
18077 int r = rs6000_variable_issue_1 (insn, more);
18078 if (verbose)
18079 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
18080 return r;
18081 }
18082
18083 /* Adjust the cost of a scheduling dependency. Return the new cost of
18084 the dependency of INSN on DEP_INSN. COST is the current cost. */
18085
18086 static int
18087 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
18088 unsigned int)
18089 {
18090 enum attr_type attr_type;
18091
18092 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
18093 return cost;
18094
18095 switch (dep_type)
18096 {
18097 case REG_DEP_TRUE:
18098 {
18099 /* Data dependency; DEP_INSN writes a register that INSN reads
18100 some cycles later. */
18101
18102 /* Separate a load from a narrower, dependent store. */
18103 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
18104 || rs6000_tune == PROCESSOR_POWER10
18105 || rs6000_tune == PROCESSOR_FUTURE)
18106 && GET_CODE (PATTERN (insn)) == SET
18107 && GET_CODE (PATTERN (dep_insn)) == SET
18108 && MEM_P (XEXP (PATTERN (insn), 1))
18109 && MEM_P (XEXP (PATTERN (dep_insn), 0))
18110 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
18111 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
18112 return cost + 14;
18113
18114 attr_type = get_attr_type (insn);
18115
18116 switch (attr_type)
18117 {
18118 case TYPE_JMPREG:
18119 /* Tell the first scheduling pass about the latency between
18120 a mtctr and bctr (and mtlr and br/blr). The first
18121 scheduling pass will not know about this latency since
18122 the mtctr instruction, which has the latency associated
18123 to it, will be generated by reload. */
18124 return 4;
18125 case TYPE_BRANCH:
18126 /* Leave some extra cycles between a compare and its
18127 dependent branch, to inhibit expensive mispredicts. */
18128 if ((rs6000_tune == PROCESSOR_PPC603
18129 || rs6000_tune == PROCESSOR_PPC604
18130 || rs6000_tune == PROCESSOR_PPC604e
18131 || rs6000_tune == PROCESSOR_PPC620
18132 || rs6000_tune == PROCESSOR_PPC630
18133 || rs6000_tune == PROCESSOR_PPC750
18134 || rs6000_tune == PROCESSOR_PPC7400
18135 || rs6000_tune == PROCESSOR_PPC7450
18136 || rs6000_tune == PROCESSOR_PPCE5500
18137 || rs6000_tune == PROCESSOR_PPCE6500
18138 || rs6000_tune == PROCESSOR_POWER4
18139 || rs6000_tune == PROCESSOR_POWER5
18140 || rs6000_tune == PROCESSOR_POWER7
18141 || rs6000_tune == PROCESSOR_POWER8
18142 || rs6000_tune == PROCESSOR_POWER9
18143 || rs6000_tune == PROCESSOR_POWER10
18144 || rs6000_tune == PROCESSOR_FUTURE
18145 || rs6000_tune == PROCESSOR_CELL)
18146 && recog_memoized (dep_insn)
18147 && (INSN_CODE (dep_insn) >= 0))
18148
18149 switch (get_attr_type (dep_insn))
18150 {
18151 case TYPE_CMP:
18152 case TYPE_FPCOMPARE:
18153 case TYPE_CR_LOGICAL:
18154 return cost + 2;
18155 case TYPE_EXTS:
18156 case TYPE_MUL:
18157 if (get_attr_dot (dep_insn) == DOT_YES)
18158 return cost + 2;
18159 else
18160 break;
18161 case TYPE_SHIFT:
18162 if (get_attr_dot (dep_insn) == DOT_YES
18163 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
18164 return cost + 2;
18165 else
18166 break;
18167 default:
18168 break;
18169 }
18170 break;
18171
18172 case TYPE_STORE:
18173 case TYPE_FPSTORE:
18174 if ((rs6000_tune == PROCESSOR_POWER6)
18175 && recog_memoized (dep_insn)
18176 && (INSN_CODE (dep_insn) >= 0))
18177 {
18178
18179 if (GET_CODE (PATTERN (insn)) != SET)
18180 /* If this happens, we have to extend this to schedule
18181 optimally. Return default for now. */
18182 return cost;
18183
18184 /* Adjust the cost for the case where the value written
18185 by a fixed point operation is used as the address
18186 gen value on a store. */
18187 switch (get_attr_type (dep_insn))
18188 {
18189 case TYPE_LOAD:
18190 case TYPE_CNTLZ:
18191 {
18192 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18193 return get_attr_sign_extend (dep_insn)
18194 == SIGN_EXTEND_YES ? 6 : 4;
18195 break;
18196 }
18197 case TYPE_SHIFT:
18198 {
18199 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18200 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18201 6 : 3;
18202 break;
18203 }
18204 case TYPE_INTEGER:
18205 case TYPE_ADD:
18206 case TYPE_LOGICAL:
18207 case TYPE_EXTS:
18208 case TYPE_INSERT:
18209 {
18210 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18211 return 3;
18212 break;
18213 }
18214 case TYPE_STORE:
18215 case TYPE_FPLOAD:
18216 case TYPE_FPSTORE:
18217 {
18218 if (get_attr_update (dep_insn) == UPDATE_YES
18219 && ! rs6000_store_data_bypass_p (dep_insn, insn))
18220 return 3;
18221 break;
18222 }
18223 case TYPE_MUL:
18224 {
18225 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18226 return 17;
18227 break;
18228 }
18229 case TYPE_DIV:
18230 {
18231 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18232 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18233 break;
18234 }
18235 default:
18236 break;
18237 }
18238 }
18239 break;
18240
18241 case TYPE_LOAD:
18242 if ((rs6000_tune == PROCESSOR_POWER6)
18243 && recog_memoized (dep_insn)
18244 && (INSN_CODE (dep_insn) >= 0))
18245 {
18246
18247 /* Adjust the cost for the case where the value written
18248 by a fixed point instruction is used within the address
18249 gen portion of a subsequent load(u)(x). */
18250 switch (get_attr_type (dep_insn))
18251 {
18252 case TYPE_LOAD:
18253 case TYPE_CNTLZ:
18254 {
18255 if (set_to_load_agen (dep_insn, insn))
18256 return get_attr_sign_extend (dep_insn)
18257 == SIGN_EXTEND_YES ? 6 : 4;
18258 break;
18259 }
18260 case TYPE_SHIFT:
18261 {
18262 if (set_to_load_agen (dep_insn, insn))
18263 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18264 6 : 3;
18265 break;
18266 }
18267 case TYPE_INTEGER:
18268 case TYPE_ADD:
18269 case TYPE_LOGICAL:
18270 case TYPE_EXTS:
18271 case TYPE_INSERT:
18272 {
18273 if (set_to_load_agen (dep_insn, insn))
18274 return 3;
18275 break;
18276 }
18277 case TYPE_STORE:
18278 case TYPE_FPLOAD:
18279 case TYPE_FPSTORE:
18280 {
18281 if (get_attr_update (dep_insn) == UPDATE_YES
18282 && set_to_load_agen (dep_insn, insn))
18283 return 3;
18284 break;
18285 }
18286 case TYPE_MUL:
18287 {
18288 if (set_to_load_agen (dep_insn, insn))
18289 return 17;
18290 break;
18291 }
18292 case TYPE_DIV:
18293 {
18294 if (set_to_load_agen (dep_insn, insn))
18295 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18296 break;
18297 }
18298 default:
18299 break;
18300 }
18301 }
18302 break;
18303
18304 default:
18305 break;
18306 }
18307
18308 /* Fall out to return default cost. */
18309 }
18310 break;
18311
18312 case REG_DEP_OUTPUT:
18313 /* Output dependency; DEP_INSN writes a register that INSN writes some
18314 cycles later. */
18315 if ((rs6000_tune == PROCESSOR_POWER6)
18316 && recog_memoized (dep_insn)
18317 && (INSN_CODE (dep_insn) >= 0))
18318 {
18319 attr_type = get_attr_type (insn);
18320
18321 switch (attr_type)
18322 {
18323 case TYPE_FP:
18324 case TYPE_FPSIMPLE:
18325 if (get_attr_type (dep_insn) == TYPE_FP
18326 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
18327 return 1;
18328 break;
18329 default:
18330 break;
18331 }
18332 }
18333 /* Fall through, no cost for output dependency. */
18334 /* FALLTHRU */
18335
18336 case REG_DEP_ANTI:
18337 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18338 cycles later. */
18339 return 0;
18340
18341 default:
18342 gcc_unreachable ();
18343 }
18344
18345 return cost;
18346 }
18347
18348 /* Debug version of rs6000_adjust_cost. */
18349
18350 static int
18351 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18352 int cost, unsigned int dw)
18353 {
18354 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18355
18356 if (ret != cost)
18357 {
18358 const char *dep;
18359
18360 switch (dep_type)
18361 {
18362 default: dep = "unknown depencency"; break;
18363 case REG_DEP_TRUE: dep = "data dependency"; break;
18364 case REG_DEP_OUTPUT: dep = "output dependency"; break;
18365 case REG_DEP_ANTI: dep = "anti dependency"; break;
18366 }
18367
18368 fprintf (stderr,
18369 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18370 "%s, insn:\n", ret, cost, dep);
18371
18372 debug_rtx (insn);
18373 }
18374
18375 return ret;
18376 }
18377
18378 /* Return true if INSN is microcoded.
18379 Return false otherwise. */
18380
18381 static bool
18382 is_microcoded_insn (rtx_insn *insn)
18383 {
18384 if (!insn || !NONDEBUG_INSN_P (insn)
18385 || GET_CODE (PATTERN (insn)) == USE
18386 || GET_CODE (PATTERN (insn)) == CLOBBER)
18387 return false;
18388
18389 if (rs6000_tune == PROCESSOR_CELL)
18390 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18391
18392 if (rs6000_sched_groups
18393 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18394 {
18395 enum attr_type type = get_attr_type (insn);
18396 if ((type == TYPE_LOAD
18397 && get_attr_update (insn) == UPDATE_YES
18398 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18399 || ((type == TYPE_LOAD || type == TYPE_STORE)
18400 && get_attr_update (insn) == UPDATE_YES
18401 && get_attr_indexed (insn) == INDEXED_YES)
18402 || type == TYPE_MFCR)
18403 return true;
18404 }
18405
18406 return false;
18407 }
18408
18409 /* Return true if INSN is cracked into 2 instructions
18410 by the processor (and therefore occupies 2 issue slots). */
18411
18412 static bool
18413 is_cracked_insn (rtx_insn *insn)
18414 {
18415 if (!insn || !NONDEBUG_INSN_P (insn)
18416 || GET_CODE (PATTERN (insn)) == USE
18417 || GET_CODE (PATTERN (insn)) == CLOBBER)
18418 return false;
18419
18420 if (rs6000_sched_groups
18421 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18422 {
18423 enum attr_type type = get_attr_type (insn);
18424 if ((type == TYPE_LOAD
18425 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18426 && get_attr_update (insn) == UPDATE_NO)
18427 || (type == TYPE_LOAD
18428 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18429 && get_attr_update (insn) == UPDATE_YES
18430 && get_attr_indexed (insn) == INDEXED_NO)
18431 || (type == TYPE_STORE
18432 && get_attr_update (insn) == UPDATE_YES
18433 && get_attr_indexed (insn) == INDEXED_NO)
18434 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18435 && get_attr_update (insn) == UPDATE_YES)
18436 || (type == TYPE_CR_LOGICAL
18437 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18438 || (type == TYPE_EXTS
18439 && get_attr_dot (insn) == DOT_YES)
18440 || (type == TYPE_SHIFT
18441 && get_attr_dot (insn) == DOT_YES
18442 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18443 || (type == TYPE_MUL
18444 && get_attr_dot (insn) == DOT_YES)
18445 || type == TYPE_DIV
18446 || (type == TYPE_INSERT
18447 && get_attr_size (insn) == SIZE_32))
18448 return true;
18449 }
18450
18451 return false;
18452 }
18453
18454 /* Return true if INSN can be issued only from
18455 the branch slot. */
18456
18457 static bool
18458 is_branch_slot_insn (rtx_insn *insn)
18459 {
18460 if (!insn || !NONDEBUG_INSN_P (insn)
18461 || GET_CODE (PATTERN (insn)) == USE
18462 || GET_CODE (PATTERN (insn)) == CLOBBER)
18463 return false;
18464
18465 if (rs6000_sched_groups)
18466 {
18467 enum attr_type type = get_attr_type (insn);
18468 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18469 return true;
18470 return false;
18471 }
18472
18473 return false;
18474 }
18475
18476 /* Return true if OUT_INSN sets a value that is
18477 used in the address generation computation of IN_INSN. */
18478 static bool
18479 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18480 {
18481 rtx out_set, in_set;
18482
18483 /* For performance reasons, only handle the simple case where
18484 both loads are a single_set. */
18485 out_set = single_set (out_insn);
18486 if (out_set)
18487 {
18488 in_set = single_set (in_insn);
18489 if (in_set)
18490 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18491 }
18492
18493 return false;
18494 }
18495
18496 /* Try to determine base/offset/size parts of the given MEM.
18497 Return true if successful, false if the values couldn't
18498 all be determined.
18499
18500 This function only looks for REG or REG+CONST address forms.
18501 REG+REG address form will return false. */
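/* For example (sketch): for
(mem:DI (plus:DI (reg:DI 9) (const_int 16)))
with a known 8-byte size, *BASE becomes r9, *OFFSET 16 and
*SIZE 8. An indexed address like (plus (reg) (reg)) fails the
final REG_P check and returns false.  */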
18502
18503 static bool
18504 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18505 HOST_WIDE_INT *size)
18506 {
18507 rtx addr_rtx;
18508 if (MEM_SIZE_KNOWN_P (mem))
18509 *size = MEM_SIZE (mem);
18510 else
18511 return false;
18512
18513 addr_rtx = (XEXP (mem, 0));
18514 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18515 addr_rtx = XEXP (addr_rtx, 1);
18516
18517 *offset = 0;
18518 while (GET_CODE (addr_rtx) == PLUS
18519 && CONST_INT_P (XEXP (addr_rtx, 1)))
18520 {
18521 *offset += INTVAL (XEXP (addr_rtx, 1));
18522 addr_rtx = XEXP (addr_rtx, 0);
18523 }
18524 if (!REG_P (addr_rtx))
18525 return false;
18526
18527 *base = addr_rtx;
18528 return true;
18529 }
18530
18531 /* If the target storage locations of arguments MEM1 and MEM2 are
18532 adjacent, then return the argument that has the lower address.
18533 Otherwise, return NULL_RTX. */
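/* For instance (assuming both sizes are known): two 8-byte stores
at r9+0 and r9+8 satisfy off1 + size1 == off2, so MEM1, the
lower-addressed reference, is returned.  */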
18534
18535 static rtx
18536 adjacent_mem_locations (rtx mem1, rtx mem2)
18537 {
18538 rtx reg1, reg2;
18539 HOST_WIDE_INT off1, size1, off2, size2;
18540
18541 if (MEM_P (mem1)
18542 && MEM_P (mem2)
18543 && get_memref_parts (mem1, &reg1, &off1, &size1)
18544 && get_memref_parts (mem2, &reg2, &off2, &size2)
18545 && REGNO (reg1) == REGNO (reg2))
18546 {
18547 if (off1 + size1 == off2)
18548 return mem1;
18549 else if (off2 + size2 == off1)
18550 return mem2;
18551 }
18552
18553 return NULL_RTX;
18554 }
18555
18556 /* Return true if it can be determined that the two MEM
18557 locations overlap by at least 1 byte, based on base reg/offset/size. */
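/* E.g. (sketch): accesses at r9+0 of size 8 and r9+4 of size 8
overlap, since off1 <= off2 and off1 + size1 > off2.  */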
18558
18559 static bool
18560 mem_locations_overlap (rtx mem1, rtx mem2)
18561 {
18562 rtx reg1, reg2;
18563 HOST_WIDE_INT off1, size1, off2, size2;
18564
18565 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18566 && get_memref_parts (mem2, &reg2, &off2, &size2))
18567 return ((REGNO (reg1) == REGNO (reg2))
18568 && (((off1 <= off2) && (off1 + size1 > off2))
18569 || ((off2 <= off1) && (off2 + size2 > off1))));
18570
18571 return false;
18572 }
18573
18574 /* A C statement (sans semicolon) to update the integer scheduling
18575 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18576 INSN earlier, reduce the priority to execute INSN later. Do not
18577 define this macro if you do not need to adjust the scheduling
18578 priorities of insns. */
18579
18580 static int
18581 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18582 {
18583 rtx load_mem, str_mem;
18584 /* On machines (like the 750) which have asymmetric integer units,
18585 where one integer unit can do multiply and divides and the other
18586 can't, reduce the priority of multiply/divide so it is scheduled
18587 before other integer operations. */
18588
18589 #if 0
18590 if (! INSN_P (insn))
18591 return priority;
18592
18593 if (GET_CODE (PATTERN (insn)) == USE)
18594 return priority;
18595
18596 switch (rs6000_tune) {
18597 case PROCESSOR_PPC750:
18598 switch (get_attr_type (insn))
18599 {
18600 default:
18601 break;
18602
18603 case TYPE_MUL:
18604 case TYPE_DIV:
18605 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18606 priority, priority);
18607 if (priority >= 0 && priority < 0x01000000)
18608 priority >>= 3;
18609 break;
18610 }
18611 }
18612 #endif
18613
18614 if (insn_must_be_first_in_group (insn)
18615 && reload_completed
18616 && current_sched_info->sched_max_insns_priority
18617 && rs6000_sched_restricted_insns_priority)
18618 {
18619
18620 /* Prioritize insns that can be dispatched only in the first
18621 dispatch slot. */
18622 if (rs6000_sched_restricted_insns_priority == 1)
18623 /* Attach highest priority to insn. This means that in
18624 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18625 precede 'priority' (critical path) considerations. */
18626 return current_sched_info->sched_max_insns_priority;
18627 else if (rs6000_sched_restricted_insns_priority == 2)
18628 /* Increase priority of insn by a minimal amount. This means that in
18629 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18630 considerations precede dispatch-slot restriction considerations. */
18631 return (priority + 1);
18632 }
18633
18634 if (rs6000_tune == PROCESSOR_POWER6
18635 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18636 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18637 /* Attach highest priority to insn if the scheduler has just issued two
18638 stores and this instruction is a load, or two loads and this instruction
18639 is a store. Power6 wants loads and stores scheduled alternately
18640 when possible */
18641 return current_sched_info->sched_max_insns_priority;
18642
18643 return priority;
18644 }
18645
18646 /* Return true if the instruction is nonpipelined on the Cell. */
18647 static bool
18648 is_nonpipeline_insn (rtx_insn *insn)
18649 {
18650 enum attr_type type;
18651 if (!insn || !NONDEBUG_INSN_P (insn)
18652 || GET_CODE (PATTERN (insn)) == USE
18653 || GET_CODE (PATTERN (insn)) == CLOBBER)
18654 return false;
18655
18656 type = get_attr_type (insn);
18657 if (type == TYPE_MUL
18658 || type == TYPE_DIV
18659 || type == TYPE_SDIV
18660 || type == TYPE_DDIV
18661 || type == TYPE_SSQRT
18662 || type == TYPE_DSQRT
18663 || type == TYPE_MFCR
18664 || type == TYPE_MFCRF
18665 || type == TYPE_MFJMPR)
18666 {
18667 return true;
18668 }
18669 return false;
18670 }
18671
18672
18673 /* Return how many instructions the machine can issue per cycle. */
18674
18675 static int
18676 rs6000_issue_rate (void)
18677 {
18678 /* Unless scheduling for register pressure, use issue rate of 1 for
18679 first scheduling pass to decrease degradation. */
18680 if (!reload_completed && !flag_sched_pressure)
18681 return 1;
18682
18683 switch (rs6000_tune) {
18684 case PROCESSOR_RS64A:
18685 case PROCESSOR_PPC601: /* ? */
18686 case PROCESSOR_PPC7450:
18687 return 3;
18688 case PROCESSOR_PPC440:
18689 case PROCESSOR_PPC603:
18690 case PROCESSOR_PPC750:
18691 case PROCESSOR_PPC7400:
18692 case PROCESSOR_PPC8540:
18693 case PROCESSOR_PPC8548:
18694 case PROCESSOR_CELL:
18695 case PROCESSOR_PPCE300C2:
18696 case PROCESSOR_PPCE300C3:
18697 case PROCESSOR_PPCE500MC:
18698 case PROCESSOR_PPCE500MC64:
18699 case PROCESSOR_PPCE5500:
18700 case PROCESSOR_PPCE6500:
18701 case PROCESSOR_TITAN:
18702 return 2;
18703 case PROCESSOR_PPC476:
18704 case PROCESSOR_PPC604:
18705 case PROCESSOR_PPC604e:
18706 case PROCESSOR_PPC620:
18707 case PROCESSOR_PPC630:
18708 return 4;
18709 case PROCESSOR_POWER4:
18710 case PROCESSOR_POWER5:
18711 case PROCESSOR_POWER6:
18712 case PROCESSOR_POWER7:
18713 return 5;
18714 case PROCESSOR_POWER8:
18715 return 7;
18716 case PROCESSOR_POWER9:
18717 return 6;
18718 case PROCESSOR_POWER10:
18719 case PROCESSOR_FUTURE:
18720 return 8;
18721 default:
18722 return 1;
18723 }
18724 }
18725
18726 /* Return how many instructions to look ahead for better insn
18727 scheduling. */
18728
18729 static int
18730 rs6000_use_sched_lookahead (void)
18731 {
18732 switch (rs6000_tune)
18733 {
18734 case PROCESSOR_PPC8540:
18735 case PROCESSOR_PPC8548:
18736 return 4;
18737
18738 case PROCESSOR_CELL:
18739 return (reload_completed ? 8 : 0);
18740
18741 default:
18742 return 0;
18743 }
18744 }
18745
18746 /* We are choosing an insn from the ready queue. Return zero if INSN can be
18747 chosen. */
18748 static int
18749 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18750 {
18751 if (ready_index == 0)
18752 return 0;
18753
18754 if (rs6000_tune != PROCESSOR_CELL)
18755 return 0;
18756
18757 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18758
18759 if (!reload_completed
18760 || is_nonpipeline_insn (insn)
18761 || is_microcoded_insn (insn))
18762 return 1;
18763
18764 return 0;
18765 }
18766
18767 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18768 and return true. */
18769
18770 static bool
18771 find_mem_ref (rtx pat, rtx *mem_ref)
18772 {
18773 const char * fmt;
18774 int i, j;
18775
18776 /* stack_tie does not produce any real memory traffic. */
18777 if (tie_operand (pat, VOIDmode))
18778 return false;
18779
18780 if (MEM_P (pat))
18781 {
18782 *mem_ref = pat;
18783 return true;
18784 }
18785
18786 /* Recursively process the pattern. */
18787 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18788
18789 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18790 {
18791 if (fmt[i] == 'e')
18792 {
18793 if (find_mem_ref (XEXP (pat, i), mem_ref))
18794 return true;
18795 }
18796 else if (fmt[i] == 'E')
18797 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18798 {
18799 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18800 return true;
18801 }
18802 }
18803
18804 return false;
18805 }
18806
18807 /* Determine if PAT is a PATTERN of a load insn. */
18808
18809 static bool
18810 is_load_insn1 (rtx pat, rtx *load_mem)
18811 {
18812 if (!pat)
18813 return false;
18814
18815 if (GET_CODE (pat) == SET)
18816 {
18817 if (REG_P (SET_DEST (pat)))
18818 return find_mem_ref (SET_SRC (pat), load_mem);
18819 else
18820 return false;
18821 }
18822
18823 if (GET_CODE (pat) == PARALLEL)
18824 {
18825 int i;
18826
18827 for (i = 0; i < XVECLEN (pat, 0); i++)
18828 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18829 return true;
18830 }
18831
18832 return false;
18833 }
18834
18835 /* Determine if INSN loads from memory. */
18836
18837 static bool
18838 is_load_insn (rtx insn, rtx *load_mem)
18839 {
18840 if (!insn || !INSN_P (insn))
18841 return false;
18842
18843 if (CALL_P (insn))
18844 return false;
18845
18846 return is_load_insn1 (PATTERN (insn), load_mem);
18847 }
18848
18849 /* Determine if PAT is a PATTERN of a store insn. */
18850
18851 static bool
18852 is_store_insn1 (rtx pat, rtx *str_mem)
18853 {
18854 if (!pat)
18855 return false;
18856
18857 if (GET_CODE (pat) == SET)
18858 {
18859 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18860 return find_mem_ref (SET_DEST (pat), str_mem);
18861 else
18862 return false;
18863 }
18864
18865 if (GET_CODE (pat) == PARALLEL)
18866 {
18867 int i;
18868
18869 for (i = 0; i < XVECLEN (pat, 0); i++)
18870 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18871 return true;
18872 }
18873
18874 return false;
18875 }
18876
18877 /* Determine if INSN stores to memory. */
18878
18879 static bool
18880 is_store_insn (rtx insn, rtx *str_mem)
18881 {
18882 if (!insn || !INSN_P (insn))
18883 return false;
18884
18885 return is_store_insn1 (PATTERN (insn), str_mem);
18886 }
18887
18888 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18889
18890 static bool
18891 is_power9_pairable_vec_type (enum attr_type type)
18892 {
18893 switch (type)
18894 {
18895 case TYPE_VECSIMPLE:
18896 case TYPE_VECCOMPLEX:
18897 case TYPE_VECDIV:
18898 case TYPE_VECCMP:
18899 case TYPE_VECPERM:
18900 case TYPE_VECFLOAT:
18901 case TYPE_VECFDIV:
18902 case TYPE_VECDOUBLE:
18903 return true;
18904 default:
18905 break;
18906 }
18907 return false;
18908 }
18909
18910 /* Returns whether the dependence between INSN and NEXT is considered
18911 costly by the given target. */
18912
18913 static bool
18914 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18915 {
18916 rtx insn;
18917 rtx next;
18918 rtx load_mem, str_mem;
18919
18920 /* If the flag is not enabled, no dependence is considered costly;
18921 allow all dependent insns in the same group.
18922 This is the most aggressive option. */
18923 if (rs6000_sched_costly_dep == no_dep_costly)
18924 return false;
18925
18926 /* If the flag is set to 1, a dependence is always considered costly;
18927 do not allow dependent instructions in the same group.
18928 This is the most conservative option. */
18929 if (rs6000_sched_costly_dep == all_deps_costly)
18930 return true;
18931
18932 insn = DEP_PRO (dep);
18933 next = DEP_CON (dep);
18934
18935 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18936 && is_load_insn (next, &load_mem)
18937 && is_store_insn (insn, &str_mem))
18938 /* Prevent load after store in the same group. */
18939 return true;
18940
18941 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18942 && is_load_insn (next, &load_mem)
18943 && is_store_insn (insn, &str_mem)
18944 && DEP_TYPE (dep) == REG_DEP_TRUE
18945 && mem_locations_overlap(str_mem, load_mem))
18946 /* Prevent load after store in the same group if it is a true
18947 dependence. */
18948 return true;
18949
18950 /* The flag is set to X; dependences with latency >= X are considered costly,
18951 and will not be scheduled in the same group. */
18952 if (rs6000_sched_costly_dep <= max_dep_latency
18953 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18954 return true;
18955
18956 return false;
18957 }
18958
18959 /* Return the next insn after INSN that is found before TAIL is reached,
18960 skipping any "non-active" insns - insns that will not actually occupy
18961 an issue slot. Return NULL_RTX if such an insn is not found. */
18962
18963 static rtx_insn *
18964 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18965 {
18966 if (insn == NULL_RTX || insn == tail)
18967 return NULL;
18968
18969 while (1)
18970 {
18971 insn = NEXT_INSN (insn);
18972 if (insn == NULL_RTX || insn == tail)
18973 return NULL;
18974
18975 if (CALL_P (insn)
18976 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18977 || (NONJUMP_INSN_P (insn)
18978 && GET_CODE (PATTERN (insn)) != USE
18979 && GET_CODE (PATTERN (insn)) != CLOBBER
18980 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18981 break;
18982 }
18983 return insn;
18984 }
18985
18986 /* Move instruction at POS to the end of the READY list. */
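/* E.g. with READY = {A, B, C, D}, POS = 1 and LASTPOS = 3 the
result is {A, C, D, B}; insns near the end of the ready list
are the ones the scheduler prefers to issue next.  */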
18987
18988 static void
18989 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18990 {
18991 rtx_insn *tmp;
18992 int i;
18993
18994 tmp = ready[pos];
18995 for (i = pos; i < lastpos; i++)
18996 ready[i] = ready[i + 1];
18997 ready[lastpos] = tmp;
18998 }
18999
19000 /* Do Power6 specific sched_reorder2 reordering of ready list. */
19001
19002 static int
19003 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
19004 {
19005 /* For Power6, we need to handle some special cases to try to keep the
19006 store queue from overflowing and triggering expensive flushes.
19007
19008 This code monitors how load and store instructions are being issued
19009 and skews the ready list one way or the other to increase the likelihood
19010 that a desired instruction is issued at the proper time.
19011
19012 A couple of things are done. First, we maintain a "load_store_pendulum"
19013 to track the current state of load/store issue.
19014
19015 - If the pendulum is at zero, then no loads or stores have been
19016 issued in the current cycle so we do nothing.
19017
19018 - If the pendulum is 1, then a single load has been issued in this
19019 cycle and we attempt to locate another load in the ready list to
19020 issue with it.
19021
19022 - If the pendulum is -2, then two stores have already been
19023 issued in this cycle, so we increase the priority of the first load
19024 in the ready list to increase its likelihood of being chosen first
19025 in the next cycle.
19026
19027 - If the pendulum is -1, then a single store has been issued in this
19028 cycle and we attempt to locate another store in the ready list to
19029 issue with it, preferring a store to an adjacent memory location to
19030 facilitate store pairing in the store queue.
19031
19032 - If the pendulum is 2, then two loads have already been
19033 issued in this cycle, so we increase the priority of the first store
19034 in the ready list to increase its likelihood of being chosen first
19035 in the next cycle.
19036
19037 - If the pendulum < -2 or > 2, then do nothing.
19038
19039 Note: This code covers the most common scenarios. There exist
19040 non-load/store instructions which make use of the LSU and which
19041 would need to be accounted for to strictly model the behavior
19042 of the machine. Those instructions are currently unaccounted
19043 for to help minimize compile time overhead of this code.
19044 */
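/* A short worked trace (illustrative): starting at 0, issuing
store, store moves the pendulum to -2, so the first load on the
ready list gets a priority boost; issuing load, load moves it to
+2 and the first store is boosted instead.  */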
19045 int pos;
19046 rtx load_mem, str_mem;
19047
19048 if (is_store_insn (last_scheduled_insn, &str_mem))
19049 /* Issuing a store, swing the load_store_pendulum to the left */
19050 load_store_pendulum--;
19051 else if (is_load_insn (last_scheduled_insn, &load_mem))
19052 /* Issuing a load, swing the load_store_pendulum to the right */
19053 load_store_pendulum++;
19054 else
19055 return cached_can_issue_more;
19056
19057 /* If the pendulum is balanced, or there is only one instruction on
19058 the ready list, then all is well, so return. */
19059 if ((load_store_pendulum == 0) || (lastpos <= 0))
19060 return cached_can_issue_more;
19061
19062 if (load_store_pendulum == 1)
19063 {
19064 /* A load has been issued in this cycle. Scan the ready list
19065 for another load to issue with it */
19066 pos = lastpos;
19067
19068 while (pos >= 0)
19069 {
19070 if (is_load_insn (ready[pos], &load_mem))
19071 {
19072 /* Found a load. Move it to the head of the ready list,
19073 and adjust its priority so that it is more likely to
19074 stay there */
19075 move_to_end_of_ready (ready, pos, lastpos);
19076
19077 if (!sel_sched_p ()
19078 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19079 INSN_PRIORITY (ready[lastpos])++;
19080 break;
19081 }
19082 pos--;
19083 }
19084 }
19085 else if (load_store_pendulum == -2)
19086 {
19087 /* Two stores have been issued in this cycle. Increase the
19088 priority of the first load in the ready list to favor it for
19089 issuing in the next cycle. */
19090 pos = lastpos;
19091
19092 while (pos >= 0)
19093 {
19094 if (is_load_insn (ready[pos], &load_mem)
19095 && !sel_sched_p ()
19096 && INSN_PRIORITY_KNOWN (ready[pos]))
19097 {
19098 INSN_PRIORITY (ready[pos])++;
19099
19100 /* Adjust the pendulum to account for the fact that a load
19101 was found and increased in priority. This is to prevent
19102 increasing the priority of multiple loads */
19103 load_store_pendulum--;
19104
19105 break;
19106 }
19107 pos--;
19108 }
19109 }
19110 else if (load_store_pendulum == -1)
19111 {
19112 /* A store has been issued in this cycle. Scan the ready list for
19113 another store to issue with it, preferring a store to an adjacent
19114 memory location */
19115 int first_store_pos = -1;
19116
19117 pos = lastpos;
19118
19119 while (pos >= 0)
19120 {
19121 if (is_store_insn (ready[pos], &str_mem))
19122 {
19123 rtx str_mem2;
19124 /* Maintain the index of the first store found on the
19125 list */
19126 if (first_store_pos == -1)
19127 first_store_pos = pos;
19128
19129 if (is_store_insn (last_scheduled_insn, &str_mem2)
19130 && adjacent_mem_locations (str_mem, str_mem2))
19131 {
19132 /* Found an adjacent store. Move it to the head of the
19133 ready list, and adjust its priority so that it is
19134 more likely to stay there */
19135 move_to_end_of_ready (ready, pos, lastpos);
19136
19137 if (!sel_sched_p ()
19138 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19139 INSN_PRIORITY (ready[lastpos])++;
19140
19141 first_store_pos = -1;
19142
19143 break;
19144 }
19145 }
19146 pos--;
19147 }
19148
19149 if (first_store_pos >= 0)
19150 {
19151 /* An adjacent store wasn't found, but a non-adjacent store was,
19152 so move the non-adjacent store to the front of the ready
19153 list, and adjust its priority so that it is more likely to
19154 stay there. */
19155 move_to_end_of_ready (ready, first_store_pos, lastpos);
19156 if (!sel_sched_p ()
19157 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19158 INSN_PRIORITY (ready[lastpos])++;
19159 }
19160 }
19161 else if (load_store_pendulum == 2)
19162 {
19163 /* Two loads have been issued in this cycle. Increase the priority
19164 of the first store in the ready list to favor it for issuing in
19165 the next cycle. */
19166 pos = lastpos;
19167
19168 while (pos >= 0)
19169 {
19170 if (is_store_insn (ready[pos], &str_mem)
19171 && !sel_sched_p ()
19172 && INSN_PRIORITY_KNOWN (ready[pos]))
19173 {
19174 INSN_PRIORITY (ready[pos])++;
19175
19176 /* Adjust the pendulum to account for the fact that a store
19177 was found and increased in priority. This is to prevent
19178 increasing the priority of multiple stores */
19179 load_store_pendulum++;
19180
19181 break;
19182 }
19183 pos--;
19184 }
19185 }
19186
19187 return cached_can_issue_more;
19188 }
19189
19190 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19191
19192 static int
19193 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
19194 {
19195 int pos;
19196 enum attr_type type, type2;
19197
19198 type = get_attr_type (last_scheduled_insn);
19199
19200 /* Try to issue fixed point divides back-to-back in pairs so they will be
19201 routed to separate execution units and execute in parallel. */
19202 if (type == TYPE_DIV && divide_cnt == 0)
19203 {
19204 /* First divide has been scheduled. */
19205 divide_cnt = 1;
19206
19207 /* Scan the ready list looking for another divide, if found move it
19208 to the end of the list so it is chosen next. */
19209 pos = lastpos;
19210 while (pos >= 0)
19211 {
19212 if (recog_memoized (ready[pos]) >= 0
19213 && get_attr_type (ready[pos]) == TYPE_DIV)
19214 {
19215 move_to_end_of_ready (ready, pos, lastpos);
19216 break;
19217 }
19218 pos--;
19219 }
19220 }
19221 else
19222 {
19223 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19224 divide_cnt = 0;
19225
19226 /* The best dispatch throughput for vector and vector load insns can be
19227 achieved by interleaving a vector and vector load such that they'll
19228 dispatch to the same superslice. If this pairing cannot be achieved
19229 then it is best to pair vector insns together and vector load insns
19230 together.
19231
19232 To aid in this pairing, vec_pairing maintains the current state with
19233 the following values:
19234
19235 0 : Initial state, no vecload/vector pairing has been started.
19236
19237 1 : A vecload or vector insn has been issued and a candidate for
19238 pairing has been found and moved to the end of the ready
19239 list. */
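/* Example (sketch): a vecload issues while vec_pairing == 0; the
scan below first looks for a pairable vector insn and, failing
that, another vecload. Either hit is moved to the end of the
ready list and vec_pairing is set to 1.  */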
19240 if (type == TYPE_VECLOAD)
19241 {
19242 /* Issued a vecload. */
19243 if (vec_pairing == 0)
19244 {
19245 int vecload_pos = -1;
19246 /* We issued a single vecload, look for a vector insn to pair it
19247 with. If one isn't found, try to pair another vecload. */
19248 pos = lastpos;
19249 while (pos >= 0)
19250 {
19251 if (recog_memoized (ready[pos]) >= 0)
19252 {
19253 type2 = get_attr_type (ready[pos]);
19254 if (is_power9_pairable_vec_type (type2))
19255 {
19256 /* Found a vector insn to pair with, move it to the
19257 end of the ready list so it is scheduled next. */
19258 move_to_end_of_ready (ready, pos, lastpos);
19259 vec_pairing = 1;
19260 return cached_can_issue_more;
19261 }
19262 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
19263 /* Remember position of first vecload seen. */
19264 vecload_pos = pos;
19265 }
19266 pos--;
19267 }
19268 if (vecload_pos >= 0)
19269 {
19270 /* Didn't find a vector to pair with but did find a vecload,
19271 move it to the end of the ready list. */
19272 move_to_end_of_ready (ready, vecload_pos, lastpos);
19273 vec_pairing = 1;
19274 return cached_can_issue_more;
19275 }
19276 }
19277 }
19278 else if (is_power9_pairable_vec_type (type))
19279 {
19280 /* Issued a vector operation. */
19281 if (vec_pairing == 0)
19282 {
19283 int vec_pos = -1;
19284 /* We issued a single vector insn, look for a vecload to pair it
19285 with. If one isn't found, try to pair another vector. */
19286 pos = lastpos;
19287 while (pos >= 0)
19288 {
19289 if (recog_memoized (ready[pos]) >= 0)
19290 {
19291 type2 = get_attr_type (ready[pos]);
19292 if (type2 == TYPE_VECLOAD)
19293 {
19294 /* Found a vecload insn to pair with, move it to the
19295 end of the ready list so it is scheduled next. */
19296 move_to_end_of_ready (ready, pos, lastpos);
19297 vec_pairing = 1;
19298 return cached_can_issue_more;
19299 }
19300 else if (is_power9_pairable_vec_type (type2)
19301 && vec_pos == -1)
19302 /* Remember position of first vector insn seen. */
19303 vec_pos = pos;
19304 }
19305 pos--;
19306 }
19307 if (vec_pos >= 0)
19308 {
19309 /* Didn't find a vecload to pair with but did find a vector
19310 insn, move it to the end of the ready list. */
19311 move_to_end_of_ready (ready, vec_pos, lastpos);
19312 vec_pairing = 1;
19313 return cached_can_issue_more;
19314 }
19315 }
19316 }
19317
19318 /* We've either finished a vec/vecload pair, couldn't find an insn to
19319 continue the current pair, or the last insn had nothing to do
19320 with pairing. In any case, reset the state. */
19321 vec_pairing = 0;
19322 }
19323
19324 return cached_can_issue_more;
19325 }
19326
19327 /* Determine if INSN is a store to memory that can be fused with a similar
19328 adjacent store. */
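/* Under these checks (sketch): a plain base+displacement "std" of
a GPR or "stfd" of an FPR qualifies, while update-form, indexed,
or prefixed stores do not.  */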
19329
19330 static bool
19331 is_fusable_store (rtx_insn *insn, rtx *str_mem)
19332 {
19333 /* Insn must be a non-prefixed base+disp form store. */
19334 if (is_store_insn (insn, str_mem)
19335 && get_attr_prefixed (insn) == PREFIXED_NO
19336 && get_attr_update (insn) == UPDATE_NO
19337 && get_attr_indexed (insn) == INDEXED_NO)
19338 {
19339 /* Further restrictions by mode and size. */
19340 if (!MEM_SIZE_KNOWN_P (*str_mem))
19341 return false;
19342
19343 machine_mode mode = GET_MODE (*str_mem);
19344 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
19345
19346 if (INTEGRAL_MODE_P (mode))
19347 /* Must be word or dword size. */
19348 return (size == 4 || size == 8);
19349 else if (FLOAT_MODE_P (mode))
19350 /* Must be dword size. */
19351 return (size == 8);
19352 }
19353
19354 return false;
19355 }
19356
19357 /* Do Power10 specific reordering of the ready list. */
19358
19359 static int
19360 power10_sched_reorder (rtx_insn **ready, int lastpos)
19361 {
19362 rtx mem1;
19363
19364 /* Do store fusion during sched2 only. */
19365 if (!reload_completed)
19366 return cached_can_issue_more;
19367
19368 /* If the prior insn finished off a store fusion pair then simply
19369 reset the counter and return, nothing more to do. */
19370 if (load_store_pendulum != 0)
19371 {
19372 load_store_pendulum = 0;
19373 return cached_can_issue_more;
19374 }
19375
19376 /* Try to pair certain store insns to adjacent memory locations
19377 so that the hardware will fuse them to a single operation. */
19378 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19379 {
19380
19381 /* A fusable store was just scheduled. Scan the ready list for another
19382 store that it can fuse with. */
19383 int pos = lastpos;
19384 while (pos >= 0)
19385 {
19386 rtx mem2;
19387 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19388 must be ascending only. */
19389 if (is_fusable_store (ready[pos], &mem2)
19390 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19391 && adjacent_mem_locations (mem1, mem2))
19392 || (FLOAT_MODE_P (GET_MODE (mem1))
19393 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19394 {
19395 /* Found a fusable store. Move it to the end of the ready list
19396 so it is scheduled next. */
19397 move_to_end_of_ready (ready, pos, lastpos);
19398
19399 load_store_pendulum = -1;
19400 break;
19401 }
19402 pos--;
19403 }
19404 }
19405
19406 return cached_can_issue_more;
19407 }
19408
19409 /* We are about to begin issuing insns for this clock cycle. */
19410
19411 static int
19412 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19413 rtx_insn **ready ATTRIBUTE_UNUSED,
19414 int *pn_ready ATTRIBUTE_UNUSED,
19415 int clock_var ATTRIBUTE_UNUSED)
19416 {
19417 int n_ready = *pn_ready;
19418
19419 if (sched_verbose)
19420 fprintf (dump, "// rs6000_sched_reorder :\n");
19421
19422 /* Reorder the ready list if the second-to-last ready insn
19423 is a nonpipelined insn. */
19424 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19425 {
19426 if (is_nonpipeline_insn (ready[n_ready - 1])
19427 && (recog_memoized (ready[n_ready - 2]) > 0))
19428 /* Simply swap first two insns. */
19429 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19430 }
19431
19432 if (rs6000_tune == PROCESSOR_POWER6)
19433 load_store_pendulum = 0;
19434
19435 /* Do Power10 dependent reordering. For now, assume "future" has the same
19436 dependent reordering as power10. */
19437 if ((rs6000_tune == PROCESSOR_POWER10
19438 || rs6000_tune == PROCESSOR_FUTURE) && last_scheduled_insn)
19439 power10_sched_reorder (ready, n_ready - 1);
19440
19441 return rs6000_issue_rate ();
19442 }
19443
19444 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19445
19446 static int
19447 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19448 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19449 {
19450 if (sched_verbose)
19451 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19452
19453 /* Do Power6 dependent reordering if necessary. */
19454 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19455 return power6_sched_reorder2 (ready, *pn_ready - 1);
19456
19457 /* Do Power9 dependent reordering if necessary. */
19458 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19459 && recog_memoized (last_scheduled_insn) >= 0)
19460 return power9_sched_reorder2 (ready, *pn_ready - 1);
19461
19462 /* Do Power10 dependent reordering. For now, assume "future" has the same
19463 dependent reordering as power10. */
19464 if ((rs6000_tune == PROCESSOR_POWER10
19465 || rs6000_tune == PROCESSOR_FUTURE) && last_scheduled_insn)
19466 return power10_sched_reorder (ready, *pn_ready - 1);
19467
19468 return cached_can_issue_more;
19469 }
19470
19471 /* Return whether the presence of INSN causes a dispatch group termination
19472 of group WHICH_GROUP.
19473
19474 If WHICH_GROUP == current_group, this function will return true if INSN
19475 causes the termination of the current group (i.e., the dispatch group to
19476 which INSN belongs). This means that INSN will be the last insn in the
19477 group it belongs to.
19478 
19479 If WHICH_GROUP == previous_group, this function will return true if INSN
19480 causes the termination of the previous group (i.e., the dispatch group that
19481 precedes the group to which INSN belongs). This means that INSN will be
19482 the first insn in the group it belongs to. */
19483
19484 static bool
19485 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19486 {
19487 bool first, last;
19488
19489 if (! insn)
19490 return false;
19491
19492 first = insn_must_be_first_in_group (insn);
19493 last = insn_must_be_last_in_group (insn);
19494
19495 if (first && last)
19496 return true;
19497
19498 if (which_group == current_group)
19499 return last;
19500 else if (which_group == previous_group)
19501 return first;
19502
19503 return false;
19504 }
19505
19506
19507 static bool
19508 insn_must_be_first_in_group (rtx_insn *insn)
19509 {
19510 enum attr_type type;
19511
19512 if (!insn
19513 || NOTE_P (insn)
19514 || DEBUG_INSN_P (insn)
19515 || GET_CODE (PATTERN (insn)) == USE
19516 || GET_CODE (PATTERN (insn)) == CLOBBER)
19517 return false;
19518
19519 switch (rs6000_tune)
19520 {
19521 case PROCESSOR_POWER5:
19522 if (is_cracked_insn (insn))
19523 return true;
19524 /* FALLTHRU */
19525 case PROCESSOR_POWER4:
19526 if (is_microcoded_insn (insn))
19527 return true;
19528
19529 if (!rs6000_sched_groups)
19530 return false;
19531
19532 type = get_attr_type (insn);
19533
19534 switch (type)
19535 {
19536 case TYPE_MFCR:
19537 case TYPE_MFCRF:
19538 case TYPE_MTCR:
19539 case TYPE_CR_LOGICAL:
19540 case TYPE_MTJMPR:
19541 case TYPE_MFJMPR:
19542 case TYPE_DIV:
19543 case TYPE_LOAD_L:
19544 case TYPE_STORE_C:
19545 case TYPE_ISYNC:
19546 case TYPE_SYNC:
19547 return true;
19548 default:
19549 break;
19550 }
19551 break;
19552 case PROCESSOR_POWER6:
19553 type = get_attr_type (insn);
19554
19555 switch (type)
19556 {
19557 case TYPE_EXTS:
19558 case TYPE_CNTLZ:
19559 case TYPE_TRAP:
19560 case TYPE_MUL:
19561 case TYPE_INSERT:
19562 case TYPE_FPCOMPARE:
19563 case TYPE_MFCR:
19564 case TYPE_MTCR:
19565 case TYPE_MFJMPR:
19566 case TYPE_MTJMPR:
19567 case TYPE_ISYNC:
19568 case TYPE_SYNC:
19569 case TYPE_LOAD_L:
19570 case TYPE_STORE_C:
19571 return true;
19572 case TYPE_SHIFT:
19573 if (get_attr_dot (insn) == DOT_NO
19574 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19575 return true;
19576 else
19577 break;
19578 case TYPE_DIV:
19579 if (get_attr_size (insn) == SIZE_32)
19580 return true;
19581 else
19582 break;
19583 case TYPE_LOAD:
19584 case TYPE_STORE:
19585 case TYPE_FPLOAD:
19586 case TYPE_FPSTORE:
19587 if (get_attr_update (insn) == UPDATE_YES)
19588 return true;
19589 else
19590 break;
19591 default:
19592 break;
19593 }
19594 break;
19595 case PROCESSOR_POWER7:
19596 type = get_attr_type (insn);
19597
19598 switch (type)
19599 {
19600 case TYPE_CR_LOGICAL:
19601 case TYPE_MFCR:
19602 case TYPE_MFCRF:
19603 case TYPE_MTCR:
19604 case TYPE_DIV:
19605 case TYPE_ISYNC:
19606 case TYPE_LOAD_L:
19607 case TYPE_STORE_C:
19608 case TYPE_MFJMPR:
19609 case TYPE_MTJMPR:
19610 return true;
19611 case TYPE_MUL:
19612 case TYPE_SHIFT:
19613 case TYPE_EXTS:
19614 if (get_attr_dot (insn) == DOT_YES)
19615 return true;
19616 else
19617 break;
19618 case TYPE_LOAD:
19619 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19620 || get_attr_update (insn) == UPDATE_YES)
19621 return true;
19622 else
19623 break;
19624 case TYPE_STORE:
19625 case TYPE_FPLOAD:
19626 case TYPE_FPSTORE:
19627 if (get_attr_update (insn) == UPDATE_YES)
19628 return true;
19629 else
19630 break;
19631 default:
19632 break;
19633 }
19634 break;
19635 case PROCESSOR_POWER8:
19636 type = get_attr_type (insn);
19637
19638 switch (type)
19639 {
19640 case TYPE_CR_LOGICAL:
19641 case TYPE_MFCR:
19642 case TYPE_MFCRF:
19643 case TYPE_MTCR:
19644 case TYPE_SYNC:
19645 case TYPE_ISYNC:
19646 case TYPE_LOAD_L:
19647 case TYPE_STORE_C:
19648 case TYPE_VECSTORE:
19649 case TYPE_MFJMPR:
19650 case TYPE_MTJMPR:
19651 return true;
19652 case TYPE_SHIFT:
19653 case TYPE_EXTS:
19654 case TYPE_MUL:
19655 if (get_attr_dot (insn) == DOT_YES)
19656 return true;
19657 else
19658 break;
19659 case TYPE_LOAD:
19660 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19661 || get_attr_update (insn) == UPDATE_YES)
19662 return true;
19663 else
19664 break;
19665 case TYPE_STORE:
19666 if (get_attr_update (insn) == UPDATE_YES
19667 && get_attr_indexed (insn) == INDEXED_YES)
19668 return true;
19669 else
19670 break;
19671 default:
19672 break;
19673 }
19674 break;
19675 default:
19676 break;
19677 }
19678
19679 return false;
19680 }
19681
19682 static bool
19683 insn_must_be_last_in_group (rtx_insn *insn)
19684 {
19685 enum attr_type type;
19686
19687 if (!insn
19688 || NOTE_P (insn)
19689 || DEBUG_INSN_P (insn)
19690 || GET_CODE (PATTERN (insn)) == USE
19691 || GET_CODE (PATTERN (insn)) == CLOBBER)
19692 return false;
19693
19694 switch (rs6000_tune)
{
19695 case PROCESSOR_POWER4:
19696 case PROCESSOR_POWER5:
19697 if (is_microcoded_insn (insn))
19698 return true;
19699
19700 if (is_branch_slot_insn (insn))
19701 return true;
19702
19703 break;
19704 case PROCESSOR_POWER6:
19705 type = get_attr_type (insn);
19706
19707 switch (type)
19708 {
19709 case TYPE_EXTS:
19710 case TYPE_CNTLZ:
19711 case TYPE_TRAP:
19712 case TYPE_MUL:
19713 case TYPE_FPCOMPARE:
19714 case TYPE_MFCR:
19715 case TYPE_MTCR:
19716 case TYPE_MFJMPR:
19717 case TYPE_MTJMPR:
19718 case TYPE_ISYNC:
19719 case TYPE_SYNC:
19720 case TYPE_LOAD_L:
19721 case TYPE_STORE_C:
19722 return true;
19723 case TYPE_SHIFT:
19724 if (get_attr_dot (insn) == DOT_NO
19725 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19726 return true;
19727 else
19728 break;
19729 case TYPE_DIV:
19730 if (get_attr_size (insn) == SIZE_32)
19731 return true;
19732 else
19733 break;
19734 default:
19735 break;
19736 }
19737 break;
19738 case PROCESSOR_POWER7:
19739 type = get_attr_type (insn);
19740
19741 switch (type)
19742 {
19743 case TYPE_ISYNC:
19744 case TYPE_SYNC:
19745 case TYPE_LOAD_L:
19746 case TYPE_STORE_C:
19747 return true;
19748 case TYPE_LOAD:
19749 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19750 && get_attr_update (insn) == UPDATE_YES)
19751 return true;
19752 else
19753 break;
19754 case TYPE_STORE:
19755 if (get_attr_update (insn) == UPDATE_YES
19756 && get_attr_indexed (insn) == INDEXED_YES)
19757 return true;
19758 else
19759 break;
19760 default:
19761 break;
19762 }
19763 break;
19764 case PROCESSOR_POWER8:
19765 type = get_attr_type (insn);
19766
19767 switch (type)
19768 {
19769 case TYPE_MFCR:
19770 case TYPE_MTCR:
19771 case TYPE_ISYNC:
19772 case TYPE_SYNC:
19773 case TYPE_LOAD_L:
19774 case TYPE_STORE_C:
19775 return true;
19776 case TYPE_LOAD:
19777 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19778 && get_attr_update (insn) == UPDATE_YES)
19779 return true;
19780 else
19781 break;
19782 case TYPE_STORE:
19783 if (get_attr_update (insn) == UPDATE_YES
19784 && get_attr_indexed (insn) == INDEXED_YES)
19785 return true;
19786 else
19787 break;
19788 default:
19789 break;
19790 }
19791 break;
19792 default:
19793 break;
19794 }
19795
19796 return false;
19797 }
19798
19799 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19800 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19801
19802 static bool
19803 is_costly_group (rtx *group_insns, rtx next_insn)
19804 {
19805 int i;
19806 int issue_rate = rs6000_issue_rate ();
19807
19808 for (i = 0; i < issue_rate; i++)
19809 {
19810 sd_iterator_def sd_it;
19811 dep_t dep;
19812 rtx insn = group_insns[i];
19813
19814 if (!insn)
19815 continue;
19816
19817 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19818 {
19819 rtx next = DEP_CON (dep);
19820
19821 if (next == next_insn
19822 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19823 return true;
19824 }
19825 }
19826
19827 return false;
19828 }
19829
19830 /* Utility function used by redefine_groups.
19831 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19832 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19833 to keep it "far" (in a separate group) from GROUP_INSNS, following
19834 one of the following schemes, depending on the value of the flag
19835 -minsert-sched-nops=X:
19836 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19837 in order to force NEXT_INSN into a separate group.
19838 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19839 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19840 insertion (has a group just ended, how many vacant issue slots remain in the
19841 last group, and how many dispatch groups were encountered so far). */
19842
19843 static int
19844 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19845 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19846 int *group_count)
19847 {
19848 rtx nop;
19849 bool force;
19850 int issue_rate = rs6000_issue_rate ();
19851 bool end = *group_end;
19852 int i;
19853
19854 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19855 return can_issue_more;
19856
19857 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19858 return can_issue_more;
19859
19860 force = is_costly_group (group_insns, next_insn);
19861 if (!force)
19862 return can_issue_more;
19863
19864 if (sched_verbose > 6)
19865 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
19866 *group_count, can_issue_more);
19867
19868 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19869 {
19870 if (*group_end)
19871 can_issue_more = 0;
19872
19873 /* Since only a branch can be issued in the last issue_slot, it is
19874 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19875 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19876 in this case the last nop will start a new group and the branch
19877 will be forced to the new group. */
19878 if (can_issue_more && !is_branch_slot_insn (next_insn))
19879 can_issue_more--;
19880
19881 /* Do we have a special group-ending nop? */
19882 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19883 || rs6000_tune == PROCESSOR_POWER8)
19884 {
19885 nop = gen_group_ending_nop ();
19886 emit_insn_before (nop, next_insn);
19887 can_issue_more = 0;
19888 }
19889 else
19890 while (can_issue_more > 0)
19891 {
19892 nop = gen_nop ();
19893 emit_insn_before (nop, next_insn);
19894 can_issue_more--;
19895 }
19896
19897 *group_end = true;
19898 return 0;
19899 }
19900
19901 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19902 {
19903 int n_nops = rs6000_sched_insert_nops;
19904
19905 /* Nops can't be issued from the branch slot, so the effective
19906 issue_rate for nops is 'issue_rate - 1'. */
19907 if (can_issue_more == 0)
19908 can_issue_more = issue_rate;
19909 can_issue_more--;
19910 if (can_issue_more == 0)
19911 {
19912 can_issue_more = issue_rate - 1;
19913 (*group_count)++;
19914 end = true;
19915 for (i = 0; i < issue_rate; i++)
19916 {
19917 group_insns[i] = 0;
19918 }
19919 }
19920
19921 while (n_nops > 0)
19922 {
19923 nop = gen_nop ();
19924 emit_insn_before (nop, next_insn);
19925 if (can_issue_more == issue_rate - 1) /* new group begins */
19926 end = false;
19927 can_issue_more--;
19928 if (can_issue_more == 0)
19929 {
19930 can_issue_more = issue_rate - 1;
19931 (*group_count)++;
19932 end = true;
19933 for (i = 0; i < issue_rate; i++)
19934 {
19935 group_insns[i] = 0;
19936 }
19937 }
19938 n_nops--;
19939 }
19940
19941 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19942 can_issue_more++;
19943
19944 /* Is next_insn going to start a new group? */
19945 *group_end
19946 = (end
19947 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19948 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19949 || (can_issue_more < issue_rate
19950 && insn_terminates_group_p (next_insn, previous_group)));
19951 if (*group_end && end)
19952 (*group_count)--;
19953
19954 if (sched_verbose > 6)
19955 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19956 *group_count, can_issue_more);
19957 return can_issue_more;
19958 }
19959
19960 return can_issue_more;
19961 }
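/* A minimal worked trace of scheme (2) above, assuming a hypothetical
   issue_rate of 4, -minsert-sched-nops=2, and two insns already issued in
   the current group (can_issue_more == 2): the branch-slot adjustment
   lowers the nop budget to 1, so the first nop fills the last non-branch
   slot and closes the group; the second nop opens a fresh group, and after
   scaling back the function returns can_issue_more == 3, leaving NEXT_INSN
   dispatched behind the two nops. */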
19962
19963 /* This function tries to synch the dispatch groups that the compiler "sees"
19964 with the dispatch groups that the processor dispatcher is expected to
19965 form in practice. It tries to achieve this synchronization by forcing the
19966 estimated processor grouping on the compiler (as opposed to the function
19967 'pad_groups' which tries to force the scheduler's grouping on the processor).
19968
19969 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19970 examines the (estimated) dispatch groups that will be formed by the processor
19971 dispatcher. It marks these group boundaries to reflect the estimated
19972 processor grouping, overriding the grouping that the scheduler had marked.
19973 Depending on the value of the flag '-minsert-sched-nops' this function can
19974 force certain insns into separate groups or force a certain distance between
19975 them by inserting nops, for example, if there exists a "costly dependence"
19976 between the insns.
19977
19978 The function estimates the group boundaries that the processor will form as
19979 follows: It keeps track of how many vacant issue slots are available after
19980 each insn. A subsequent insn will start a new group if one of the following
19981 4 cases applies:
19982 - no more vacant issue slots remain in the current dispatch group.
19983 - only the last issue slot, which is the branch slot, is vacant, but the next
19984 insn is not a branch.
19985 - only the last two or fewer issue slots, including the branch slot, are vacant,
19986 which means that a cracked insn (which occupies two issue slots) can't be
19987 issued in this group.
19988 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19989 start a new group. */
19990
19991 static int
19992 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19993 rtx_insn *tail)
19994 {
19995 rtx_insn *insn, *next_insn;
19996 int issue_rate;
19997 int can_issue_more;
19998 int slot, i;
19999 bool group_end;
20000 int group_count = 0;
20001 rtx *group_insns;
20002
20003 /* Initialize. */
20004 issue_rate = rs6000_issue_rate ();
20005 group_insns = XALLOCAVEC (rtx, issue_rate);
20006 for (i = 0; i < issue_rate; i++)
20007 {
20008 group_insns[i] = 0;
20009 }
20010 can_issue_more = issue_rate;
20011 slot = 0;
20012 insn = get_next_active_insn (prev_head_insn, tail);
20013 group_end = false;
20014
20015 while (insn != NULL_RTX)
20016 {
20017 slot = (issue_rate - can_issue_more);
20018 group_insns[slot] = insn;
20019 can_issue_more =
20020 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20021 if (insn_terminates_group_p (insn, current_group))
20022 can_issue_more = 0;
20023
20024 next_insn = get_next_active_insn (insn, tail);
20025 if (next_insn == NULL_RTX)
20026 return group_count + 1;
20027
20028 /* Is next_insn going to start a new group? */
20029 group_end
20030 = (can_issue_more == 0
20031 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
20032 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
20033 || (can_issue_more < issue_rate
20034 && insn_terminates_group_p (next_insn, previous_group)));
20035
20036 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
20037 next_insn, &group_end, can_issue_more,
20038 &group_count);
20039
20040 if (group_end)
20041 {
20042 group_count++;
20043 can_issue_more = 0;
20044 for (i = 0; i < issue_rate; i++)
20045 {
20046 group_insns[i] = 0;
20047 }
20048 }
20049
20050 if (GET_MODE (next_insn) == TImode && can_issue_more)
20051 PUT_MODE (next_insn, VOIDmode);
20052 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
20053 PUT_MODE (next_insn, TImode);
20054
20055 insn = next_insn;
20056 if (can_issue_more == 0)
20057 can_issue_more = issue_rate;
20058 } /* while */
20059
20060 return group_count;
20061 }
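/* A small example of the boundary estimate above, assuming a hypothetical
   issue_rate of 4: after three ordinary insns only the branch slot is
   vacant (can_issue_more == 1), so a following add starts a new group
   (the second case), whereas a following conditional branch would still
   be dispatched into the current group. */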
20062
20063 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
20064 dispatch group boundaries that the scheduler had marked. Pad with nops
20065 any dispatch groups which have vacant issue slots, in order to force the
20066 scheduler's grouping on the processor dispatcher. The function
20067 returns the number of dispatch groups found. */
20068
20069 static int
20070 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
20071 rtx_insn *tail)
20072 {
20073 rtx_insn *insn, *next_insn;
20074 rtx nop;
20075 int issue_rate;
20076 int can_issue_more;
20077 int group_end;
20078 int group_count = 0;
20079
20080 /* Initialize issue_rate. */
20081 issue_rate = rs6000_issue_rate ();
20082 can_issue_more = issue_rate;
20083
20084 insn = get_next_active_insn (prev_head_insn, tail);
20085 next_insn = get_next_active_insn (insn, tail);
20086
20087 while (insn != NULL_RTX)
20088 {
20089 can_issue_more =
20090 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20091
20092 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
20093
20094 if (next_insn == NULL_RTX)
20095 break;
20096
20097 if (group_end)
20098 {
20099 /* If the scheduler had marked group termination at this location
20100 (between insn and next_insn), and neither insn nor next_insn will
20101 force termination on its own, pad the group with nops to force the
20102 termination. */
20103 if (can_issue_more
20104 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
20105 && !insn_terminates_group_p (insn, current_group)
20106 && !insn_terminates_group_p (next_insn, previous_group))
20107 {
20108 if (!is_branch_slot_insn (next_insn))
20109 can_issue_more--;
20110
20111 while (can_issue_more)
20112 {
20113 nop = gen_nop ();
20114 emit_insn_before (nop, next_insn);
20115 can_issue_more--;
20116 }
20117 }
20118
20119 can_issue_more = issue_rate;
20120 group_count++;
20121 }
20122
20123 insn = next_insn;
20124 next_insn = get_next_active_insn (insn, tail);
20125 }
20126
20127 return group_count;
20128 }
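/* A worked sketch of the padding above, assuming a hypothetical issue_rate
   of 4 and a scheduler-marked boundary after two insns (can_issue_more ==
   2): when NEXT_INSN is not a branch, one slot is reserved for the
   branch-only position and a single nop is emitted, so the dispatcher sees
   three occupied slots plus a branch slot that NEXT_INSN cannot fill, and
   terminates the group exactly where the scheduler wanted it. */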
20129
20130 /* We're beginning a new block. Initialize data structures as necessary. */
20131
20132 static void
20133 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
20134 int sched_verbose ATTRIBUTE_UNUSED,
20135 int max_ready ATTRIBUTE_UNUSED)
20136 {
20137 last_scheduled_insn = NULL;
20138 load_store_pendulum = 0;
20139 divide_cnt = 0;
20140 vec_pairing = 0;
20141 }
20142
20143 /* The following function is called at the end of scheduling BB.
20144 After reload, it inserts nops to enforce the insn group bundling. */
20145
20146 static void
20147 rs6000_sched_finish (FILE *dump, int sched_verbose)
20148 {
20149 int n_groups;
20150
20151 if (sched_verbose)
20152 fprintf (dump, "=== Finishing schedule.\n");
20153
20154 if (reload_completed && rs6000_sched_groups)
20155 {
20156 /* Do not run sched_finish hook when selective scheduling enabled. */
20157 if (sel_sched_p ())
20158 return;
20159
20160 if (rs6000_sched_insert_nops == sched_finish_none)
20161 return;
20162
20163 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
20164 n_groups = pad_groups (dump, sched_verbose,
20165 current_sched_info->prev_head,
20166 current_sched_info->next_tail);
20167 else
20168 n_groups = redefine_groups (dump, sched_verbose,
20169 current_sched_info->prev_head,
20170 current_sched_info->next_tail);
20171
20172 if (sched_verbose >= 6)
20173 {
20174 fprintf (dump, "ngroups = %d\n", n_groups);
20175 print_rtl (dump, current_sched_info->prev_head);
20176 fprintf (dump, "Done finish_sched\n");
20177 }
20178 }
20179 }
20180
20181 struct rs6000_sched_context
20182 {
20183 short cached_can_issue_more;
20184 rtx_insn *last_scheduled_insn;
20185 int load_store_pendulum;
20186 int divide_cnt;
20187 int vec_pairing;
20188 };
20189
20190 typedef struct rs6000_sched_context rs6000_sched_context_def;
20191 typedef rs6000_sched_context_def *rs6000_sched_context_t;
20192
20193 /* Allocate store for new scheduling context. */
20194 static void *
20195 rs6000_alloc_sched_context (void)
20196 {
20197 return xmalloc (sizeof (rs6000_sched_context_def));
20198 }
20199
20200 /* If CLEAN_P is true, initialize _SC with clean data;
20201 otherwise, initialize it from the global context. */
20202 static void
20203 rs6000_init_sched_context (void *_sc, bool clean_p)
20204 {
20205 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20206
20207 if (clean_p)
20208 {
20209 sc->cached_can_issue_more = 0;
20210 sc->last_scheduled_insn = NULL;
20211 sc->load_store_pendulum = 0;
20212 sc->divide_cnt = 0;
20213 sc->vec_pairing = 0;
20214 }
20215 else
20216 {
20217 sc->cached_can_issue_more = cached_can_issue_more;
20218 sc->last_scheduled_insn = last_scheduled_insn;
20219 sc->load_store_pendulum = load_store_pendulum;
20220 sc->divide_cnt = divide_cnt;
20221 sc->vec_pairing = vec_pairing;
20222 }
20223 }
20224
20225 /* Sets the global scheduling context to the one pointed to by _SC. */
20226 static void
20227 rs6000_set_sched_context (void *_sc)
20228 {
20229 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20230
20231 gcc_assert (sc != NULL);
20232
20233 cached_can_issue_more = sc->cached_can_issue_more;
20234 last_scheduled_insn = sc->last_scheduled_insn;
20235 load_store_pendulum = sc->load_store_pendulum;
20236 divide_cnt = sc->divide_cnt;
20237 vec_pairing = sc->vec_pairing;
20238 }
20239
20240 /* Free _SC. */
20241 static void
20242 rs6000_free_sched_context (void *_sc)
20243 {
20244 gcc_assert (_sc != NULL);
20245
20246 free (_sc);
20247 }
20248
20249 static bool
20250 rs6000_sched_can_speculate_insn (rtx_insn *insn)
20251 {
20252 switch (get_attr_type (insn))
20253 {
20254 case TYPE_DIV:
20255 case TYPE_SDIV:
20256 case TYPE_DDIV:
20257 case TYPE_VECDIV:
20258 case TYPE_SSQRT:
20259 case TYPE_DSQRT:
20260 return false;
20261
20262 default:
20263 return true;
20264 }
20265 }
20266 \f
20267 /* Length in units of the trampoline for entering a nested function. */
20268
20269 int
20270 rs6000_trampoline_size (void)
20271 {
20272 int ret = 0;
20273
20274 switch (DEFAULT_ABI)
20275 {
20276 default:
20277 gcc_unreachable ();
20278
20279 case ABI_AIX:
20280 ret = (TARGET_32BIT) ? 12 : 24;
20281 break;
20282
20283 case ABI_ELFv2:
20284 gcc_assert (!TARGET_32BIT);
20285 ret = 32;
20286 break;
20287
20288 case ABI_DARWIN:
20289 case ABI_V4:
20290 ret = (TARGET_32BIT) ? 40 : 48;
20291 break;
20292 }
20293
20294 return ret;
20295 }
20296
20297 /* Emit RTL insns to initialize the variable parts of a trampoline.
20298 FNADDR is an RTX for the address of the function's pure code.
20299 CXT is an RTX for the static chain value for the function. */
20300
20301 static void
20302 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
20303 {
20304 int regsize = (TARGET_32BIT) ? 4 : 8;
20305 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
20306 rtx ctx_reg = force_reg (Pmode, cxt);
20307 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
20308
20309 switch (DEFAULT_ABI)
20310 {
20311 default:
20312 gcc_unreachable ();
20313
20314 /* Under AIX, just build the 3-word function descriptor. */
20315 case ABI_AIX:
20316 {
20317 rtx fnmem, fn_reg, toc_reg;
20318
20319 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
20320 error ("you cannot take the address of a nested function if you use "
20321 "the %qs option", "-mno-pointers-to-nested-functions");
20322
20323 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
20324 fn_reg = gen_reg_rtx (Pmode);
20325 toc_reg = gen_reg_rtx (Pmode);
20326
20327 /* Macro to shorten the code expansions below. */
20328 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20329
20330 m_tramp = replace_equiv_address (m_tramp, addr);
20331
20332 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
20333 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
20334 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
20335 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
20336 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
20337
20338 # undef MEM_PLUS
20339 }
20340 break;
20341
20342 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20343 case ABI_ELFv2:
20344 case ABI_DARWIN:
20345 case ABI_V4:
20346 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
20347 LCT_NORMAL, VOIDmode,
20348 addr, Pmode,
20349 GEN_INT (rs6000_trampoline_size ()), SImode,
20350 fnaddr, Pmode,
20351 ctx_reg, Pmode);
20352 break;
20353 }
20354 }
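/* For reference, the 3-word AIX descriptor built above is laid out as
   follows (REGSIZE is 4 bytes for 32-bit, 8 for 64-bit):

	offset 0:		entry address, copied from FNMEM
	offset regsize:		TOC pointer, copied from FNMEM + regsize
	offset 2*regsize:	static chain value CXT
*/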
20355
20356 \f
20357 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20358 identifier as an argument, so the front end shouldn't look it up. */
20359
20360 static bool
20361 rs6000_attribute_takes_identifier_p (const_tree attr_id)
20362 {
20363 return is_attribute_p ("altivec", attr_id);
20364 }
20365
20366 /* Handle the "altivec" attribute. The attribute may have
20367 arguments as follows:
20368
20369 __attribute__((altivec(vector__)))
20370 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20371 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20372
20373 and may appear more than once (e.g., 'vector bool char') in a
20374 given declaration. */
20375
20376 static tree
20377 rs6000_handle_altivec_attribute (tree *node,
20378 tree name ATTRIBUTE_UNUSED,
20379 tree args,
20380 int flags ATTRIBUTE_UNUSED,
20381 bool *no_add_attrs)
20382 {
20383 tree type = *node, result = NULL_TREE;
20384 machine_mode mode;
20385 int unsigned_p;
20386 char altivec_type
20387 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20388 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20389 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20390 : '?');
20391
20392 while (POINTER_TYPE_P (type)
20393 || TREE_CODE (type) == FUNCTION_TYPE
20394 || TREE_CODE (type) == METHOD_TYPE
20395 || TREE_CODE (type) == ARRAY_TYPE)
20396 type = TREE_TYPE (type);
20397
20398 mode = TYPE_MODE (type);
20399
20400 /* Check for invalid AltiVec type qualifiers. */
20401 if (type == long_double_type_node)
20402 error ("use of %<long double%> in AltiVec types is invalid");
20403 else if (type == boolean_type_node)
20404 error ("use of boolean types in AltiVec types is invalid");
20405 else if (TREE_CODE (type) == COMPLEX_TYPE)
20406 error ("use of %<complex%> in AltiVec types is invalid");
20407 else if (DECIMAL_FLOAT_MODE_P (mode))
20408 error ("use of decimal floating-point types in AltiVec types is invalid");
20409 else if (!TARGET_VSX)
20410 {
20411 if (type == long_unsigned_type_node || type == long_integer_type_node)
20412 {
20413 if (TARGET_64BIT)
20414 error ("use of %<long%> in AltiVec types is invalid for "
20415 "64-bit code without %qs", "-mvsx");
20416 else if (rs6000_warn_altivec_long)
20417 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20418 "use %<int%>");
20419 }
20420 else if (type == long_long_unsigned_type_node
20421 || type == long_long_integer_type_node)
20422 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20423 "-mvsx");
20424 else if (type == double_type_node)
20425 error ("use of %<double%> in AltiVec types is invalid without %qs",
20426 "-mvsx");
20427 }
20428
20429 switch (altivec_type)
20430 {
20431 case 'v':
20432 unsigned_p = TYPE_UNSIGNED (type);
20433 switch (mode)
20434 {
20435 case E_TImode:
20436 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20437 break;
20438 case E_DImode:
20439 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20440 break;
20441 case E_SImode:
20442 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20443 break;
20444 case E_HImode:
20445 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20446 break;
20447 case E_QImode:
20448 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20449 break;
20450 case E_SFmode: result = V4SF_type_node; break;
20451 case E_DFmode: result = V2DF_type_node; break;
20452 /* If the user says 'vector int bool', we may be handed the 'bool'
20453 attribute _before_ the 'vector' attribute, and so select the
20454 proper type in the 'b' case below. */
20455 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20456 case E_V2DImode: case E_V2DFmode:
20457 result = type; break;
20458 default: break;
20459 }
20460 break;
20461 case 'b':
20462 switch (mode)
20463 {
20464 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20465 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20466 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20467 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20468 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node; break;
20469 default: break;
20470 }
20471 break;
20472 case 'p':
20473 switch (mode)
20474 {
20475 case E_V8HImode: result = pixel_V8HI_type_node; break;
20476 default: break;
20477 }
break;
20478 default: break;
20479 }
20480
20481 /* Propagate qualifiers attached to the element type
20482 onto the vector type. */
20483 if (result && result != type && TYPE_QUALS (type))
20484 result = build_qualified_type (result, TYPE_QUALS (type));
20485
20486 *no_add_attrs = true; /* No need to hang on to the attribute. */
20487
20488 if (result)
20489 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20490
20491 return NULL_TREE;
20492 }
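/* A user-level sketch of what reaches this handler (hypothetical variable
   names; the <altivec.h> keywords expand to roughly these internal forms,
   and 'vector bool'/'vector pixel' arrive as two stacked attributes, which
   is why the 'b' and 'p' cases above also accept vector modes):

	__attribute__ ((altivec (vector__))) signed int vsi;	-> V4SI
	__attribute__ ((altivec (bool__))) unsigned int vbi;	-> bool V4SI
*/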
20493
20494 /* AltiVec defines five built-in scalar types that serve as vector
20495 elements; we must teach the compiler how to mangle them. The 128-bit
20496 floating point mangling is target-specific as well. MMA defines
20497 two built-in types to be used as opaque vector types. */
20498
20499 static const char *
20500 rs6000_mangle_type (const_tree type)
20501 {
20502 type = TYPE_MAIN_VARIANT (type);
20503
20504 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20505 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20506 && TREE_CODE (type) != OPAQUE_TYPE)
20507 return NULL;
20508
20509 if (type == bool_char_type_node) return "U6__boolc";
20510 if (type == bool_short_type_node) return "U6__bools";
20511 if (type == pixel_type_node) return "u7__pixel";
20512 if (type == bool_int_type_node) return "U6__booli";
20513 if (type == bool_long_long_type_node) return "U6__boolx";
20514
20515 if (type == float128_type_node || type == float64x_type_node)
20516 return NULL;
20517
20518 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20519 return "g";
20520 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20521 return "u9__ieee128";
20522
20523 if (type == vector_pair_type_node)
20524 return "u13__vector_pair";
20525 if (type == vector_quad_type_node)
20526 return "u13__vector_quad";
20527 if (type == dmr_type_node)
20528 return "u5__dmr";
20529
20530 /* For all other types, use the default mangling. */
20531 return NULL;
20532 }
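/* A decoding example for the strings above, following the Itanium C++ ABI:
   "u" <len> <name> is a vendor-extended builtin type, so "u9__ieee128"
   encodes the 9-character name "__ieee128"; "U" <len> <name> is a
   vendor-extended qualifier, so "U6__boolc" is plain 'c' (char) qualified
   by the 6-character "__bool". */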
20533
20534 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20535 struct attribute_spec.handler. */
20536
20537 static tree
20538 rs6000_handle_longcall_attribute (tree *node, tree name,
20539 tree args ATTRIBUTE_UNUSED,
20540 int flags ATTRIBUTE_UNUSED,
20541 bool *no_add_attrs)
20542 {
20543 if (TREE_CODE (*node) != FUNCTION_TYPE
20544 && TREE_CODE (*node) != FIELD_DECL
20545 && TREE_CODE (*node) != TYPE_DECL)
20546 {
20547 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20548 name);
20549 *no_add_attrs = true;
20550 }
20551
20552 return NULL_TREE;
20553 }
20554
20555 /* Set longcall attributes on all functions declared when
20556 rs6000_default_long_calls is true. */
20557 static void
20558 rs6000_set_default_type_attributes (tree type)
20559 {
20560 if (rs6000_default_long_calls
20561 && (TREE_CODE (type) == FUNCTION_TYPE
20562 || TREE_CODE (type) == METHOD_TYPE))
20563 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20564 NULL_TREE,
20565 TYPE_ATTRIBUTES (type));
20566
20567 #if TARGET_MACHO
20568 darwin_set_default_type_attributes (type);
20569 #endif
20570 }
20571
20572 /* Return a reference suitable for calling a function with the
20573 longcall attribute. */
20574
20575 static rtx
20576 rs6000_longcall_ref (rtx call_ref, rtx arg)
20577 {
20578 /* System V adds '.' to the internal name, so skip any leading periods. */
20579 const char *call_name = XSTR (call_ref, 0);
20580 if (*call_name == '.')
20581 {
20582 while (*call_name == '.')
20583 call_name++;
20584
20585 tree node = get_identifier (call_name);
20586 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20587 }
20588
20589 if (TARGET_PLTSEQ)
20590 {
20591 rtx base = const0_rtx;
20592 int regno = 12;
20593 if (rs6000_pcrel_p ())
20594 {
20595 rtx reg = gen_rtx_REG (Pmode, regno);
20596 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20597 gen_rtvec (3, base, call_ref, arg),
20598 UNSPECV_PLT_PCREL);
20599 emit_insn (gen_rtx_SET (reg, u));
20600 return reg;
20601 }
20602
20603 if (DEFAULT_ABI == ABI_ELFv2)
20604 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20605 else
20606 {
20607 if (flag_pic)
20608 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20609 regno = 11;
20610 }
20611 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20612 may be used by a function global entry point. For SysV4, r11
20613 is used by __glink_PLTresolve lazy resolver entry. */
20614 rtx reg = gen_rtx_REG (Pmode, regno);
20615 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20616 UNSPEC_PLT16_HA);
20617 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20618 gen_rtvec (3, reg, call_ref, arg),
20619 UNSPECV_PLT16_LO);
20620 emit_insn (gen_rtx_SET (reg, hi));
20621 emit_insn (gen_rtx_SET (reg, lo));
20622 return reg;
20623 }
20624
20625 return force_reg (Pmode, call_ref);
20626 }
20627 \f
20628 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20629 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20630 #endif
20631
20632 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20633 struct attribute_spec.handler. */
20634 static tree
20635 rs6000_handle_struct_attribute (tree *node, tree name,
20636 tree args ATTRIBUTE_UNUSED,
20637 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20638 {
20639 tree *type = NULL;
20640 if (DECL_P (*node))
20641 {
20642 if (TREE_CODE (*node) == TYPE_DECL)
20643 type = &TREE_TYPE (*node);
20644 }
20645 else
20646 type = node;
20647
20648 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20649 || TREE_CODE (*type) == UNION_TYPE)))
20650 {
20651 warning (OPT_Wattributes, "%qE attribute ignored", name);
20652 *no_add_attrs = true;
20653 }
20654
20655 else if ((is_attribute_p ("ms_struct", name)
20656 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20657 || ((is_attribute_p ("gcc_struct", name)
20658 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20659 {
20660 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20661 name);
20662 *no_add_attrs = true;
20663 }
20664
20665 return NULL_TREE;
20666 }
20667
20668 static bool
20669 rs6000_ms_bitfield_layout_p (const_tree record_type)
20670 {
20671 return (TARGET_USE_MS_BITFIELD_LAYOUT
20672 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20673 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20674 }
20675 \f
20676 #ifdef USING_ELFOS_H
20677
20678 /* A get_unnamed_section callback, used for switching to toc_section. */
20679
20680 static void
20681 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20682 {
20683 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20684 && TARGET_MINIMAL_TOC)
20685 {
20686 if (!toc_initialized)
20687 {
20688 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20689 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20690 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20691 fprintf (asm_out_file, "\t.tc ");
20692 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20693 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20694 fprintf (asm_out_file, "\n");
20695
20696 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20697 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20698 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20699 fprintf (asm_out_file, " = .+32768\n");
20700 toc_initialized = 1;
20701 }
20702 else
20703 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20704 }
20705 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20706 {
20707 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20708 if (!toc_initialized)
20709 {
20710 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20711 toc_initialized = 1;
20712 }
20713 }
20714 else
20715 {
20716 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20717 if (!toc_initialized)
20718 {
20719 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20720 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20721 fprintf (asm_out_file, " = .+32768\n");
20722 toc_initialized = 1;
20723 }
20724 }
20725 }
20726
20727 /* Implement TARGET_ASM_INIT_SECTIONS. */
20728
20729 static void
20730 rs6000_elf_asm_init_sections (void)
20731 {
20732 toc_section
20733 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20734
20735 sdata2_section
20736 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20737 SDATA2_SECTION_ASM_OP);
20738 }
20739
20740 /* Implement TARGET_SELECT_RTX_SECTION. */
20741
20742 static section *
20743 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20744 unsigned HOST_WIDE_INT align)
20745 {
20746 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20747 return toc_section;
20748 else
20749 return default_elf_select_rtx_section (mode, x, align);
20750 }
20751 \f
20752 /* For a SYMBOL_REF, set generic flags and then perform some
20753 target-specific processing.
20754
20755 When the AIX ABI is requested on a non-AIX system, replace the
20756 function name with the real name (with a leading .) rather than the
20757 function descriptor name. This saves a lot of code that would
20758 otherwise be needed to handle the prefixes. */
20759
20760 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20761 static void
20762 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20763 {
20764 default_encode_section_info (decl, rtl, first);
20765
20766 if (first
20767 && TREE_CODE (decl) == FUNCTION_DECL
20768 && !TARGET_AIX
20769 && DEFAULT_ABI == ABI_AIX)
20770 {
20771 rtx sym_ref = XEXP (rtl, 0);
20772 size_t len = strlen (XSTR (sym_ref, 0));
20773 char *str = XALLOCAVEC (char, len + 2);
20774 str[0] = '.';
20775 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20776 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20777 }
20778 }
20779
20780 static inline bool
20781 compare_section_name (const char *section, const char *templ)
20782 {
20783 int len;
20784
20785 len = strlen (templ);
20786 return (strncmp (section, templ, len) == 0
20787 && (section[len] == 0 || section[len] == '.'));
20788 }
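/* E.g., compare_section_name (".sdata.str1.1", ".sdata") is true (the
   prefix is followed by '.'), while compare_section_name (".sdata2",
   ".sdata") is false, since the prefix is followed by '2' -- which is why
   ".sdata2" is listed separately below. */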
20789
20790 bool
20791 rs6000_elf_in_small_data_p (const_tree decl)
20792 {
20793 if (rs6000_sdata == SDATA_NONE)
20794 return false;
20795
20796 /* We want to merge strings, so we never consider them small data. */
20797 if (TREE_CODE (decl) == STRING_CST)
20798 return false;
20799
20800 /* Functions are never in the small data area. */
20801 if (TREE_CODE (decl) == FUNCTION_DECL)
20802 return false;
20803
20804 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20805 {
20806 const char *section = DECL_SECTION_NAME (decl);
20807 if (compare_section_name (section, ".sdata")
20808 || compare_section_name (section, ".sdata2")
20809 || compare_section_name (section, ".gnu.linkonce.s")
20810 || compare_section_name (section, ".sbss")
20811 || compare_section_name (section, ".sbss2")
20812 || compare_section_name (section, ".gnu.linkonce.sb")
20813 || strcmp (section, ".PPC.EMB.sdata0") == 0
20814 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20815 return true;
20816 }
20817 else
20818 {
20819 /* If we are told not to put readonly data in sdata, then don't. */
20820 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20821 && !rs6000_readonly_in_sdata)
20822 return false;
20823
20824 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20825
20826 if (size > 0
20827 && size <= g_switch_value
20828 /* If it's not public, and we're not going to reference it there,
20829 there's no need to put it in the small data section. */
20830 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20831 return true;
20832 }
20833
20834 return false;
20835 }
20836
20837 #endif /* USING_ELFOS_H */
20838 \f
20839 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20840
20841 static bool
20842 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20843 {
20844 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20845 }
20846
20847 /* Do not place thread-local symbols refs in the object blocks. */
20848
20849 static bool
20850 rs6000_use_blocks_for_decl_p (const_tree decl)
20851 {
20852 return !DECL_THREAD_LOCAL_P (decl);
20853 }
20854 \f
20855 /* Return a REG that occurs in ADDR with coefficient 1.
20856 ADDR can be effectively incremented by incrementing REG.
20857
20858 r0 is special and we must not select it as an address
20859 register by this routine since our caller will try to
20860 increment the returned register via an "la" instruction. */
20861
20862 rtx
20863 find_addr_reg (rtx addr)
20864 {
20865 while (GET_CODE (addr) == PLUS)
20866 {
20867 if (REG_P (XEXP (addr, 0))
20868 && REGNO (XEXP (addr, 0)) != 0)
20869 addr = XEXP (addr, 0);
20870 else if (REG_P (XEXP (addr, 1))
20871 && REGNO (XEXP (addr, 1)) != 0)
20872 addr = XEXP (addr, 1);
20873 else if (CONSTANT_P (XEXP (addr, 0)))
20874 addr = XEXP (addr, 1);
20875 else if (CONSTANT_P (XEXP (addr, 1)))
20876 addr = XEXP (addr, 0);
20877 else
20878 gcc_unreachable ();
20879 }
20880 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20881 return addr;
20882 }
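/* E.g., for ADDR == (plus (reg 9) (const_int 16)) the loop steps over the
   constant and returns (reg 9); for (plus (reg 0) (reg 9)) it must return
   (reg 9), because r0 in the base position of "la"/"addi" reads as the
   literal value 0. */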
20883
20884 void
20885 rs6000_fatal_bad_address (rtx op)
20886 {
20887 fatal_insn ("bad address", op);
20888 }
20889
20890 #if TARGET_MACHO
20891
20892 vec<branch_island, va_gc> *branch_islands;
20893
20894 /* Remember to generate a branch island for far calls to the given
20895 function. */
20896
20897 static void
20898 add_compiler_branch_island (tree label_name, tree function_name,
20899 int line_number)
20900 {
20901 branch_island bi = {function_name, label_name, line_number};
20902 vec_safe_push (branch_islands, bi);
20903 }
20904
20905 /* NO_PREVIOUS_DEF checks whether FUNCTION_NAME is already in the list
20906 of branch islands. */
20907
20908 static int
20909 no_previous_def (tree function_name)
20910 {
20911 branch_island *bi;
20912 unsigned ix;
20913
20914 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20915 if (function_name == bi->function_name)
20916 return 0;
20917 return 1;
20918 }
20919
20920 /* GET_PREV_LABEL gets the label name from the previous definition of
20921 the function. */
20922
20923 static tree
20924 get_prev_label (tree function_name)
20925 {
20926 branch_island *bi;
20927 unsigned ix;
20928
20929 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20930 if (function_name == bi->function_name)
20931 return bi->label_name;
20932 return NULL_TREE;
20933 }
20934
20935 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20936
20937 void
20938 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20939 {
20940 unsigned int length;
20941 char *symbol_name, *lazy_ptr_name;
20942 char *local_label_0;
20943 static unsigned label = 0;
20944
20945 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20946 symb = (*targetm.strip_name_encoding) (symb);
20947
20948 length = strlen (symb);
20949 symbol_name = XALLOCAVEC (char, length + 32);
20950 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20951
20952 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20953 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20954
20955 if (MACHOPIC_PURE)
20956 {
20957 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20958 fprintf (file, "\t.align 5\n");
20959
20960 fprintf (file, "%s:\n", stub);
20961 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20962
20963 label++;
20964 local_label_0 = XALLOCAVEC (char, 16);
20965 sprintf (local_label_0, "L%u$spb", label);
20966
20967 fprintf (file, "\tmflr r0\n");
20968 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20969 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20970 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20971 lazy_ptr_name, local_label_0);
20972 fprintf (file, "\tmtlr r0\n");
20973 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20974 (TARGET_64BIT ? "ldu" : "lwzu"),
20975 lazy_ptr_name, local_label_0);
20976 fprintf (file, "\tmtctr r12\n");
20977 fprintf (file, "\tbctr\n");
20978 }
20979 else /* mdynamic-no-pic or mkernel. */
20980 {
20981 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20982 fprintf (file, "\t.align 4\n");
20983
20984 fprintf (file, "%s:\n", stub);
20985 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20986
20987 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20988 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20989 (TARGET_64BIT ? "ldu" : "lwzu"),
20990 lazy_ptr_name);
20991 fprintf (file, "\tmtctr r12\n");
20992 fprintf (file, "\tbctr\n");
20993 }
20994
20995 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20996 fprintf (file, "%s:\n", lazy_ptr_name);
20997 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20998 fprintf (file, "%sdyld_stub_binding_helper\n",
20999 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
21000 }
21001
21002 /* Legitimize PIC addresses. If the address is already
21003 position-independent, we return ORIG. Newly generated
21004 position-independent addresses go into a reg. This is REG if
21005 nonzero; otherwise we allocate register(s) as necessary. */
21006
21007 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
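/* SMALL_INT accepts exactly the signed 16-bit range: for X == -0x8000 the
   unsigned sum wraps to 0, which is < 0x10000, while for X == 0x8000 the
   sum is exactly 0x10000 and the test fails. */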
21008
21009 rtx
21010 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
21011 rtx reg)
21012 {
21013 rtx base, offset;
21014
21015 if (reg == NULL && !reload_completed)
21016 reg = gen_reg_rtx (Pmode);
21017
21018 if (GET_CODE (orig) == CONST)
21019 {
21020 rtx reg_temp;
21021
21022 if (GET_CODE (XEXP (orig, 0)) == PLUS
21023 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
21024 return orig;
21025
21026 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
21027
21028 /* Use a different reg for the intermediate value, as
21029 it will be marked UNCHANGING. */
21030 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
21031 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
21032 Pmode, reg_temp);
21033 offset =
21034 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
21035 Pmode, reg);
21036
21037 if (CONST_INT_P (offset))
21038 {
21039 if (SMALL_INT (offset))
21040 return plus_constant (Pmode, base, INTVAL (offset));
21041 else if (!reload_completed)
21042 offset = force_reg (Pmode, offset);
21043 else
21044 {
21045 rtx mem = force_const_mem (Pmode, orig);
21046 return machopic_legitimize_pic_address (mem, Pmode, reg);
21047 }
21048 }
21049 return gen_rtx_PLUS (Pmode, base, offset);
21050 }
21051
21052 /* Fall back on generic machopic code. */
21053 return machopic_legitimize_pic_address (orig, mode, reg);
21054 }
21055
21056 /* Output a .machine directive for the Darwin assembler, and call
21057 the generic start_file routine. */
21058
21059 static void
21060 rs6000_darwin_file_start (void)
21061 {
21062 static const struct
21063 {
21064 const char *arg;
21065 const char *name;
21066 HOST_WIDE_INT if_set;
21067 } mapping[] = {
21068 { "ppc64", "ppc64", MASK_64BIT },
21069 { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF \
21070 | MASK_POWERPC64 },
21071 { "power4", "ppc970", 0 },
21072 { "G5", "ppc970", 0 },
21073 { "7450", "ppc7450", 0 },
21074 { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
21075 { "G4", "ppc7400", 0 },
21076 { "750", "ppc750", 0 },
21077 { "740", "ppc750", 0 },
21078 { "G3", "ppc750", 0 },
21079 { "604e", "ppc604e", 0 },
21080 { "604", "ppc604", 0 },
21081 { "603e", "ppc603", 0 },
21082 { "603", "ppc603", 0 },
21083 { "601", "ppc601", 0 },
21084 { NULL, "ppc", 0 } };
21085 const char *cpu_id = "";
21086 size_t i;
21087
21088 rs6000_file_start ();
21089 darwin_file_start ();
21090
21091 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
21092
21093 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
21094 cpu_id = rs6000_default_cpu;
21095
21096 if (OPTION_SET_P (rs6000_cpu_index))
21097 cpu_id = processor_target_table[rs6000_cpu_index].name;
21098
21099 /* Look through the mapping array. Pick the first name that either
21100 matches the argument, has a bit set in IF_SET that is also set
21101 in the target flags, or has a NULL name. */
21102
21103 i = 0;
21104 while (mapping[i].arg != NULL
21105 && strcmp (mapping[i].arg, cpu_id) != 0
21106 && (mapping[i].if_set & rs6000_isa_flags) == 0)
21107 i++;
21108
21109 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
21110 }
21111
21112 #endif /* TARGET_MACHO */
21113
21114 #if TARGET_ELF
21115 static int
21116 rs6000_elf_reloc_rw_mask (void)
21117 {
21118 if (flag_pic)
21119 return 3;
21120 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21121 return 2;
21122 else
21123 return 0;
21124 }
21125
21126 /* Record an element in the table of global constructors. SYMBOL is
21127 a SYMBOL_REF of the function to be called; PRIORITY is a number
21128 between 0 and MAX_INIT_PRIORITY.
21129
21130 This differs from default_named_section_asm_out_constructor in
21131 that we have special handling for -mrelocatable. */
21132
21133 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
21134 static void
21135 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
21136 {
21137 const char *section = ".ctors";
21138 char buf[18];
21139
21140 if (priority != DEFAULT_INIT_PRIORITY)
21141 {
21142 sprintf (buf, ".ctors.%.5u",
21143 /* Invert the numbering so the linker puts us in the proper
21144 order; constructors are run from right to left, and the
21145 linker sorts in increasing order. */
21146 MAX_INIT_PRIORITY - priority);
21147 section = buf;
21148 }
21149
21150 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21151 assemble_align (POINTER_SIZE);
21152
21153 if (DEFAULT_ABI == ABI_V4
21154 && (TARGET_RELOCATABLE || flag_pic > 1))
21155 {
21156 fputs ("\t.long (", asm_out_file);
21157 output_addr_const (asm_out_file, symbol);
21158 fputs (")@fixup\n", asm_out_file);
21159 }
21160 else
21161 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21162 }
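/* E.g., with MAX_INIT_PRIORITY == 65535, a constructor of priority 101
   lands in ".ctors.65434"; the linker sorts these sections in increasing
   order and .ctors entries run right to left, so lower-numbered priorities
   end up running first, as intended. */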
21163
21164 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
21165 static void
21166 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
21167 {
21168 const char *section = ".dtors";
21169 char buf[18];
21170
21171 if (priority != DEFAULT_INIT_PRIORITY)
21172 {
21173 sprintf (buf, ".dtors.%.5u",
21174 /* Invert the numbering so the linker puts us in the proper
21175 order; constructors are run from right to left, and the
21176 linker sorts in increasing order. */
21177 MAX_INIT_PRIORITY - priority);
21178 section = buf;
21179 }
21180
21181 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21182 assemble_align (POINTER_SIZE);
21183
21184 if (DEFAULT_ABI == ABI_V4
21185 && (TARGET_RELOCATABLE || flag_pic > 1))
21186 {
21187 fputs ("\t.long (", asm_out_file);
21188 output_addr_const (asm_out_file, symbol);
21189 fputs (")@fixup\n", asm_out_file);
21190 }
21191 else
21192 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21193 }
21194
21195 void
21196 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
21197 {
21198 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
21199 {
21200 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
21201 ASM_OUTPUT_LABEL (file, name);
21202 fputs (DOUBLE_INT_ASM_OP, file);
21203 rs6000_output_function_entry (file, name);
21204 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
21205 if (DOT_SYMBOLS)
21206 {
21207 fputs ("\t.size\t", file);
21208 assemble_name (file, name);
21209 fputs (",24\n\t.type\t.", file);
21210 assemble_name (file, name);
21211 fputs (",@function\n", file);
21212 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
21213 {
21214 fputs ("\t.globl\t.", file);
21215 assemble_name (file, name);
21216 putc ('\n', file);
21217 }
21218 }
21219 else
21220 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21221 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21222 rs6000_output_function_entry (file, name);
21223 fputs (":\n", file);
21224 return;
21225 }
21226
21227 int uses_toc;
21228 if (DEFAULT_ABI == ABI_V4
21229 && (TARGET_RELOCATABLE || flag_pic > 1)
21230 && !TARGET_SECURE_PLT
21231 && (!constant_pool_empty_p () || crtl->profile)
21232 && (uses_toc = uses_TOC ()))
21233 {
21234 char buf[256];
21235
21236 if (uses_toc == 2)
21237 switch_to_other_text_partition ();
21238 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21239
21240 fprintf (file, "\t.long ");
21241 assemble_name (file, toc_label_name);
21242 need_toc_init = 1;
21243 putc ('-', file);
21244 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21245 assemble_name (file, buf);
21246 putc ('\n', file);
21247 if (uses_toc == 2)
21248 switch_to_other_text_partition ();
21249 }
21250
21251 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21252 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21253
21254 if (TARGET_CMODEL == CMODEL_LARGE
21255 && rs6000_global_entry_point_prologue_needed_p ())
21256 {
21257 char buf[256];
21258
21259 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21260
21261 fprintf (file, "\t.quad .TOC.-");
21262 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21263 assemble_name (file, buf);
21264 putc ('\n', file);
21265 }
21266
21267 if (DEFAULT_ABI == ABI_AIX)
21268 {
21269 const char *desc_name, *orig_name;
21270
21271 orig_name = (*targetm.strip_name_encoding) (name);
21272 desc_name = orig_name;
21273 while (*desc_name == '.')
21274 desc_name++;
21275
21276 if (TREE_PUBLIC (decl))
21277 fprintf (file, "\t.globl %s\n", desc_name);
21278
21279 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
21280 fprintf (file, "%s:\n", desc_name);
21281 fprintf (file, "\t.long %s\n", orig_name);
21282 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
21283 fputs ("\t.long 0\n", file);
21284 fprintf (file, "\t.previous\n");
21285 }
21286 ASM_OUTPUT_LABEL (file, name);
21287 }
21288
21289 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
21290 static void
21291 rs6000_elf_file_end (void)
21292 {
21293 #ifdef HAVE_AS_GNU_ATTRIBUTE
21294 /* ??? The value emitted depends on options active at file end.
21295 Assume anyone using #pragma or attributes that might change
21296 options knows what they are doing. */
21297 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
21298 && rs6000_passes_float)
21299 {
21300 int fp;
21301
21302 if (TARGET_HARD_FLOAT)
21303 fp = 1;
21304 else
21305 fp = 2;
21306 if (rs6000_passes_long_double)
21307 {
21308 if (!TARGET_LONG_DOUBLE_128)
21309 fp |= 2 * 4;
21310 else if (TARGET_IEEEQUAD)
21311 fp |= 3 * 4;
21312 else
21313 fp |= 1 * 4;
21314 }
21315 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
21316 }
21317 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
21318 {
21319 if (rs6000_passes_vector)
21320 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
21321 (TARGET_ALTIVEC_ABI ? 2 : 1));
21322 if (rs6000_returns_struct)
21323 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
21324 aix_struct_return ? 2 : 1);
21325 }
21326 #endif
21327 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21328 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
21329 file_end_indicate_exec_stack ();
21330 #endif
21331
21332 if (flag_split_stack)
21333 file_end_indicate_split_stack ();
21334
21335 if (cpu_builtin_p)
21336 {
21337 /* We have expanded a CPU builtin, so we need to emit a reference to
21338 the special symbol that LIBC uses to declare that it provides
21339 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
21340 switch_to_section (data_section);
21341 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
21342 fprintf (asm_out_file, "\t%s %s\n",
21343 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
21344 }
21345 }
21346 #endif
21347
21348 #if TARGET_XCOFF
21349
21350 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21351 #define HAVE_XCOFF_DWARF_EXTRAS 0
21352 #endif
21353
21354
21355 /* Names of bss and data sections. These should be unique names for each
21356 compilation unit. */
21357
21358 char *xcoff_bss_section_name;
21359 char *xcoff_private_data_section_name;
21360 char *xcoff_private_rodata_section_name;
21361 char *xcoff_tls_data_section_name;
21362 char *xcoff_read_only_section_name;
21363
21364 static enum unwind_info_type
21365 rs6000_xcoff_debug_unwind_info (void)
21366 {
21367 return UI_NONE;
21368 }
21369
21370 static void
21371 rs6000_xcoff_asm_output_anchor (rtx symbol)
21372 {
21373 char buffer[100];
21374
21375 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21376 SYMBOL_REF_BLOCK_OFFSET (symbol));
21377 fprintf (asm_out_file, "%s", SET_ASM_OP);
21378 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21379 fprintf (asm_out_file, ",");
21380 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21381 fprintf (asm_out_file, "\n");
21382 }
21383
21384 static void
21385 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21386 {
21387 fputs (GLOBAL_ASM_OP, stream);
21388 RS6000_OUTPUT_BASENAME (stream, name);
21389 putc ('\n', stream);
21390 }
21391
21392 /* A get_unnamed_section callback, used for read-only sections.
21393 DIRECTIVE selects the private rodata section name when non-NULL. */
21394
21395 static void
21396 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21397 {
21398 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21399 directive
21400 ? xcoff_private_rodata_section_name
21401 : xcoff_read_only_section_name,
21402 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21403 }
21404
21405 /* Likewise for read-write sections. */
21406
21407 static void
21408 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21409 {
21410 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21411 xcoff_private_data_section_name,
21412 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21413 }
21414
21415 static void
21416 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21417 {
21418 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21419 directive
21420 ? xcoff_private_data_section_name
21421 : xcoff_tls_data_section_name,
21422 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21423 }
21424
21425 /* A get_unnamed_section callback, used for switching to toc_section. */
21426
21427 static void
21428 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21429 {
21430 if (TARGET_MINIMAL_TOC)
21431 {
21432 /* toc_section is always selected at least once from
21433 rs6000_xcoff_file_start, so this is guaranteed to be
21434 defined exactly once in each file. */
21435 if (!toc_initialized)
21436 {
21437 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21438 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21439 toc_initialized = 1;
21440 }
21441 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21442 (TARGET_32BIT ? "" : ",3"));
21443 }
21444 else
21445 fputs ("\t.toc\n", asm_out_file);
21446 }
21447
21448 /* Implement TARGET_ASM_INIT_SECTIONS. */
21449
21450 static void
21451 rs6000_xcoff_asm_init_sections (void)
21452 {
21453 read_only_data_section
21454 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21455 NULL);
21456
21457 private_data_section
21458 = get_unnamed_section (SECTION_WRITE,
21459 rs6000_xcoff_output_readwrite_section_asm_op,
21460 NULL);
21461
21462 read_only_private_data_section
21463 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21464 "");
21465
21466 tls_data_section
21467 = get_unnamed_section (SECTION_TLS,
21468 rs6000_xcoff_output_tls_section_asm_op,
21469 NULL);
21470
21471 tls_private_data_section
21472 = get_unnamed_section (SECTION_TLS,
21473 rs6000_xcoff_output_tls_section_asm_op,
21474 "");
21475
21476 toc_section
21477 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21478
21479 readonly_data_section = read_only_data_section;
21480 }
21481
21482 static int
21483 rs6000_xcoff_reloc_rw_mask (void)
21484 {
21485 return 3;
21486 }
21487
21488 static void
21489 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21490 tree decl ATTRIBUTE_UNUSED)
21491 {
21492 int smclass;
21493 static const char * const suffix[7]
21494 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21495
21496 if (flags & SECTION_EXCLUDE)
21497 smclass = 6;
21498 else if (flags & SECTION_DEBUG)
21499 {
21500 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21501 return;
21502 }
21503 else if (flags & SECTION_CODE)
21504 smclass = 0;
21505 else if (flags & SECTION_TLS)
21506 {
21507 if (flags & SECTION_BSS)
21508 smclass = 5;
21509 else
21510 smclass = 4;
21511 }
21512 else if (flags & SECTION_WRITE)
21513 {
21514 if (flags & SECTION_BSS)
21515 smclass = 3;
21516 else
21517 smclass = 2;
21518 }
21519 else
21520 smclass = 1;
21521
21522 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21523 (flags & SECTION_CODE) ? "." : "",
21524 name, suffix[smclass], flags & SECTION_ENTSIZE);
21525 }
21526
21527 #define IN_NAMED_SECTION(DECL) \
21528 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21529 && DECL_SECTION_NAME (DECL) != NULL)
21530
21531 static section *
21532 rs6000_xcoff_select_section (tree decl, int reloc,
21533 unsigned HOST_WIDE_INT align)
21534 {
/* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
a named section.  */
21537 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21538 {
21539 resolve_unique_section (decl, reloc, true);
21540 if (IN_NAMED_SECTION (decl))
21541 return get_named_section (decl, NULL, reloc);
21542 }
21543
21544 if (decl_readonly_section (decl, reloc))
21545 {
21546 if (TREE_PUBLIC (decl))
21547 return read_only_data_section;
21548 else
21549 return read_only_private_data_section;
21550 }
21551 else
21552 {
21553 #if HAVE_AS_TLS
21554 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21555 {
21556 if (bss_initializer_p (decl))
21557 return tls_comm_section;
21558 else if (TREE_PUBLIC (decl))
21559 return tls_data_section;
21560 else
21561 return tls_private_data_section;
21562 }
21563 else
21564 #endif
21565 if (TREE_PUBLIC (decl))
21566 return data_section;
21567 else
21568 return private_data_section;
21569 }
21570 }
21571
21572 static void
21573 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21574 {
21575 const char *name;
21576
21577 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21578 name = (*targetm.strip_name_encoding) (name);
21579 set_decl_section_name (decl, name);
21580 }
21581
21582 /* Select section for constant in constant pool.
21583
21584 On RS/6000, all constants are in the private read-only data area.
21585 However, if this is being placed in the TOC it must be output as a
21586 toc entry. */
21587
21588 static section *
21589 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21590 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21591 {
21592 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21593 return toc_section;
21594 else
21595 return read_only_private_data_section;
21596 }
21597
21598 /* Remove any trailing [DS] or the like from the symbol name. */
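/* For example, both "*foo[DS]" and "foo[DS]" become "foo": a leading
'*' is skipped, and a trailing four-character "[XX]" mapping class is
dropped.  */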
21599
21600 static const char *
21601 rs6000_xcoff_strip_name_encoding (const char *name)
21602 {
21603 size_t len;
21604 if (*name == '*')
21605 name++;
21606 len = strlen (name);
21607 if (name[len - 1] == ']')
21608 return ggc_alloc_string (name, len - 4);
21609 else
21610 return name;
21611 }
21612
21613 /* Section attributes. AIX is always PIC. */
21614
21615 static unsigned int
21616 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21617 {
21618 unsigned int align;
21619 unsigned int flags = default_section_type_flags (decl, name, reloc);
21620
21621 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21622 flags |= SECTION_BSS;
21623
21624 /* Align to at least UNIT size. */
21625 if (!decl || !DECL_P (decl))
21626 align = MIN_UNITS_PER_WORD;
21627 /* Align code CSECT to at least 32 bytes. */
21628 else if ((flags & SECTION_CODE) != 0)
21629 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21630 else
21631 /* Increase alignment of large objects if not already stricter. */
21632 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21633 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21634 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21635
21636 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21637 }
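/* For instance, a writable section "foo" with 8-byte alignment gets
exact_log2 (8) = 3 stored in its SECTION_ENTSIZE bits, so
rs6000_xcoff_asm_named_section above emits ".csect foo[RW],3".  */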
21638
21639 /* Output at beginning of assembler file.
21640
21641 Initialize the section names for the RS/6000 at this point.
21642
21643 Specify filename, including full path, to assembler.
21644
21645 We want to go into the TOC section so at least one .toc will be emitted.
21646 Also, in order to output proper .bs/.es pairs, we need at least one static
21647 [RW] section emitted.
21648
21649 Finally, declare mcount when profiling to make the assembler happy. */
21650
21651 static void
21652 rs6000_xcoff_file_start (void)
21653 {
21654 rs6000_gen_section_name (&xcoff_bss_section_name,
21655 main_input_filename, ".bss_");
21656 rs6000_gen_section_name (&xcoff_private_data_section_name,
21657 main_input_filename, ".rw_");
21658 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21659 main_input_filename, ".rop_");
21660 rs6000_gen_section_name (&xcoff_read_only_section_name,
21661 main_input_filename, ".ro_");
21662 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21663 main_input_filename, ".tls_");
21664
21665 fputs ("\t.file\t", asm_out_file);
21666 output_quoted_string (asm_out_file, main_input_filename);
21667 fputc ('\n', asm_out_file);
21668 if (write_symbols != NO_DEBUG)
21669 switch_to_section (private_data_section);
21670 switch_to_section (toc_section);
21671 switch_to_section (text_section);
21672 if (profile_flag)
21673 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21674 rs6000_file_start ();
21675 }
21676
21677 /* Output at end of assembler file.
21678 On the RS/6000, referencing data should automatically pull in text. */
21679
21680 static void
21681 rs6000_xcoff_file_end (void)
21682 {
21683 switch_to_section (text_section);
21684 if (xcoff_tls_exec_model_detected)
21685 {
21686 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21687 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21688 }
21689 fputs ("_section_.text:\n", asm_out_file);
21690 switch_to_section (data_section);
21691 fputs (TARGET_32BIT
21692 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21693 asm_out_file);
21694
21695 }
21696
21697 struct declare_alias_data
21698 {
21699 FILE *file;
21700 bool function_descriptor;
21701 };
21702
/* Declare alias N.  A callback for
symtab_node::call_for_symbol_and_aliases.  */
21704
21705 static bool
21706 rs6000_declare_alias (struct symtab_node *n, void *d)
21707 {
21708 struct declare_alias_data *data = (struct declare_alias_data *)d;
/* The main symbol is output specially, because the varasm machinery
does part of the job for us - we do not need to emit .globl/.lglobl
directives and such.  */
21711 if (!n->alias || n->weakref)
21712 return false;
21713
21714 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21715 return false;
21716
21717 /* Prevent assemble_alias from trying to use .set pseudo operation
21718 that does not behave as expected by the middle-end. */
21719 TREE_ASM_WRITTEN (n->decl) = true;
21720
21721 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21722 char *buffer = (char *) alloca (strlen (name) + 2);
21723 char *p;
21724 int dollar_inside = 0;
21725
21726 strcpy (buffer, name);
21727 p = strchr (buffer, '$');
21728 while (p) {
21729 *p = '_';
21730 dollar_inside++;
21731 p = strchr (p + 1, '$');
21732 }
21733 if (TREE_PUBLIC (n->decl))
21734 {
21735 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21736 {
21737 if (dollar_inside) {
21738 if (data->function_descriptor)
21739 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21740 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21741 }
21742 if (data->function_descriptor)
21743 {
21744 fputs ("\t.globl .", data->file);
21745 RS6000_OUTPUT_BASENAME (data->file, buffer);
21746 putc ('\n', data->file);
21747 }
21748 fputs ("\t.globl ", data->file);
21749 assemble_name (data->file, buffer);
21750 putc ('\n', data->file);
21751 }
21752 #ifdef ASM_WEAKEN_DECL
21753 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21754 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21755 #endif
21756 }
21757 else
21758 {
21759 if (dollar_inside)
21760 {
21761 if (data->function_descriptor)
21762 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21763 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21764 }
21765 if (data->function_descriptor)
21766 {
21767 fputs ("\t.lglobl .", data->file);
21768 RS6000_OUTPUT_BASENAME (data->file, buffer);
21769 putc ('\n', data->file);
21770 }
21771 fputs ("\t.lglobl ", data->file);
21772 assemble_name (data->file, buffer);
21773 putc ('\n', data->file);
21774 }
21775 if (data->function_descriptor)
21776 putc ('.', data->file);
21777 ASM_OUTPUT_LABEL (data->file, buffer);
21778 return false;
21779 }
21780
21781
21782 #ifdef HAVE_GAS_HIDDEN
21783 /* Helper function to calculate visibility of a DECL
21784 and return the value as a const string. */
21785
21786 static const char *
21787 rs6000_xcoff_visibility (tree decl)
21788 {
21789 static const char * const visibility_types[] = {
21790 "", ",protected", ",hidden", ",internal"
21791 };
21792
21793 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21794 return visibility_types[vis];
21795 }
21796 #endif
21797
21798
21799 /* This macro produces the initial definition of a function name.
21800 On the RS/6000, we need to place an extra '.' in the function name and
21801 output the function descriptor.
21802 Dollar signs are converted to underscores.
21803
21804 The csect for the function will have already been created when
21805 text_section was selected. We do have to go back to that csect, however.
21806
The third and fourth parameters to the .function pseudo-op (the "2,0"
emitted below) are placeholders which no longer have any use.
21809
21810 Because AIX assembler's .set command has unexpected semantics, we output
21811 all aliases as alternative labels in front of the definition. */
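/* Roughly, for a public 64-bit function foo this emits

.globl .foo
.csect foo[DS],3
foo:
.llong .foo, TOC[tc0], 0
.foo:

i.e. a function descriptor holding the code address and TOC anchor,
followed by the '.'-prefixed entry label back in the text section.  */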
21812
21813 void
21814 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21815 {
21816 char *buffer = (char *) alloca (strlen (name) + 1);
21817 char *p;
21818 int dollar_inside = 0;
21819 struct declare_alias_data data = {file, false};
21820
21821 strcpy (buffer, name);
21822 p = strchr (buffer, '$');
21823 while (p) {
21824 *p = '_';
21825 dollar_inside++;
21826 p = strchr (p + 1, '$');
21827 }
21828 if (TREE_PUBLIC (decl))
21829 {
21830 if (!RS6000_WEAK || !DECL_WEAK (decl))
21831 {
21832 if (dollar_inside) {
21833 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21834 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21835 }
21836 fputs ("\t.globl .", file);
21837 RS6000_OUTPUT_BASENAME (file, buffer);
21838 #ifdef HAVE_GAS_HIDDEN
21839 fputs (rs6000_xcoff_visibility (decl), file);
21840 #endif
21841 putc ('\n', file);
21842 }
21843 }
21844 else
21845 {
21846 if (dollar_inside) {
21847 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21848 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21849 }
21850 fputs ("\t.lglobl .", file);
21851 RS6000_OUTPUT_BASENAME (file, buffer);
21852 putc ('\n', file);
21853 }
21854
21855 fputs ("\t.csect ", file);
21856 assemble_name (file, buffer);
21857 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21858
21859 ASM_OUTPUT_LABEL (file, buffer);
21860
21861 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21862 &data, true);
21863 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21864 RS6000_OUTPUT_BASENAME (file, buffer);
21865 fputs (", TOC[tc0], 0\n", file);
21866
21867 in_section = NULL;
21868 switch_to_section (function_section (decl));
21869 putc ('.', file);
21870 ASM_OUTPUT_LABEL (file, buffer);
21871
21872 data.function_descriptor = true;
21873 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21874 &data, true);
21875 if (!DECL_IGNORED_P (decl))
21876 {
21877 if (dwarf_debuginfo_p ())
21878 {
21879 name = (*targetm.strip_name_encoding) (name);
21880 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21881 }
21882 }
21884 }
21885
21886
21887 /* Output assembly language to globalize a symbol from a DECL,
21888 possibly with visibility. */
21889
21890 void
21891 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21892 {
21893 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21894 fputs (GLOBAL_ASM_OP, stream);
21895 assemble_name (stream, name);
21896 #ifdef HAVE_GAS_HIDDEN
21897 fputs (rs6000_xcoff_visibility (decl), stream);
21898 #endif
21899 putc ('\n', stream);
21900 }
21901
21902 /* Output assembly language to define a symbol as COMMON from a DECL,
21903 possibly with visibility. */
21904
21905 void
21906 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21907 tree decl ATTRIBUTE_UNUSED,
21908 const char *name,
21909 unsigned HOST_WIDE_INT size,
21910 unsigned int align)
21911 {
21912 unsigned int align2 = 2;
21913
21914 if (align == 0)
21915 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21916
21917 if (align > 32)
21918 align2 = floor_log2 (align / BITS_PER_UNIT);
21919 else if (size > 4)
21920 align2 = 3;
21921
21922 if (! DECL_COMMON (decl))
21923 {
21924 /* Forget section. */
21925 in_section = NULL;
21926
21927 /* Globalize TLS BSS. */
21928 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21929 {
21930 fputs (GLOBAL_ASM_OP, stream);
21931 assemble_name (stream, name);
21932 fputc ('\n', stream);
21933 }
21934
21935 /* Switch to section and skip space. */
21936 fputs ("\t.csect ", stream);
21937 assemble_name (stream, name);
21938 fprintf (stream, ",%u\n", align2);
21939 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21940 ASM_OUTPUT_SKIP (stream, size ? size : 1);
21941 return;
21942 }
21943
21944 if (TREE_PUBLIC (decl))
21945 {
21946 fprintf (stream,
21947 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21948 name, size, align2);
21949
21950 #ifdef HAVE_GAS_HIDDEN
21951 if (decl != NULL)
21952 fputs (rs6000_xcoff_visibility (decl), stream);
21953 #endif
21954 putc ('\n', stream);
21955 }
21956 else
21957 fprintf (stream,
21958 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21959 (*targetm.strip_name_encoding) (name), size, name, align2);
21960 }
21961
/* This macro produces the initial definition of an object (variable) name.
21963 Because AIX assembler's .set command has unexpected semantics, we output
21964 all aliases as alternative labels in front of the definition. */
21965
21966 void
21967 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21968 {
21969 struct declare_alias_data data = {file, false};
21970 ASM_OUTPUT_LABEL (file, name);
21971 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21972 &data, true);
21973 }
21974
/* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'.  */
21976
21977 void
21978 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21979 {
21980 fputs (integer_asm_op (size, FALSE), file);
21981 assemble_name (file, label);
21982 fputs ("-$", file);
21983 }
21984
21985 /* Output a symbol offset relative to the dbase for the current object.
21986 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21987 signed offsets.
21988
21989 __gcc_unwind_dbase is embedded in all executables/libraries through
21990 libgcc/config/rs6000/crtdbase.S. */
21991
21992 void
21993 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21994 {
21995 fputs (integer_asm_op (size, FALSE), file);
21996 assemble_name (file, label);
21997 fputs("-__gcc_unwind_dbase", file);
21998 }
21999
22000 #ifdef HAVE_AS_TLS
22001 static void
22002 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
22003 {
22004 rtx symbol;
22005 int flags;
22006 const char *symname;
22007
22008 default_encode_section_info (decl, rtl, first);
22009
22010 /* Careful not to prod global register variables. */
22011 if (!MEM_P (rtl))
22012 return;
22013 symbol = XEXP (rtl, 0);
22014 if (!SYMBOL_REF_P (symbol))
22015 return;
22016
22017 flags = SYMBOL_REF_FLAGS (symbol);
22018
22019 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
22020 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
22021
22022 SYMBOL_REF_FLAGS (symbol) = flags;
22023
22024 symname = XSTR (symbol, 0);
22025
22026 /* Append CSECT mapping class, unless the symbol already is qualified.
22027 Aliases are implemented as labels, so the symbol name should not add
22028 a mapping class. */
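/* For example, a function "f" becomes "f[DS]", an uninitialized TLS
variable "v" becomes "v[UL]", and with -fdata-sections a writable
initialized variable becomes "v[RW]".  */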
22029 if (decl
22030 && DECL_P (decl)
22031 && VAR_OR_FUNCTION_DECL_P (decl)
22032 && (symtab_node::get (decl) == NULL
22033 || symtab_node::get (decl)->alias == 0)
22034 && symname[strlen (symname) - 1] != ']')
22035 {
22036 const char *smclass = NULL;
22037
22038 if (TREE_CODE (decl) == FUNCTION_DECL)
22039 smclass = "[DS]";
22040 else if (DECL_THREAD_LOCAL_P (decl))
22041 {
22042 if (bss_initializer_p (decl))
22043 smclass = "[UL]";
22044 else if (flag_data_sections)
22045 smclass = "[TL]";
22046 }
22047 else if (DECL_EXTERNAL (decl))
22048 smclass = "[UA]";
22049 else if (bss_initializer_p (decl))
22050 smclass = "[BS]";
22051 else if (flag_data_sections)
22052 {
/* This must exactly match the logic of rs6000_xcoff_select_section.  */
22054 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
22055 smclass = "[RO]";
22056 else
22057 smclass = "[RW]";
22058 }
22059
22060 if (smclass != NULL)
22061 {
22062 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
22063
22064 strcpy (newname, symname);
22065 strcat (newname, smclass);
22066 XSTR (symbol, 0) = ggc_strdup (newname);
22067 }
22068 }
22069 }
22070 #endif /* HAVE_AS_TLS */
22071 #endif /* TARGET_XCOFF */
22072
22073 void
22074 rs6000_asm_weaken_decl (FILE *stream, tree decl,
22075 const char *name, const char *val)
22076 {
22077 fputs ("\t.weak\t", stream);
22078 assemble_name (stream, name);
22079 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22080 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22081 {
22082 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22083 if (TARGET_XCOFF)
22084 fputs (rs6000_xcoff_visibility (decl), stream);
22085 #endif
22086 fputs ("\n\t.weak\t.", stream);
22087 RS6000_OUTPUT_BASENAME (stream, name);
22088 }
22089 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22090 if (TARGET_XCOFF)
22091 fputs (rs6000_xcoff_visibility (decl), stream);
22092 #endif
22093 fputc ('\n', stream);
22094
22095 if (val)
22096 {
22097 #ifdef ASM_OUTPUT_DEF
22098 ASM_OUTPUT_DEF (stream, name, val);
22099 #endif
22100 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22101 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22102 {
22103 fputs ("\t.set\t.", stream);
22104 RS6000_OUTPUT_BASENAME (stream, name);
22105 fputs (",.", stream);
22106 RS6000_OUTPUT_BASENAME (stream, val);
22107 fputc ('\n', stream);
22108 }
22109 }
22110 }
22111
22112
22113 /* Return true if INSN should not be copied. */
22114
22115 static bool
22116 rs6000_cannot_copy_insn_p (rtx_insn *insn)
22117 {
22118 return recog_memoized (insn) >= 0
22119 && get_attr_cannot_copy (insn);
22120 }
22121
22122 /* Compute a (partial) cost for rtx X. Return true if the complete
22123 cost has been computed, and false if subexpressions should be
22124 scanned. In either case, *TOTAL contains the cost result. */
22125
22126 static bool
22127 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
22128 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
22129 {
22130 int code = GET_CODE (x);
22131
22132 switch (code)
22133 {
22134 /* On the RS/6000, if it is valid in the insn, it is free. */
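/* The constraints tested below are defined in rs6000/constraints.md;
roughly: I is a signed 16-bit constant, K an unsigned 16-bit
constant, L a signed 16-bit constant shifted left 16 bits, J an
unsigned 16-bit constant shifted left 16 bits, and P a constant
whose negation is a signed 16-bit constant.  */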
22135 case CONST_INT:
22136 if (((outer_code == SET
22137 || outer_code == PLUS
22138 || outer_code == MINUS)
22139 && (satisfies_constraint_I (x)
22140 || satisfies_constraint_L (x)))
22141 || (outer_code == AND
22142 && (satisfies_constraint_K (x)
22143 || (mode == SImode
22144 ? satisfies_constraint_L (x)
22145 : satisfies_constraint_J (x))))
22146 || ((outer_code == IOR || outer_code == XOR)
22147 && (satisfies_constraint_K (x)
22148 || (mode == SImode
22149 ? satisfies_constraint_L (x)
22150 : satisfies_constraint_J (x))))
22151 || outer_code == ASHIFT
22152 || outer_code == ASHIFTRT
22153 || outer_code == LSHIFTRT
22154 || outer_code == ROTATE
22155 || outer_code == ROTATERT
22156 || outer_code == ZERO_EXTRACT
22157 || (outer_code == MULT
22158 && satisfies_constraint_I (x))
22159 || ((outer_code == DIV || outer_code == UDIV
22160 || outer_code == MOD || outer_code == UMOD)
22161 && exact_log2 (INTVAL (x)) >= 0)
22162 || (outer_code == COMPARE
22163 && (satisfies_constraint_I (x)
22164 || satisfies_constraint_K (x)))
22165 || ((outer_code == EQ || outer_code == NE)
22166 && (satisfies_constraint_I (x)
22167 || satisfies_constraint_K (x)
22168 || (mode == SImode
22169 ? satisfies_constraint_L (x)
22170 : satisfies_constraint_J (x))))
22171 || (outer_code == GTU
22172 && satisfies_constraint_I (x))
22173 || (outer_code == LTU
22174 && satisfies_constraint_P (x)))
22175 {
22176 *total = 0;
22177 return true;
22178 }
22179 else if ((outer_code == PLUS
22180 && reg_or_add_cint_operand (x, mode))
22181 || (outer_code == MINUS
22182 && reg_or_sub_cint_operand (x, mode))
22183 || ((outer_code == SET
22184 || outer_code == IOR
22185 || outer_code == XOR)
22186 && (INTVAL (x)
22187 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
22188 {
22189 *total = COSTS_N_INSNS (1);
22190 return true;
22191 }
22192 /* FALLTHRU */
22193
22194 case CONST_DOUBLE:
22195 case CONST_WIDE_INT:
22196 case CONST:
22197 case HIGH:
22198 case SYMBOL_REF:
22199 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22200 return true;
22201
22202 case MEM:
/* When optimizing for size, MEM should be slightly more expensive
than generating the address, e.g., (plus (reg) (const)).
L1 cache latency is about two instructions.  */
22206 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22207 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
22208 *total += COSTS_N_INSNS (100);
22209 return true;
22210
22211 case LABEL_REF:
22212 *total = 0;
22213 return true;
22214
22215 case PLUS:
22216 case MINUS:
22217 if (FLOAT_MODE_P (mode))
22218 *total = rs6000_cost->fp;
22219 else
22220 *total = COSTS_N_INSNS (1);
22221 return false;
22222
22223 case MULT:
22224 if (CONST_INT_P (XEXP (x, 1))
22225 && satisfies_constraint_I (XEXP (x, 1)))
22226 {
22227 if (INTVAL (XEXP (x, 1)) >= -256
22228 && INTVAL (XEXP (x, 1)) <= 255)
22229 *total = rs6000_cost->mulsi_const9;
22230 else
22231 *total = rs6000_cost->mulsi_const;
22232 }
22233 else if (mode == SFmode)
22234 *total = rs6000_cost->fp;
22235 else if (FLOAT_MODE_P (mode))
22236 *total = rs6000_cost->dmul;
22237 else if (mode == DImode)
22238 *total = rs6000_cost->muldi;
22239 else
22240 *total = rs6000_cost->mulsi;
22241 return false;
22242
22243 case FMA:
22244 if (mode == SFmode)
22245 *total = rs6000_cost->fp;
22246 else
22247 *total = rs6000_cost->dmul;
22248 break;
22249
22250 case DIV:
22251 case MOD:
22252 if (FLOAT_MODE_P (mode))
22253 {
22254 *total = mode == DFmode ? rs6000_cost->ddiv
22255 : rs6000_cost->sdiv;
22256 return false;
22257 }
22258 /* FALLTHRU */
22259
22260 case UDIV:
22261 case UMOD:
22262 if (CONST_INT_P (XEXP (x, 1))
22263 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
22264 {
22265 if (code == DIV || code == MOD)
22266 /* Shift, addze */
22267 *total = COSTS_N_INSNS (2);
22268 else
22269 /* Shift */
22270 *total = COSTS_N_INSNS (1);
22271 }
22272 else
22273 {
22274 if (GET_MODE (XEXP (x, 1)) == DImode)
22275 *total = rs6000_cost->divdi;
22276 else
22277 *total = rs6000_cost->divsi;
22278 }
22279 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22280 if (!TARGET_MODULO && (code == MOD || code == UMOD))
22281 *total += COSTS_N_INSNS (2);
22282 return false;
22283
22284 case CTZ:
22285 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
22286 return false;
22287
22288 case FFS:
22289 *total = COSTS_N_INSNS (4);
22290 return false;
22291
22292 case POPCOUNT:
22293 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
22294 return false;
22295
22296 case PARITY:
22297 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
22298 return false;
22299
22300 case NOT:
22301 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
22302 *total = 0;
22303 else
22304 *total = COSTS_N_INSNS (1);
22305 return false;
22306
22307 case AND:
22308 if (CONST_INT_P (XEXP (x, 1)))
22309 {
22310 rtx left = XEXP (x, 0);
22311 rtx_code left_code = GET_CODE (left);
22312
22313 /* rotate-and-mask: 1 insn. */
22314 if ((left_code == ROTATE
22315 || left_code == ASHIFT
22316 || left_code == LSHIFTRT)
22317 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
22318 {
22319 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
22320 if (!CONST_INT_P (XEXP (left, 1)))
22321 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
22322 *total += COSTS_N_INSNS (1);
22323 return true;
22324 }
22325
22326 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22327 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
22328 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
22329 || (val & 0xffff) == val
22330 || (val & 0xffff0000) == val
22331 || ((val & 0xffff) == 0 && mode == SImode))
22332 {
22333 *total = rtx_cost (left, mode, AND, 0, speed);
22334 *total += COSTS_N_INSNS (1);
22335 return true;
22336 }
22337
22338 /* 2 insns. */
22339 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
22340 {
22341 *total = rtx_cost (left, mode, AND, 0, speed);
22342 *total += COSTS_N_INSNS (2);
22343 return true;
22344 }
22345 }
22346
22347 *total = COSTS_N_INSNS (1);
22348 return false;
22349
22350 case IOR:
22351 /* FIXME */
22352 *total = COSTS_N_INSNS (1);
22353 return true;
22354
22355 case CLZ:
22356 case XOR:
22357 case ZERO_EXTRACT:
22358 *total = COSTS_N_INSNS (1);
22359 return false;
22360
22361 case ASHIFT:
22362 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22363 the sign extend and shift separately within the insn. */
22364 if (TARGET_EXTSWSLI && mode == DImode
22365 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
22366 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22367 {
22368 *total = 0;
22369 return false;
22370 }
22371 /* fall through */
22372
22373 case ASHIFTRT:
22374 case LSHIFTRT:
22375 case ROTATE:
22376 case ROTATERT:
22377 /* Handle mul_highpart. */
22378 if (outer_code == TRUNCATE
22379 && GET_CODE (XEXP (x, 0)) == MULT)
22380 {
22381 if (mode == DImode)
22382 *total = rs6000_cost->muldi;
22383 else
22384 *total = rs6000_cost->mulsi;
22385 return true;
22386 }
22387 else if (outer_code == AND)
22388 *total = 0;
22389 else
22390 *total = COSTS_N_INSNS (1);
22391 return false;
22392
22393 case SIGN_EXTEND:
22394 case ZERO_EXTEND:
22395 if (MEM_P (XEXP (x, 0)))
22396 *total = 0;
22397 else
22398 *total = COSTS_N_INSNS (1);
22399 return false;
22400
22401 case COMPARE:
22402 case NEG:
22403 case ABS:
22404 if (!FLOAT_MODE_P (mode))
22405 {
22406 *total = COSTS_N_INSNS (1);
22407 return false;
22408 }
22409 /* FALLTHRU */
22410
22411 case FLOAT:
22412 case UNSIGNED_FLOAT:
22413 case FIX:
22414 case UNSIGNED_FIX:
22415 case FLOAT_TRUNCATE:
22416 *total = rs6000_cost->fp;
22417 return false;
22418
22419 case FLOAT_EXTEND:
22420 if (mode == DFmode)
22421 *total = rs6000_cost->sfdf_convert;
22422 else
22423 *total = rs6000_cost->fp;
22424 return false;
22425
22426 case CALL:
22427 case IF_THEN_ELSE:
22428 if (!speed)
22429 {
22430 *total = COSTS_N_INSNS (1);
22431 return true;
22432 }
22433 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22434 {
22435 *total = rs6000_cost->fp;
22436 return false;
22437 }
22438 break;
22439
22440 case NE:
22441 case EQ:
22442 case GTU:
22443 case LTU:
22444 /* Carry bit requires mode == Pmode.
22445 NEG or PLUS already counted so only add one. */
22446 if (mode == Pmode
22447 && (outer_code == NEG || outer_code == PLUS))
22448 {
22449 *total = COSTS_N_INSNS (1);
22450 return true;
22451 }
22452 /* FALLTHRU */
22453
22454 case GT:
22455 case LT:
22456 case UNORDERED:
22457 if (outer_code == SET)
22458 {
22459 if (XEXP (x, 1) == const0_rtx)
22460 {
22461 *total = COSTS_N_INSNS (2);
22462 return true;
22463 }
22464 else
22465 {
22466 *total = COSTS_N_INSNS (3);
22467 return false;
22468 }
22469 }
22470 /* CC COMPARE. */
22471 if (outer_code == COMPARE)
22472 {
22473 *total = 0;
22474 return true;
22475 }
22476 break;
22477
22478 case UNSPEC:
22479 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22480 {
22481 *total = 0;
22482 return true;
22483 }
22484 break;
22485
22486 default:
22487 break;
22488 }
22489
22490 return false;
22491 }
22492
/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */
22494
22495 static bool
22496 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22497 int opno, int *total, bool speed)
22498 {
22499 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22500
22501 fprintf (stderr,
22502 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22503 "opno = %d, total = %d, speed = %s, x:\n",
22504 ret ? "complete" : "scan inner",
22505 GET_MODE_NAME (mode),
22506 GET_RTX_NAME (outer_code),
22507 opno,
22508 *total,
22509 speed ? "true" : "false");
22510
22511 debug_rtx (x);
22512
22513 return ret;
22514 }
22515
22516 static int
22517 rs6000_insn_cost (rtx_insn *insn, bool speed)
22518 {
22519 if (recog_memoized (insn) < 0)
22520 return 0;
22521
22522 /* If we are optimizing for size, just use the length. */
22523 if (!speed)
22524 return get_attr_length (insn);
22525
22526 /* Use the cost if provided. */
22527 int cost = get_attr_cost (insn);
22528 if (cost > 0)
22529 return cost;
22530
22531 /* If the insn tells us how many insns there are, use that. Otherwise use
22532 the length/4. Adjust the insn length to remove the extra size that
22533 prefixed instructions take. */
22534 int n = get_attr_num_insns (insn);
22535 if (n == 0)
22536 {
22537 int length = get_attr_length (insn);
22538 if (get_attr_prefixed (insn) == PREFIXED_YES)
22539 {
22540 int adjust = 0;
22541 ADJUST_INSN_LENGTH (insn, adjust);
22542 length -= adjust;
22543 }
22544
22545 n = length / 4;
22546 }
22547
22548 enum attr_type type = get_attr_type (insn);
22549
22550 switch (type)
22551 {
22552 case TYPE_LOAD:
22553 case TYPE_FPLOAD:
22554 case TYPE_VECLOAD:
22555 cost = COSTS_N_INSNS (n + 1);
22556 break;
22557
22558 case TYPE_MUL:
22559 switch (get_attr_size (insn))
22560 {
22561 case SIZE_8:
22562 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22563 break;
22564 case SIZE_16:
22565 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22566 break;
22567 case SIZE_32:
22568 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22569 break;
22570 case SIZE_64:
22571 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22572 break;
22573 default:
22574 gcc_unreachable ();
22575 }
22576 break;
22577 case TYPE_DIV:
22578 switch (get_attr_size (insn))
22579 {
22580 case SIZE_32:
22581 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22582 break;
22583 case SIZE_64:
22584 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22585 break;
22586 default:
22587 gcc_unreachable ();
22588 }
22589 break;
22590
22591 case TYPE_FP:
22592 cost = n * rs6000_cost->fp;
22593 break;
22594 case TYPE_DMUL:
22595 cost = n * rs6000_cost->dmul;
22596 break;
22597 case TYPE_SDIV:
22598 cost = n * rs6000_cost->sdiv;
22599 break;
22600 case TYPE_DDIV:
22601 cost = n * rs6000_cost->ddiv;
22602 break;
22603
22604 case TYPE_SYNC:
22605 case TYPE_LOAD_L:
22606 case TYPE_MFCR:
22607 case TYPE_MFCRF:
22608 cost = COSTS_N_INSNS (n + 2);
22609 break;
22610
22611 default:
22612 cost = COSTS_N_INSNS (n);
22613 }
22614
22615 return cost;
22616 }
22617
22618 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22619
22620 static int
22621 rs6000_debug_address_cost (rtx x, machine_mode mode,
22622 addr_space_t as, bool speed)
22623 {
22624 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22625
22626 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22627 ret, speed ? "true" : "false");
22628 debug_rtx (x);
22629
22630 return ret;
22631 }
22632
22633
/* Subroutine to determine the move cost of dense math registers.  If we are
moving to/from VSX registers, the cost is either 1 move (for 512-bit
accumulators) or 2 moves (for 1,024-bit DMR registers).  If we are
moving to anything else like GPR registers, make the cost very high.  */
22638
22639 static int
22640 rs6000_dmr_register_move_cost (machine_mode mode, reg_class_t rclass)
22641 {
22642 const int reg_move_base = 2;
22643 HARD_REG_SET vsx_set = (reg_class_contents[rclass]
22644 & reg_class_contents[VSX_REGS]);
22645
22646 if (TARGET_DENSE_MATH && !hard_reg_set_empty_p (vsx_set))
22647 {
/* __vector_quad (i.e. XOmode) is transferred in 1 instruction.  */
22649 if (mode == XOmode)
22650 return reg_move_base;
22651
22652 /* __dmr (i.e. TDOmode) is transferred in 2 instructions. */
22653 else if (mode == TDOmode)
22654 return reg_move_base * 2;
22655
22656 else
22657 return reg_move_base * 2 * hard_regno_nregs (FIRST_DMR_REGNO, mode);
22658 }
22659
22660 return 1000 * 2 * hard_regno_nregs (FIRST_DMR_REGNO, mode);
22661 }
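/* E.g. moving a __vector_quad (XOmode) between DMRs and VSX registers
costs 2 and a __dmr (TDOmode) costs 4, while any move that would have
to go through the GPRs is effectively forbidden by the 1000-based
cost.  */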
22662
22663 /* A C expression returning the cost of moving data from a register of class
22664 CLASS1 to one of CLASS2. */
22665
22666 static int
22667 rs6000_register_move_cost (machine_mode mode,
22668 reg_class_t from, reg_class_t to)
22669 {
22670 int ret;
22671 reg_class_t rclass;
22672
22673 if (TARGET_DEBUG_COST)
22674 dbg_cost_ctrl++;
22675
22676 HARD_REG_SET to_vsx, from_vsx;
22677 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22678 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22679
22680 /* Special case DMR registers, that can only move to/from VSX registers. */
22681 if (from == DM_REGS && to == DM_REGS)
22682 ret = 2 * hard_regno_nregs (FIRST_DMR_REGNO, mode);
22683
22684 else if (from == DM_REGS)
22685 ret = rs6000_dmr_register_move_cost (mode, to);
22686
22687 else if (to == DM_REGS)
22688 ret = rs6000_dmr_register_move_cost (mode, from);
22689
22690 /* If we have VSX, we can easily move between FPR or Altivec registers,
22691 otherwise we can only easily move within classes.
22692 Do this first so we give best-case answers for union classes
22693 containing both gprs and vsx regs. */
22694 else if (!hard_reg_set_empty_p (to_vsx)
22695 && !hard_reg_set_empty_p (from_vsx)
22696 && (TARGET_VSX
22697 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22698 {
22699 int reg = FIRST_FPR_REGNO;
22700 if (TARGET_VSX
22701 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22702 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22703 reg = FIRST_ALTIVEC_REGNO;
22704 ret = 2 * hard_regno_nregs (reg, mode);
22705 }
22706
22707 /* Moves from/to GENERAL_REGS. */
22708 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22709 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22710 {
22711 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22712 {
22713 if (TARGET_DIRECT_MOVE)
22714 {
22715 /* Keep the cost for direct moves above that for within
22716 a register class even if the actual processor cost is
22717 comparable. We do this because a direct move insn
22718 can't be a nop, whereas with ideal register
22719 allocation a move within the same class might turn
22720 out to be a nop. */
22721 if (rs6000_tune == PROCESSOR_POWER9
22722 || rs6000_tune == PROCESSOR_POWER10
22723 || rs6000_tune == PROCESSOR_FUTURE)
22724 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22725 else
22726 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22727 /* SFmode requires a conversion when moving between gprs
22728 and vsx. */
22729 if (mode == SFmode)
22730 ret += 2;
22731 }
22732 else
22733 ret = (rs6000_memory_move_cost (mode, rclass, false)
22734 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22735 }
22736
22737 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22738 shift. */
22739 else if (rclass == CR_REGS)
22740 ret = 4;
22741
/* For those processors that have slow LR/CTR moves, make them more
expensive than memory in order to bias spills to memory.  */
22744 else if ((rs6000_tune == PROCESSOR_POWER6
22745 || rs6000_tune == PROCESSOR_POWER7
22746 || rs6000_tune == PROCESSOR_POWER8
22747 || rs6000_tune == PROCESSOR_POWER9)
22748 && reg_class_subset_p (rclass, SPECIAL_REGS))
22749 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22750
22751 else
22752 /* A move will cost one instruction per GPR moved. */
22753 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22754 }
22755
22756 /* Everything else has to go through GENERAL_REGS. */
22757 else
22758 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22759 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22760
22761 if (TARGET_DEBUG_COST)
22762 {
22763 if (dbg_cost_ctrl == 1)
22764 fprintf (stderr,
22765 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22766 ret, GET_MODE_NAME (mode), reg_class_names[from],
22767 reg_class_names[to]);
22768 dbg_cost_ctrl--;
22769 }
22770
22771 return ret;
22772 }
22773
/* A C expression returning the cost of moving data of mode MODE between a
register and memory.  */
22776
22777 static int
22778 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22779 bool in ATTRIBUTE_UNUSED)
22780 {
22781 int ret;
22782
22783 if (TARGET_DEBUG_COST)
22784 dbg_cost_ctrl++;
22785
22786 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22787 ret = 4 * hard_regno_nregs (0, mode);
22788 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22789 || reg_classes_intersect_p (rclass, VSX_REGS)))
22790 ret = 4 * hard_regno_nregs (32, mode);
22791 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22792 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22793 else if (reg_classes_intersect_p (rclass, DM_REGS))
22794 ret = (rs6000_dmr_register_move_cost (mode, VSX_REGS)
22795 + rs6000_memory_move_cost (mode, VSX_REGS, false));
22796 else
22797 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22798
22799 if (TARGET_DEBUG_COST)
22800 {
22801 if (dbg_cost_ctrl == 1)
22802 fprintf (stderr,
22803 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22804 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22805 dbg_cost_ctrl--;
22806 }
22807
22808 return ret;
22809 }
22810
22811 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22812
22813 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22814 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22815 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22816 move cost between GENERAL_REGS and VSX_REGS low.
22817
22818 It might seem reasonable to use a union class. After all, if usage
22819 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22820 rather than memory. However, in cases where register pressure of
22821 both is high, like the cactus_adm spec test, allowing
22822 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22823 the first scheduling pass. This is partly due to an allocno of
22824 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22825 class, which gives too high a pressure for GENERAL_REGS and too low
22826 for VSX_REGS. So, force a choice of the subclass here.
22827
22828 The best class is also the union if GENERAL_REGS and VSX_REGS have
22829 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22830 allocno class, since trying to narrow down the class by regno mode
22831 is prone to error. For example, SImode is allowed in VSX regs and
in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c, do_bswap32_vect)
22833 it would be wrong to choose an allocno of GENERAL_REGS based on
22834 SImode. */
22835
22836 static reg_class_t
22837 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22838 reg_class_t allocno_class,
22839 reg_class_t best_class)
22840 {
22841 switch (allocno_class)
22842 {
22843 case GEN_OR_VSX_REGS:
22844 /* best_class must be a subset of allocno_class. */
22845 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22846 || best_class == GEN_OR_FLOAT_REGS
22847 || best_class == VSX_REGS
22848 || best_class == ALTIVEC_REGS
22849 || best_class == FLOAT_REGS
22850 || best_class == GENERAL_REGS
22851 || best_class == BASE_REGS);
22852 /* Use best_class but choose wider classes when copying from the
22853 wider class to best_class is cheap. This mimics IRA choice
22854 of allocno class. */
22855 if (best_class == BASE_REGS)
22856 return GENERAL_REGS;
22857 if (TARGET_VSX && best_class == FLOAT_REGS)
22858 return VSX_REGS;
22859 return best_class;
22860
22861 case VSX_REGS:
22862 if (best_class == ALTIVEC_REGS)
22863 return ALTIVEC_REGS;
22864
22865 default:
22866 break;
22867 }
22868
22869 return allocno_class;
22870 }
22871
22872 /* Load up a constant. If the mode is a vector mode, splat the value across
22873 all of the vector elements. */
22874
22875 static rtx
22876 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22877 {
22878 rtx reg;
22879
22880 if (mode == SFmode || mode == DFmode)
22881 {
22882 rtx d = const_double_from_real_value (dconst, mode);
22883 reg = force_reg (mode, d);
22884 }
22885 else if (mode == V4SFmode)
22886 {
22887 rtx d = const_double_from_real_value (dconst, SFmode);
22888 rtvec v = gen_rtvec (4, d, d, d, d);
22889 reg = gen_reg_rtx (mode);
22890 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22891 }
22892 else if (mode == V2DFmode)
22893 {
22894 rtx d = const_double_from_real_value (dconst, DFmode);
22895 rtvec v = gen_rtvec (2, d, d);
22896 reg = gen_reg_rtx (mode);
22897 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22898 }
22899 else
22900 gcc_unreachable ();
22901
22902 return reg;
22903 }
22904
22905 /* Generate an FMA instruction. */
22906
22907 static void
22908 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22909 {
22910 machine_mode mode = GET_MODE (target);
22911 rtx dst;
22912
22913 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22914 gcc_assert (dst != NULL);
22915
22916 if (dst != target)
22917 emit_move_insn (target, dst);
22918 }
22919
22920 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22921
22922 static void
22923 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22924 {
22925 machine_mode mode = GET_MODE (dst);
22926 rtx r;
22927
22928 /* This is a tad more complicated, since the fnma_optab is for
22929 a different expression: fma(-m1, m2, a), which is the same
22930 thing except in the case of signed zeros.
22931
22932 Fortunately we know that if FMA is supported that FNMSUB is
22933 also supported in the ISA. Just expand it directly. */
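/* For instance, with m1*m2 = +0.0 and a = +0.0, fma (-m1, m2, a)
yields +0.0, whereas -fma (m1, m2, -a) = -(+0.0 + -0.0) = -0.0.  */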
22934
22935 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22936
22937 r = gen_rtx_NEG (mode, a);
22938 r = gen_rtx_FMA (mode, m1, m2, r);
22939 r = gen_rtx_NEG (mode, r);
22940 emit_insn (gen_rtx_SET (dst, r));
22941 }
22942
22943 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22944 add a reg_note saying that this was a division. Support both scalar and
22945 vector divide. Assumes no trapping math and finite arguments. */
22946
22947 void
22948 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22949 {
22950 machine_mode mode = GET_MODE (dst);
22951 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22952 int i;
22953
22954 /* Low precision estimates guarantee 5 bits of accuracy. High
22955 precision estimates guarantee 14 bits of accuracy. SFmode
22956 requires 23 bits of accuracy. DFmode requires 52 bits of
22957 accuracy. Each pass at least doubles the accuracy, leading
22958 to the following. */
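/* Concretely: a 5-bit estimate gives 5 -> 10 -> 20 -> 40 bits over
three passes, covering SFmode's 23 bits (DFmode's 52 bits needs the
fourth pass), while a 14-bit estimate gives 14 -> 28 -> 56 bits, so
one pass suffices for SFmode and two for DFmode.  */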
22959 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22960 if (mode == DFmode || mode == V2DFmode)
22961 passes++;
22962
22963 enum insn_code code = optab_handler (smul_optab, mode);
22964 insn_gen_fn gen_mul = GEN_FCN (code);
22965
22966 gcc_assert (code != CODE_FOR_nothing);
22967
22968 one = rs6000_load_constant_and_splat (mode, dconst1);
22969
22970 /* x0 = 1./d estimate */
22971 x0 = gen_reg_rtx (mode);
22972 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22973 UNSPEC_FRES)));
22974
22975 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
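/* With e_i = 1 - d * x_i, the update x_(i+1) = x_i + e_i * x_i gives
1 - d * x_(i+1) = 1 - (1 - e_i) * (1 + e_i) = e_i * e_i, so the error
squares on every pass; this is why enext below is computed directly
as eprev * eprev.  */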
22976 if (passes > 1) {
22977
22978 /* e0 = 1. - d * x0 */
22979 e0 = gen_reg_rtx (mode);
22980 rs6000_emit_nmsub (e0, d, x0, one);
22981
22982 /* x1 = x0 + e0 * x0 */
22983 x1 = gen_reg_rtx (mode);
22984 rs6000_emit_madd (x1, e0, x0, x0);
22985
22986 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22987 ++i, xprev = xnext, eprev = enext) {
22988
22989 /* enext = eprev * eprev */
22990 enext = gen_reg_rtx (mode);
22991 emit_insn (gen_mul (enext, eprev, eprev));
22992
22993 /* xnext = xprev + enext * xprev */
22994 xnext = gen_reg_rtx (mode);
22995 rs6000_emit_madd (xnext, enext, xprev, xprev);
22996 }
22997
22998 } else
22999 xprev = x0;
23000
23001 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
23002
23003 /* u = n * xprev */
23004 u = gen_reg_rtx (mode);
23005 emit_insn (gen_mul (u, n, xprev));
23006
23007 /* v = n - (d * u) */
23008 v = gen_reg_rtx (mode);
23009 rs6000_emit_nmsub (v, d, u, n);
23010
23011 /* dst = (v * xprev) + u */
23012 rs6000_emit_madd (dst, v, xprev, u);
23013
23014 if (note_p)
23015 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
23016 }
23017
23018 /* Goldschmidt's Algorithm for single/double-precision floating point
23019 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
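/* The iteration maintains g ~ sqrt(src) and h ~ 1/(2*sqrt(src)).
Writing g = sqrt(src)*(1+e) and h = (1+e)/(2*sqrt(src)), the
correction t = 1/2 - g*h equals -e - e*e/2, and the updates
g' = g + t*g and h' = h + t*h shrink the relative error to O(e^2).  */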
23020
23021 void
23022 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
23023 {
23024 machine_mode mode = GET_MODE (src);
23025 rtx e = gen_reg_rtx (mode);
23026 rtx g = gen_reg_rtx (mode);
23027 rtx h = gen_reg_rtx (mode);
23028
23029 /* Low precision estimates guarantee 5 bits of accuracy. High
23030 precision estimates guarantee 14 bits of accuracy. SFmode
23031 requires 23 bits of accuracy. DFmode requires 52 bits of
23032 accuracy. Each pass at least doubles the accuracy, leading
23033 to the following. */
23034 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23035 if (mode == DFmode || mode == V2DFmode)
23036 passes++;
23037
23038 int i;
23039 rtx mhalf;
23040 enum insn_code code = optab_handler (smul_optab, mode);
23041 insn_gen_fn gen_mul = GEN_FCN (code);
23042
23043 gcc_assert (code != CODE_FOR_nothing);
23044
23045 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
23046
23047 /* e = rsqrt estimate */
23048 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
23049 UNSPEC_RSQRT)));
23050
23051 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
23052 if (!recip)
23053 {
23054 rtx zero = force_reg (mode, CONST0_RTX (mode));
23055
23056 if (mode == SFmode)
23057 {
23058 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
23059 e, zero, mode, 0);
23060 if (target != e)
23061 emit_move_insn (e, target);
23062 }
23063 else
23064 {
23065 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
23066 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
23067 }
23068 }
23069
23070 /* g = sqrt estimate. */
23071 emit_insn (gen_mul (g, e, src));
23072 /* h = 1/(2*sqrt) estimate. */
23073 emit_insn (gen_mul (h, e, mhalf));
23074
23075 if (recip)
23076 {
23077 if (passes == 1)
23078 {
23079 rtx t = gen_reg_rtx (mode);
23080 rs6000_emit_nmsub (t, g, h, mhalf);
23081 /* Apply correction directly to 1/rsqrt estimate. */
23082 rs6000_emit_madd (dst, e, t, e);
23083 }
23084 else
23085 {
23086 for (i = 0; i < passes; i++)
23087 {
23088 rtx t1 = gen_reg_rtx (mode);
23089 rtx g1 = gen_reg_rtx (mode);
23090 rtx h1 = gen_reg_rtx (mode);
23091
23092 rs6000_emit_nmsub (t1, g, h, mhalf);
23093 rs6000_emit_madd (g1, g, t1, g);
23094 rs6000_emit_madd (h1, h, t1, h);
23095
23096 g = g1;
23097 h = h1;
23098 }
23099 /* Multiply by 2 for 1/rsqrt. */
23100 emit_insn (gen_add3_insn (dst, h, h));
23101 }
23102 }
23103 else
23104 {
23105 rtx t = gen_reg_rtx (mode);
23106 rs6000_emit_nmsub (t, g, h, mhalf);
23107 rs6000_emit_madd (dst, g, t, g);
23108 }
23111 }
23112
23113 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
23114 (Power7) targets. DST is the target, and SRC is the argument operand. */
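/* Without popcntw/popcntd the fallback uses popcntb, which leaves the
population count of each byte in that byte; multiplying by 0x01010101
then accumulates the sum of all byte counts into the most significant
byte.  For example, with src = 0xFFFF0001 (SImode), popcntb gives
0x08080001, 0x08080001 * 0x01010101 = 0x11090101, and the top byte
0x11 = 17 = popcount (src), which the final shift right by 24
extracts.  */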
23115
23116 void
23117 rs6000_emit_popcount (rtx dst, rtx src)
23118 {
23119 machine_mode mode = GET_MODE (dst);
23120 rtx tmp1, tmp2;
23121
23122 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
23123 if (TARGET_POPCNTD)
23124 {
23125 if (mode == SImode)
23126 emit_insn (gen_popcntdsi2 (dst, src));
23127 else
23128 emit_insn (gen_popcntddi2 (dst, src));
23129 return;
23130 }
23131
23132 tmp1 = gen_reg_rtx (mode);
23133
23134 if (mode == SImode)
23135 {
23136 emit_insn (gen_popcntbsi2 (tmp1, src));
23137 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
23138 NULL_RTX, 0);
23139 tmp2 = force_reg (SImode, tmp2);
23140 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
23141 }
23142 else
23143 {
23144 emit_insn (gen_popcntbdi2 (tmp1, src));
23145 tmp2 = expand_mult (DImode, tmp1,
23146 GEN_INT ((HOST_WIDE_INT)
23147 0x01010101 << 32 | 0x01010101),
23148 NULL_RTX, 0);
23149 tmp2 = force_reg (DImode, tmp2);
23150 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
23151 }
23152 }
23153
23154
23155 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
23156 target, and SRC is the argument operand. */
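/* Without prtyw/prtyd the fallback xor-folds the per-byte counts from
popcntb, so the low bit of the low byte ends up as the xor of the low
bits of all byte counts, i.e. the parity.  For example, with
src = 0x00000101 (SImode), popcntb gives 0x00000101, folding by 16
leaves 0x00000101, folding by 8 gives 0x00000100, and the final AND
with 1 yields 0 (an even number of set bits).  */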
23157
23158 void
23159 rs6000_emit_parity (rtx dst, rtx src)
23160 {
23161 machine_mode mode = GET_MODE (dst);
23162 rtx tmp;
23163
23164 tmp = gen_reg_rtx (mode);
23165
23166 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23167 if (TARGET_CMPB)
23168 {
23169 if (mode == SImode)
23170 {
23171 emit_insn (gen_popcntbsi2 (tmp, src));
23172 emit_insn (gen_paritysi2_cmpb (dst, tmp));
23173 }
23174 else
23175 {
23176 emit_insn (gen_popcntbdi2 (tmp, src));
23177 emit_insn (gen_paritydi2_cmpb (dst, tmp));
23178 }
23179 return;
23180 }
23181
23182 if (mode == SImode)
23183 {
23184 /* Is mult+shift >= shift+xor+shift+xor? */
23185 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
23186 {
23187 rtx tmp1, tmp2, tmp3, tmp4;
23188
23189 tmp1 = gen_reg_rtx (SImode);
23190 emit_insn (gen_popcntbsi2 (tmp1, src));
23191
23192 tmp2 = gen_reg_rtx (SImode);
23193 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
23194 tmp3 = gen_reg_rtx (SImode);
23195 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
23196
23197 tmp4 = gen_reg_rtx (SImode);
23198 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
23199 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
23200 }
23201 else
23202 rs6000_emit_popcount (tmp, src);
23203 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
23204 }
23205 else
23206 {
23207 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23208 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
23209 {
23210 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
23211
23212 tmp1 = gen_reg_rtx (DImode);
23213 emit_insn (gen_popcntbdi2 (tmp1, src));
23214
23215 tmp2 = gen_reg_rtx (DImode);
23216 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
23217 tmp3 = gen_reg_rtx (DImode);
23218 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
23219
23220 tmp4 = gen_reg_rtx (DImode);
23221 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
23222 tmp5 = gen_reg_rtx (DImode);
23223 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
23224
23225 tmp6 = gen_reg_rtx (DImode);
23226 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
23227 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
23228 }
23229 else
23230 rs6000_emit_popcount (tmp, src);
23231 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
23232 }
23233 }
23234
23235 /* Expand an Altivec constant permutation for little endian mode.
23236 OP0 and OP1 are the input vectors and TARGET is the output vector.
23237 SEL specifies the constant permutation vector.
23238
23239 There are two issues: First, the two input operands must be
23240 swapped so that together they form a double-wide array in LE
23241 order. Second, the vperm instruction has surprising behavior
23242 in LE mode: it interprets the elements of the source vectors
23243 in BE mode ("left to right") and interprets the elements of
23244 the destination vector in LE mode ("right to left"). To
23245 correct for this, we must subtract each element of the permute
23246 control vector from 31.
23247
23248 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23249 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23250 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23251 serve as the permute control vector. Then, in BE mode,
23252
23253 vperm 9,10,11,12
23254
23255 places the desired result in vr9. However, in LE mode the
23256 vector contents will be
23257
23258 vr10 = 00000003 00000002 00000001 00000000
23259 vr11 = 00000007 00000006 00000005 00000004
23260
23261 The result of the vperm using the same permute control vector is
23262
23263 vr9 = 05000000 07000000 01000000 03000000
23264
23265 That is, the leftmost 4 bytes of vr10 are interpreted as the
23266 source for the rightmost 4 bytes of vr9, and so on.
23267
23268 If we change the permute control vector to
23269
vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23271
23272 and issue
23273
23274 vperm 9,11,10,12
23275
23276 we get the desired
23277
23278 vr9 = 00000006 00000004 00000002 00000000. */
23279
23280 static void
23281 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
23282 const vec_perm_indices &sel)
23283 {
23284 unsigned int i;
23285 rtx perm[16];
23286 rtx constv, unspec;
23287
23288 /* Unpack and adjust the constant selector. */
23289 for (i = 0; i < 16; ++i)
23290 {
23291 unsigned int elt = 31 - (sel[i] & 31);
23292 perm[i] = GEN_INT (elt);
23293 }
23294
23295 /* Expand to a permute, swapping the inputs and using the
23296 adjusted selector. */
23297 if (!REG_P (op0))
23298 op0 = force_reg (V16QImode, op0);
23299 if (!REG_P (op1))
23300 op1 = force_reg (V16QImode, op1);
23301
23302 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
23303 constv = force_reg (V16QImode, constv);
23304 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
23305 UNSPEC_VPERM);
23306 if (!REG_P (target))
23307 {
23308 rtx tmp = gen_reg_rtx (V16QImode);
23309 emit_move_insn (tmp, unspec);
23310 unspec = tmp;
23311 }
23312
23313 emit_move_insn (target, unspec);
23314 }
23315
23316 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23317 permute control vector. But here it's not a constant, so we must
23318 generate a vector NAND or NOR to do the adjustment. */
23319
23320 void
23321 altivec_expand_vec_perm_le (rtx operands[4])
23322 {
23323 rtx notx, iorx, unspec;
23324 rtx target = operands[0];
23325 rtx op0 = operands[1];
23326 rtx op1 = operands[2];
23327 rtx sel = operands[3];
23328 rtx tmp = target;
23329 rtx norreg = gen_reg_rtx (V16QImode);
23330 machine_mode mode = GET_MODE (target);
23331
23332 /* Get everything in regs so the pattern matches. */
23333 if (!REG_P (op0))
23334 op0 = force_reg (mode, op0);
23335 if (!REG_P (op1))
23336 op1 = force_reg (mode, op1);
23337 if (!REG_P (sel))
23338 sel = force_reg (V16QImode, sel);
23339 if (!REG_P (target))
23340 tmp = gen_reg_rtx (mode);
23341
23342 if (TARGET_P9_VECTOR)
23343 {
23344 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
23345 UNSPEC_VPERMR);
23346 }
23347 else
23348 {
23349 /* Invert the selector with a VNAND if available, else a VNOR.
23350 The VNAND is preferred for future fusion opportunities. */
23351 notx = gen_rtx_NOT (V16QImode, sel);
23352 iorx = (TARGET_P8_VECTOR
23353 ? gen_rtx_IOR (V16QImode, notx, notx)
23354 : gen_rtx_AND (V16QImode, notx, notx));
23355 emit_insn (gen_rtx_SET (norreg, iorx));
23356
23357 /* Permute with operands reversed and adjusted selector. */
23358 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
23359 UNSPEC_VPERM);
23360 }
23361
23362 /* Copy into target, possibly by way of a register. */
23363 if (!REG_P (target))
23364 {
23365 emit_move_insn (tmp, unspec);
23366 unspec = tmp;
23367 }
23368
23369 emit_move_insn (target, unspec);
23370 }
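/* Why a plain bitwise NOT suffices here: vperm examines only the low
   five bits of each selector byte, and for a five-bit value e we have
   (~e & 31) == 31 - (e & 31).  So inverting the whole selector with
   vnand (or vnor) performs the same subtract-from-31 adjustment that
   altivec_expand_vec_perm_const_le applies to constant selectors,
   while ISA 3.0's vpermr does the adjustment in hardware.  */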
23371
23372 /* Expand an Altivec constant permutation. Return true if we match
23373 an efficient implementation; false to fall back to VPERM.
23374
23375 OP0 and OP1 are the input vectors and TARGET is the output vector.
23376 SEL specifies the constant permutation vector. */
23377
23378 static bool
23379 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
23380 const vec_perm_indices &sel)
23381 {
23382 struct altivec_perm_insn {
23383 HOST_WIDE_INT mask;
23384 enum insn_code impl;
23385 unsigned char perm[16];
23386 };
23387 static const struct altivec_perm_insn patterns[] = {
23388 {OPTION_MASK_ALTIVEC,
23389 CODE_FOR_altivec_vpkuhum_direct,
23390 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23391 {OPTION_MASK_ALTIVEC,
23392 CODE_FOR_altivec_vpkuwum_direct,
23393 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23394 {OPTION_MASK_ALTIVEC,
23395 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
23396 : CODE_FOR_altivec_vmrglb_direct,
23397 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23398 {OPTION_MASK_ALTIVEC,
23399 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
23400 : CODE_FOR_altivec_vmrglh_direct,
23401 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23402 {OPTION_MASK_ALTIVEC,
23403 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
23404 : CODE_FOR_altivec_vmrglw_direct_v4si,
23405 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23406 {OPTION_MASK_ALTIVEC,
23407 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
23408 : CODE_FOR_altivec_vmrghb_direct,
23409 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23410 {OPTION_MASK_ALTIVEC,
23411 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
23412 : CODE_FOR_altivec_vmrghh_direct,
23413 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23414 {OPTION_MASK_ALTIVEC,
23415 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
23416 : CODE_FOR_altivec_vmrghw_direct_v4si,
23417 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23418 {OPTION_MASK_P8_VECTOR,
23419 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23420 : CODE_FOR_p8_vmrgow_v4sf_direct,
23421 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23422 {OPTION_MASK_P8_VECTOR,
23423 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23424 : CODE_FOR_p8_vmrgew_v4sf_direct,
23425 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23426 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23427 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23428 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23429 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23430 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23431 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23432 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23433 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23434
23435 unsigned int i, j, elt, which;
23436 unsigned char perm[16];
23437 rtx x;
23438 bool one_vec;
23439
23440 /* Unpack the constant selector. */
23441 for (i = which = 0; i < 16; ++i)
23442 {
23443 elt = sel[i] & 31;
23444 which |= (elt < 16 ? 1 : 2);
23445 perm[i] = elt;
23446 }
23447
23448 /* Simplify the constant selector based on operands. */
23449 switch (which)
23450 {
23451 default:
23452 gcc_unreachable ();
23453
23454 case 3:
23455 one_vec = false;
23456 if (!rtx_equal_p (op0, op1))
23457 break;
23458 /* FALLTHRU */
23459
23460 case 2:
23461 for (i = 0; i < 16; ++i)
23462 perm[i] &= 15;
23463 op0 = op1;
23464 one_vec = true;
23465 break;
23466
23467 case 1:
23468 op1 = op0;
23469 one_vec = true;
23470 break;
23471 }
23472
23473 /* Look for splat patterns. */
23474 if (one_vec)
23475 {
23476 elt = perm[0];
23477
23478 for (i = 0; i < 16; ++i)
23479 if (perm[i] != elt)
23480 break;
23481 if (i == 16)
23482 {
23483 if (!BYTES_BIG_ENDIAN)
23484 elt = 15 - elt;
23485 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23486 return true;
23487 }
23488
23489 if (elt % 2 == 0)
23490 {
23491 for (i = 0; i < 16; i += 2)
23492 if (perm[i] != elt || perm[i + 1] != elt + 1)
23493 break;
23494 if (i == 16)
23495 {
23496 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23497 x = gen_reg_rtx (V8HImode);
23498 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23499 GEN_INT (field)));
23500 emit_move_insn (target, gen_lowpart (V16QImode, x));
23501 return true;
23502 }
23503 }
23504
23505 if (elt % 4 == 0)
23506 {
23507 for (i = 0; i < 16; i += 4)
23508 if (perm[i] != elt
23509 || perm[i + 1] != elt + 1
23510 || perm[i + 2] != elt + 2
23511 || perm[i + 3] != elt + 3)
23512 break;
23513 if (i == 16)
23514 {
23515 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23516 x = gen_reg_rtx (V4SImode);
23517 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23518 GEN_INT (field)));
23519 emit_move_insn (target, gen_lowpart (V16QImode, x));
23520 return true;
23521 }
23522 }
23523 }
23524
23525 /* Look for merge and pack patterns. */
23526 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23527 {
23528 bool swapped;
23529
23530 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23531 continue;
23532
23533 elt = patterns[j].perm[0];
23534 if (perm[0] == elt)
23535 swapped = false;
23536 else if (perm[0] == elt + 16)
23537 swapped = true;
23538 else
23539 continue;
23540 for (i = 1; i < 16; ++i)
23541 {
23542 elt = patterns[j].perm[i];
23543 if (swapped)
23544 elt = (elt >= 16 ? elt - 16 : elt + 16);
23545 else if (one_vec && elt >= 16)
23546 elt -= 16;
23547 if (perm[i] != elt)
23548 break;
23549 }
23550 if (i == 16)
23551 {
23552 enum insn_code icode = patterns[j].impl;
23553 machine_mode omode = insn_data[icode].operand[0].mode;
23554 machine_mode imode = insn_data[icode].operand[1].mode;
23555
23556 rtx perm_idx = GEN_INT (0);
23557 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23558 {
23559 int perm_val = 0;
23560 if (one_vec)
23561 {
23562 if (perm[0] == 8)
23563 perm_val |= 2;
23564 if (perm[8] == 8)
23565 perm_val |= 1;
23566 }
23567 else
23568 {
23569 if (perm[0] != 0)
23570 perm_val |= 2;
23571 if (perm[8] != 16)
23572 perm_val |= 1;
23573 }
23574 perm_idx = GEN_INT (perm_val);
23575 }
23576
23577 /* For little-endian, don't use vpkuwum and vpkuhum if the
23578 underlying vector type is not V4SI and V8HI, respectively.
23579 For example, using vpkuwum with a V8HI picks up the even
23580 halfwords (BE numbering) when the even halfwords (LE
23581 numbering) are what we need. */
23582 if (!BYTES_BIG_ENDIAN
23583 && icode == CODE_FOR_altivec_vpkuwum_direct
23584 && ((REG_P (op0)
23585 && GET_MODE (op0) != V4SImode)
23586 || (SUBREG_P (op0)
23587 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23588 continue;
23589 if (!BYTES_BIG_ENDIAN
23590 && icode == CODE_FOR_altivec_vpkuhum_direct
23591 && ((REG_P (op0)
23592 && GET_MODE (op0) != V8HImode)
23593 || (SUBREG_P (op0)
23594 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23595 continue;
23596
23597 /* For little-endian, the two input operands must be swapped
23598 (or swapped back) to ensure proper right-to-left numbering
23599 from 0 to 2N-1. */
23600 if ((swapped ^ !BYTES_BIG_ENDIAN)
23601 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23602 std::swap (op0, op1);
23603 if (imode != V16QImode)
23604 {
23605 op0 = gen_lowpart (imode, op0);
23606 op1 = gen_lowpart (imode, op1);
23607 }
23608 if (omode == V16QImode)
23609 x = target;
23610 else
23611 x = gen_reg_rtx (omode);
23612 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23613 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23614 else
23615 emit_insn (GEN_FCN (icode) (x, op0, op1));
23616 if (omode != V16QImode)
23617 emit_move_insn (target, gen_lowpart (V16QImode, x));
23618 return true;
23619 }
23620 }
23621
23622 if (!BYTES_BIG_ENDIAN)
23623 {
23624 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23625 return true;
23626 }
23627
23628 return false;
23629 }
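/* Example of the pattern match above: the byte selector
     {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}
   interleaves the first eight bytes of the two inputs, so it maps to
   vmrghb on big-endian targets and vmrglb on little-endian ones, per
   the third entry of the patterns[] table.  */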
23630
23631 /* Expand a VSX Permute Doubleword constant permutation.
23632 Return true if we match an efficient implementation. */
23633
23634 static bool
23635 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23636 unsigned char perm0, unsigned char perm1)
23637 {
23638 rtx x;
23639
23640 /* If both selectors come from the same operand, fold to single op. */
23641 if ((perm0 & 2) == (perm1 & 2))
23642 {
23643 if (perm0 & 2)
23644 op0 = op1;
23645 else
23646 op1 = op0;
23647 }
23648 /* If both operands are equal, fold to simpler permutation. */
23649 if (rtx_equal_p (op0, op1))
23650 {
23651 perm0 = perm0 & 1;
23652 perm1 = (perm1 & 1) + 2;
23653 }
23654 /* If the first selector comes from the second operand, swap. */
23655 else if (perm0 & 2)
23656 {
23657 if (perm1 & 2)
23658 return false;
23659 perm0 -= 2;
23660 perm1 += 2;
23661 std::swap (op0, op1);
23662 }
23663 /* If the second selector does not come from the second operand, fail. */
23664 else if ((perm1 & 2) == 0)
23665 return false;
23666
23667 /* Success! */
23668 if (target != NULL)
23669 {
23670 machine_mode vmode, dmode;
23671 rtvec v;
23672
23673 vmode = GET_MODE (target);
23674 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23675 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23676 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23677 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23678 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23679 emit_insn (gen_rtx_SET (target, x));
23680 }
23681 return true;
23682 }
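/* Selector encoding used above, for reference: after the VEC_CONCAT,
   perm values 0 and 1 pick the two elements of OP0 and values 2 and 3
   pick the two elements of OP1.  So perm0 = 1, perm1 = 2 selects
   {op0[1], op1[0]}, which the xxpermdi pattern can typically match
   with a single instruction.  */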
23683
23684 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23685
23686 static bool
23687 rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
23688 rtx target, rtx op0, rtx op1,
23689 const vec_perm_indices &sel)
23690 {
23691 if (vmode != op_mode)
23692 return false;
23693
23694 bool testing_p = !target;
23695
23696 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23697 if (TARGET_ALTIVEC && testing_p)
23698 return true;
23699
23700 if (op0)
23701 {
23702 rtx nop0 = force_reg (vmode, op0);
23703 if (op0 == op1)
23704 op1 = nop0;
23705 op0 = nop0;
23706 }
23707 if (op1)
23708 op1 = force_reg (vmode, op1);
23709
23710 /* Check for ps_merge* or xxpermdi insns. */
23711 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23712 {
23713 if (testing_p)
23714 {
23715 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23716 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23717 }
23718 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23719 return true;
23720 }
23721
23722 if (TARGET_ALTIVEC)
23723 {
23724 /* Force the target-independent code to lower to V16QImode. */
23725 if (vmode != V16QImode)
23726 return false;
23727 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23728 return true;
23729 }
23730
23731 return false;
23732 }
23733
23734 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23735 OP0 and OP1 are the input vectors and TARGET is the output vector.
23736 PERM specifies the constant permutation vector. */
23737
23738 static void
23739 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23740 machine_mode vmode, const vec_perm_builder &perm)
23741 {
23742 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23743 if (x != target)
23744 emit_move_insn (target, x);
23745 }
23746
23747 /* Expand an extract even operation. */
23748
23749 void
23750 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23751 {
23752 machine_mode vmode = GET_MODE (target);
23753 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23754 vec_perm_builder perm (nelt, nelt, 1);
23755
23756 for (i = 0; i < nelt; i++)
23757 perm.quick_push (i * 2);
23758
23759 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23760 }
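/* For instance, with V4SI inputs this pushes the selector {0, 2, 4, 6},
   i.e. the even elements of the double-wide op0 || op1 concatenation:
   {op0[0], op0[2], op1[0], op1[2]}.  */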
23761
23762 /* Expand a vector interleave operation. */
23763
23764 void
23765 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23766 {
23767 machine_mode vmode = GET_MODE (target);
23768 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23769 vec_perm_builder perm (nelt, nelt, 1);
23770
23771 high = (highp ? 0 : nelt / 2);
23772 for (i = 0; i < nelt / 2; i++)
23773 {
23774 perm.quick_push (i + high);
23775 perm.quick_push (i + nelt + high);
23776 }
23777
23778 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23779 }
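/* For V4SI (nelt == 4), HIGHP yields the selector {0, 4, 1, 5} and
   !HIGHP yields {2, 6, 3, 7}, i.e. the merge-high and merge-low
   interleaves of op0 and op1.  */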
23780
23781 /* Scale a V2DF vector SRC by two raised to the power SCALE, placing the
23782 result in TGT. */
23782 void
23783 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23784 {
23785 HOST_WIDE_INT hwi_scale (scale);
23786 REAL_VALUE_TYPE r_pow;
23787 rtvec v = rtvec_alloc (2);
23788 rtx elt;
23789 rtx scale_vec = gen_reg_rtx (V2DFmode);
23790 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23791 elt = const_double_from_real_value (r_pow, DFmode);
23792 RTVEC_ELT (v, 0) = elt;
23793 RTVEC_ELT (v, 1) = elt;
23794 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23795 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23796 }
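/* E.g. rs6000_scale_v2df (tgt, src, 4) builds the constant vector
   {16.0, 16.0} and emits a single mulv2df3, multiplying both lanes of
   SRC by 2**4; a negative SCALE scales down the same way.  */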
23797
23798 /* Return an RTX representing where to find the function value of a
23799 function returning MODE. */
23800 static rtx
23801 rs6000_complex_function_value (machine_mode mode)
23802 {
23803 unsigned int regno;
23804 rtx r1, r2;
23805 machine_mode inner = GET_MODE_INNER (mode);
23806 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23807
23808 if (TARGET_FLOAT128_TYPE
23809 && (mode == KCmode
23810 || (mode == TCmode && TARGET_IEEEQUAD)))
23811 regno = ALTIVEC_ARG_RETURN;
23812
23813 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23814 regno = FP_ARG_RETURN;
23815
23816 else
23817 {
23818 regno = GP_ARG_RETURN;
23819
23820 /* 32-bit is OK since it'll go in r3/r4. */
23821 if (TARGET_32BIT && inner_bytes >= 4)
23822 return gen_rtx_REG (mode, regno);
23823 }
23824
23825 if (inner_bytes >= 8)
23826 return gen_rtx_REG (mode, regno);
23827
23828 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23829 const0_rtx);
23830 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23831 GEN_INT (inner_bytes));
23832 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23833 }
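/* Example: an SCmode (complex float) return has inner = SFmode and
   inner_bytes = 4, so on a hard-float target it comes back as a
   PARALLEL placing the real part in FP_ARG_RETURN at offset 0 and the
   imaginary part in the next FP register at offset 4.  */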
23834
23835 /* Return an rtx describing a return value of MODE as a PARALLEL
23836 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23837 stride REG_STRIDE. */
23838
23839 static rtx
23840 rs6000_parallel_return (machine_mode mode,
23841 int n_elts, machine_mode elt_mode,
23842 unsigned int regno, unsigned int reg_stride)
23843 {
23844 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23845
23846 int i;
23847 for (i = 0; i < n_elts; i++)
23848 {
23849 rtx r = gen_rtx_REG (elt_mode, regno);
23850 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23851 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23852 regno += reg_stride;
23853 }
23854
23855 return par;
23856 }
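/* E.g. rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1),
   as used for -m32 -mpowerpc64 below, describes a DImode value split
   across r3 (bytes 0-3) and r4 (bytes 4-7).  */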
23857
23858 /* Target hook for TARGET_FUNCTION_VALUE.
23859
23860 An integer value is in r3 and a floating-point value is in fp1,
23861 unless -msoft-float. */
23862
23863 static rtx
23864 rs6000_function_value (const_tree valtype,
23865 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23866 bool outgoing ATTRIBUTE_UNUSED)
23867 {
23868 machine_mode mode;
23869 unsigned int regno;
23870 machine_mode elt_mode;
23871 int n_elts;
23872
23873 /* Special handling for structs in darwin64. */
23874 if (TARGET_MACHO
23875 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23876 {
23877 CUMULATIVE_ARGS valcum;
23878 rtx valret;
23879
23880 valcum.words = 0;
23881 valcum.fregno = FP_ARG_MIN_REG;
23882 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23883 /* Do a trial code generation as if this were going to be passed as
23884 an argument; if any part goes in memory, we return NULL. */
23885 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23886 if (valret)
23887 return valret;
23888 /* Otherwise fall through to standard ABI rules. */
23889 }
23890
23891 mode = TYPE_MODE (valtype);
23892
23893 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23894 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23895 {
23896 int first_reg, n_regs;
23897
23898 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23899 {
23900 /* _Decimal128 must use even/odd register pairs. */
23901 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23902 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23903 }
23904 else
23905 {
23906 first_reg = ALTIVEC_ARG_RETURN;
23907 n_regs = 1;
23908 }
23909
23910 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23911 }
23912
23913 /* Some return value types need to be split under -mpowerpc64 with the
23914 32-bit ABI. */
23914 if (TARGET_32BIT && TARGET_POWERPC64)
23915 switch (mode)
23916 {
23917 default:
23918 break;
23919 case E_DImode:
23920 case E_SCmode:
23921 case E_DCmode:
23922 case E_TCmode:
23923 int count = GET_MODE_SIZE (mode) / 4;
23924 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23925 }
23926
23927 if ((INTEGRAL_TYPE_P (valtype)
23928 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23929 || POINTER_TYPE_P (valtype))
23930 mode = TARGET_32BIT ? SImode : DImode;
23931
23932 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23933 /* _Decimal128 must use an even/odd register pair. */
23934 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23935 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23936 && !FLOAT128_VECTOR_P (mode))
23937 regno = FP_ARG_RETURN;
23938 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23939 && targetm.calls.split_complex_arg)
23940 return rs6000_complex_function_value (mode);
23941 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23942 return register is used in both cases, and we won't see V2DImode/V2DFmode
23943 for pure altivec, combine the two cases. */
23944 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23945 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23946 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23947 regno = ALTIVEC_ARG_RETURN;
23948 else
23949 regno = GP_ARG_RETURN;
23950
23951 return gen_rtx_REG (mode, regno);
23952 }
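/* Under the ELFv2 homogeneous-aggregate rule above, for example, a
   struct of two doubles returns with elt_mode = DFmode and n_elts = 2,
   i.e. a PARALLEL of FP_ARG_RETURN and FP_ARG_RETURN + 1 at byte
   offsets 0 and 8.  */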
23953
23954 /* Define how to find the value returned by a library function
23955 assuming the value has mode MODE. */
23956 rtx
23957 rs6000_libcall_value (machine_mode mode)
23958 {
23959 unsigned int regno;
23960
23961 /* Long long return values need to be split under -mpowerpc64 with the
23962 32-bit ABI. */
23962 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23963 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23964
23965 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23966 /* _Decimal128 must use an even/odd register pair. */
23967 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23968 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23969 regno = FP_ARG_RETURN;
23970 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23971 return register is used in both cases, and we won't see V2DImode/V2DFmode
23972 for pure altivec, combine the two cases. */
23973 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23974 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23975 regno = ALTIVEC_ARG_RETURN;
23976 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23977 return rs6000_complex_function_value (mode);
23978 else
23979 regno = GP_ARG_RETURN;
23980
23981 return gen_rtx_REG (mode, regno);
23982 }
23983
23984 /* Compute register pressure classes. We implement the target hook to avoid
23985 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23986 lead to incorrect estimates of the number of available registers and
23987 therefore increased register pressure/spill. */
23988 static int
23989 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23990 {
23991 int n;
23992
23993 n = 0;
23994 pressure_classes[n++] = GENERAL_REGS;
23995 if (TARGET_ALTIVEC)
23996 pressure_classes[n++] = ALTIVEC_REGS;
23997 if (TARGET_VSX)
23998 pressure_classes[n++] = VSX_REGS;
23999 else
24000 {
24001 if (TARGET_HARD_FLOAT)
24002 pressure_classes[n++] = FLOAT_REGS;
24003 }
24004 if (TARGET_DENSE_MATH)
24005 pressure_classes[n++] = DM_REGS;
24006 pressure_classes[n++] = CR_REGS;
24007 pressure_classes[n++] = SPECIAL_REGS;
24008
24009 return n;
24010 }
24011
24012 /* Given FROM and TO register numbers, say whether this elimination is allowed.
24013 Frame pointer elimination is automatically handled.
24014
24015 For the RS/6000, if frame pointer elimination is being done, we would like
24016 to convert ap into fp, not sp.
24017
24018 We need r30 if -mminimal-toc was specified, and there are constant pool
24019 references. */
24020
24021 static bool
24022 rs6000_can_eliminate (const int from, const int to)
24023 {
24024 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
24025 ? ! frame_pointer_needed
24026 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
24027 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
24028 || constant_pool_empty_p ()
24029 : true);
24030 }
24031
24032 /* Define the offset between two registers, FROM to be eliminated and its
24033 replacement TO, at the start of a routine. */
24034 HOST_WIDE_INT
24035 rs6000_initial_elimination_offset (int from, int to)
24036 {
24037 rs6000_stack_t *info = rs6000_stack_info ();
24038 HOST_WIDE_INT offset;
24039
24040 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24041 offset = info->push_p ? 0 : -info->total_size;
24042 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24043 {
24044 offset = info->push_p ? 0 : -info->total_size;
24045 if (FRAME_GROWS_DOWNWARD)
24046 offset += info->fixed_size + info->vars_size + info->parm_size;
24047 }
24048 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24049 offset = FRAME_GROWS_DOWNWARD
24050 ? info->fixed_size + info->vars_size + info->parm_size
24051 : 0;
24052 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24053 offset = info->total_size;
24054 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24055 offset = info->push_p ? info->total_size : 0;
24056 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
24057 offset = 0;
24058 else
24059 gcc_unreachable ();
24060
24061 return offset;
24062 }
24063
24064 /* Fill in the sizes of registers used by the unwinder. */
24065
24066 static void
24067 rs6000_init_dwarf_reg_sizes_extra (tree address)
24068 {
24069 if (TARGET_MACHO && ! TARGET_ALTIVEC)
24070 {
24071 int i;
24072 machine_mode mode = TYPE_MODE (char_type_node);
24073 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
24074 rtx mem = gen_rtx_MEM (BLKmode, addr);
24075 rtx value = gen_int_mode (16, mode);
24076
24077 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
24078 The unwinder still needs to know the size of Altivec registers. */
24079
24080 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
24081 {
24082 int column = DWARF_REG_TO_UNWIND_COLUMN
24083 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
24084 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
24085
24086 emit_move_insn (adjust_address (mem, mode, offset), value);
24087 }
24088 }
24089 }
24090
24091 /* Map internal gcc register numbers to debug format register numbers.
24092 FORMAT specifies the type of debug register number to use:
24093 0 -- debug information, except for frame-related sections
24094 1 -- DWARF .debug_frame section
24095 2 -- DWARF .eh_frame section */
24096
24097 unsigned int
24098 rs6000_debugger_regno (unsigned int regno, unsigned int format)
24099 {
24100 /* On some platforms, we use the standard DWARF register
24101 numbering for .debug_info and .debug_frame. */
24102 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
24103 {
24104 #ifdef RS6000_USE_DWARF_NUMBERING
24105 if (regno <= 31)
24106 return regno;
24107 if (FP_REGNO_P (regno))
24108 return regno - FIRST_FPR_REGNO + 32;
24109 if (ALTIVEC_REGNO_P (regno))
24110 return regno - FIRST_ALTIVEC_REGNO + 1124;
24111 if (regno == LR_REGNO)
24112 return 108;
24113 if (regno == CTR_REGNO)
24114 return 109;
24115 if (regno == CA_REGNO)
24116 return 101; /* XER */
24117 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
24118 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
24119 The actual code emitted saves the whole of CR, so we map CR2_REGNO
24120 to the DWARF reg for CR. */
24121 if (format == 1 && regno == CR2_REGNO)
24122 return 64;
24123 if (CR_REGNO_P (regno))
24124 return regno - CR0_REGNO + 86;
24125 if (regno == VRSAVE_REGNO)
24126 return 356;
24127 if (regno == VSCR_REGNO)
24128 return 67;
24129
24130 /* These do not make much sense. */
24131 if (regno == FRAME_POINTER_REGNUM)
24132 return 111;
24133 if (regno == ARG_POINTER_REGNUM)
24134 return 67;
24135 if (regno == 64)
24136 return 100;
24137
24138 gcc_unreachable ();
24139 #endif
24140 }
24141
24142 /* We use the GCC 7 (and before) internal register numbers for non-DWARF
24143 debug information and also for .eh_frame, so translate the regnos to
24144 their GCC 7 numbering here. */
24145 if (regno <= 31)
24146 return regno;
24147 if (FP_REGNO_P (regno))
24148 return regno - FIRST_FPR_REGNO + 32;
24149 if (ALTIVEC_REGNO_P (regno))
24150 return regno - FIRST_ALTIVEC_REGNO + 77;
24151 if (regno == LR_REGNO)
24152 return 65;
24153 if (regno == CTR_REGNO)
24154 return 66;
24155 if (regno == CA_REGNO)
24156 return 76; /* XER */
24157 if (CR_REGNO_P (regno))
24158 return regno - CR0_REGNO + 68;
24159 if (regno == VRSAVE_REGNO)
24160 return 109;
24161 if (regno == VSCR_REGNO)
24162 return 110;
24163
24164 if (regno == FRAME_POINTER_REGNUM)
24165 return 111;
24166 if (regno == ARG_POINTER_REGNUM)
24167 return 67;
24168 if (regno == 64)
24169 return 64;
24170 /* XXX: This is a guess. The GCC register number for FIRST_DMR_REGNO is 111,
24171 but the frame pointer regnum uses that. */
24172 if (DMR_REGNO_P (regno))
24173 return regno - FIRST_DMR_REGNO + 112;
24174
24175 gcc_unreachable ();
24176 }
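/* A concrete mapping, assuming RS6000_USE_DWARF_NUMBERING: the link
   register (LR_REGNO) is emitted as 108 in .debug_frame (format 1)
   but as the legacy number 65 in .eh_frame (format 2), which is why
   the two numbering schemes above must be kept separate.  */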
24177
24178 /* target hook eh_return_filter_mode */
24179 static scalar_int_mode
24180 rs6000_eh_return_filter_mode (void)
24181 {
24182 return TARGET_32BIT ? SImode : word_mode;
24183 }
24184
24185 /* Target hook for translate_mode_attribute. */
24186 static machine_mode
24187 rs6000_translate_mode_attribute (machine_mode mode)
24188 {
24189 if ((FLOAT128_IEEE_P (mode)
24190 && ieee128_float_type_node == long_double_type_node)
24191 || (FLOAT128_IBM_P (mode)
24192 && ibm128_float_type_node == long_double_type_node))
24193 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
24194 return mode;
24195 }
24196
24197 /* Target hook for scalar_mode_supported_p. */
24198 static bool
24199 rs6000_scalar_mode_supported_p (scalar_mode mode)
24200 {
24201 /* -m32 does not support TImode. This is the default, from
24202 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
24203 same ABI as for -m32. But default_scalar_mode_supported_p allows
24204 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
24205 for -mpowerpc64. */
24206 if (TARGET_32BIT && mode == TImode)
24207 return false;
24208
24209 if (DECIMAL_FLOAT_MODE_P (mode))
24210 return default_decimal_float_supported_p ();
24211 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
24212 return true;
24213 else
24214 return default_scalar_mode_supported_p (mode);
24215 }
24216
24217 /* Target hook for libgcc_floating_mode_supported_p. */
24218
24219 static bool
24220 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24221 {
24222 switch (mode)
24223 {
24224 case E_SFmode:
24225 case E_DFmode:
24226 case E_TFmode:
24227 return true;
24228
24229 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24230 if long double does not use the IEEE 128-bit format. If long double
24231 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24232 Because the code will not use KFmode in that case, there will be aborts
24233 because it can't find KFmode in the Floatn types. */
24234 case E_KFmode:
24235 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
24236
24237 default:
24238 return false;
24239 }
24240 }
24241
24242 /* Target hook for vector_mode_supported_p. */
24243 static bool
24244 rs6000_vector_mode_supported_p (machine_mode mode)
24245 {
24246 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24247 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24248 double-double. */
24249 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
24250 return true;
24251
24252 else
24253 return false;
24254 }
24255
24256 /* Target hook for floatn_mode. */
24257 static opt_scalar_float_mode
24258 rs6000_floatn_mode (int n, bool extended)
24259 {
24260 if (extended)
24261 {
24262 switch (n)
24263 {
24264 case 32:
24265 return DFmode;
24266
24267 case 64:
24268 if (TARGET_FLOAT128_TYPE)
24269 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24270 else
24271 return opt_scalar_float_mode ();
24272
24273 case 128:
24274 return opt_scalar_float_mode ();
24275
24276 default:
24277 /* Those are the only valid _FloatNx types. */
24278 gcc_unreachable ();
24279 }
24280 }
24281 else
24282 {
24283 switch (n)
24284 {
24285 case 32:
24286 return SFmode;
24287
24288 case 64:
24289 return DFmode;
24290
24291 case 128:
24292 if (TARGET_FLOAT128_TYPE)
24293 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24294 else
24295 return opt_scalar_float_mode ();
24296
24297 default:
24298 return opt_scalar_float_mode ();
24299 }
24300 }
24301
24302 }
24303
24304 /* Target hook for c_mode_for_suffix. */
24305 static machine_mode
24306 rs6000_c_mode_for_suffix (char suffix)
24307 {
24308 if (TARGET_FLOAT128_TYPE)
24309 {
24310 if (suffix == 'q' || suffix == 'Q')
24311 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24312
24313 /* At the moment, we are not defining a suffix for IBM extended double.
24314 If/when the default for -mabi=ieeelongdouble is changed, and we want
24315 to support __ibm128 constants in legacy library code, we may need to
24316 re-evaluate this decision. Currently, c-lex.cc only supports 'w' and
24317 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24318 __float80 constants. */
24319 }
24320
24321 return VOIDmode;
24322 }
24323
24324 /* Target hook for invalid_arg_for_unprototyped_fn. */
24325 static const char *
24326 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
24327 {
24328 return (!rs6000_darwin64_abi
24329 && typelist == 0
24330 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
24331 && (funcdecl == NULL_TREE
24332 || (TREE_CODE (funcdecl) == FUNCTION_DECL
24333 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
24334 ? N_("AltiVec argument passed to unprototyped function")
24335 : NULL;
24336 }
24337
24338 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
24339 setup by calling the hidden function __stack_chk_fail_local instead
24340 of __stack_chk_fail. Otherwise it is better to call
24341 __stack_chk_fail directly. */
24342
24343 static tree ATTRIBUTE_UNUSED
24344 rs6000_stack_protect_fail (void)
24345 {
24346 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
24347 ? default_hidden_stack_protect_fail ()
24348 : default_external_stack_protect_fail ();
24349 }
24350
24351 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24352
24353 #if TARGET_ELF
24354 static unsigned HOST_WIDE_INT
24355 rs6000_asan_shadow_offset (void)
24356 {
24357 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
24358 }
24359 #endif
24360 \f
24361 /* Mask options that we want to support inside of attribute((target)) and
24362 #pragma GCC target operations. Note, we do not include things like
24363 64/32-bit, endianness, hard/soft floating point, etc. that would have
24364 different calling sequences. */
24365
24366 struct rs6000_opt_mask {
24367 const char *name; /* option name */
24368 HOST_WIDE_INT mask; /* mask to set */
24369 bool invert; /* invert sense of mask */
24370 bool valid_target; /* option is a target option */
24371 };
24372
24373 static struct rs6000_opt_mask const rs6000_opt_masks[] =
24374 {
24375 { "altivec", OPTION_MASK_ALTIVEC, false, true },
24376 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
24377 false, true },
24378 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
24379 false, true },
24380 { "cmpb", OPTION_MASK_CMPB, false, true },
24381 { "crypto", OPTION_MASK_CRYPTO, false, true },
24382 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
24383 { "dlmzb", OPTION_MASK_DLMZB, false, true },
24384 { "dense-math", OPTION_MASK_DENSE_MATH, false, true },
24385 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
24386 false, true },
24387 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
24388 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
24389 { "fprnd", OPTION_MASK_FPRND, false, true },
24390 { "power10", OPTION_MASK_POWER10, false, true },
24391 { "future", OPTION_MASK_FUTURE, false, true },
24392 { "hard-dfp", OPTION_MASK_DFP, false, true },
24393 { "htm", OPTION_MASK_HTM, false, true },
24394 { "isel", OPTION_MASK_ISEL, false, true },
24395 { "mfcrf", OPTION_MASK_MFCRF, false, true },
24396 { "mfpgpr", 0, false, true },
24397 { "mma", OPTION_MASK_MMA, false, true },
24398 { "modulo", OPTION_MASK_MODULO, false, true },
24399 { "mulhw", OPTION_MASK_MULHW, false, true },
24400 { "multiple", OPTION_MASK_MULTIPLE, false, true },
24401 { "pcrel", OPTION_MASK_PCREL, false, true },
24402 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
24403 { "popcntb", OPTION_MASK_POPCNTB, false, true },
24404 { "popcntd", OPTION_MASK_POPCNTD, false, true },
24405 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
24406 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
24407 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
24408 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
24409 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
24410 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
24411 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
24412 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
24413 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
24414 { "prefixed", OPTION_MASK_PREFIXED, false, true },
24415 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
24416 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
24417 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
24418 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
24419 { "string", 0, false, true },
24420 { "update", OPTION_MASK_NO_UPDATE, true , true },
24421 { "vsx", OPTION_MASK_VSX, false, true },
24422 #ifdef OPTION_MASK_64BIT
24423 #if TARGET_AIX_OS
24424 { "aix64", OPTION_MASK_64BIT, false, false },
24425 { "aix32", OPTION_MASK_64BIT, true, false },
24426 #else
24427 { "64", OPTION_MASK_64BIT, false, false },
24428 { "32", OPTION_MASK_64BIT, true, false },
24429 #endif
24430 #endif
24431 #ifdef OPTION_MASK_EABI
24432 { "eabi", OPTION_MASK_EABI, false, false },
24433 #endif
24434 #ifdef OPTION_MASK_LITTLE_ENDIAN
24435 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
24436 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
24437 #endif
24438 #ifdef OPTION_MASK_RELOCATABLE
24439 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
24440 #endif
24441 #ifdef OPTION_MASK_STRICT_ALIGN
24442 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
24443 #endif
24444 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
24445 { "string", 0, false, false },
24446 };
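/* Note the INVERT field flips the sense of a mask: the "update" entry
   carries OPTION_MASK_NO_UPDATE with invert == true, so
   target("update") clears the NO_UPDATE bit while target("no-update")
   sets it, matching the -mupdate/-mno-update command-line options.  */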
24447
24448 /* Option variables that we want to support inside attribute((target)) and
24449 #pragma GCC target operations. */
24450
24451 struct rs6000_opt_var {
24452 const char *name; /* option name */
24453 size_t global_offset; /* offset of the option in global_options. */
24454 size_t target_offset; /* offset of the option in target options. */
24455 };
24456
24457 static struct rs6000_opt_var const rs6000_opt_vars[] =
24458 {
24459 { "friz",
24460 offsetof (struct gcc_options, x_TARGET_FRIZ),
24461 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24462 { "avoid-indexed-addresses",
24463 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24464 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24465 { "longcall",
24466 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24467 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24468 { "optimize-swaps",
24469 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24470 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24471 { "allow-movmisalign",
24472 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24473 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24474 { "sched-groups",
24475 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24476 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24477 { "always-hint",
24478 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24479 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24480 { "align-branch-targets",
24481 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24482 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24483 { "sched-prolog",
24484 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24485 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24486 { "sched-epilog",
24487 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24488 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24489 { "speculate-indirect-jumps",
24490 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24491 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24492 };
24493
24494 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24495 parsing. Return true if there were no errors. */
24496
24497 static bool
24498 rs6000_inner_target_options (tree args, bool attr_p)
24499 {
24500 bool ret = true;
24501
24502 if (args == NULL_TREE)
24503 ;
24504
24505 else if (TREE_CODE (args) == STRING_CST)
24506 {
24507 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24508 char *q;
24509
24510 while ((q = strtok (p, ",")) != NULL)
24511 {
24512 bool error_p = false;
24513 bool not_valid_p = false;
24514 const char *cpu_opt = NULL;
24515
24516 p = NULL;
24517 if (startswith (q, "cpu="))
24518 {
24519 int cpu_index = rs6000_cpu_name_lookup (q+4);
24520 if (cpu_index >= 0)
24521 rs6000_cpu_index = cpu_index;
24522 else
24523 {
24524 error_p = true;
24525 cpu_opt = q+4;
24526 }
24527 }
24528 else if (startswith (q, "tune="))
24529 {
24530 int tune_index = rs6000_cpu_name_lookup (q+5);
24531 if (tune_index >= 0)
24532 rs6000_tune_index = tune_index;
24533 else
24534 {
24535 error_p = true;
24536 cpu_opt = q+5;
24537 }
24538 }
24539 else
24540 {
24541 size_t i;
24542 bool invert = false;
24543 char *r = q;
24544
24545 error_p = true;
24546 if (startswith (r, "no-"))
24547 {
24548 invert = true;
24549 r += 3;
24550 }
24551
24552 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24553 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24554 {
24555 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24556
24557 if (!rs6000_opt_masks[i].valid_target)
24558 not_valid_p = true;
24559 else
24560 {
24561 error_p = false;
24562 rs6000_isa_flags_explicit |= mask;
24563
24564 /* VSX needs altivec, so -mvsx automagically sets
24565 altivec and disables -mavoid-indexed-addresses. */
24566 if (!invert)
24567 {
24568 if (mask == OPTION_MASK_VSX)
24569 {
24570 mask |= OPTION_MASK_ALTIVEC;
24571 TARGET_AVOID_XFORM = 0;
24572 }
24573 }
24574
24575 if (rs6000_opt_masks[i].invert)
24576 invert = !invert;
24577
24578 if (invert)
24579 rs6000_isa_flags &= ~mask;
24580 else
24581 rs6000_isa_flags |= mask;
24582 }
24583 break;
24584 }
24585
24586 if (error_p && !not_valid_p)
24587 {
24588 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24589 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24590 {
24591 size_t j = rs6000_opt_vars[i].global_offset;
24592 *((int *) ((char *)&global_options + j)) = !invert;
24593 error_p = false;
24594 not_valid_p = false;
24595 break;
24596 }
24597 }
24598 }
24599
24600 if (error_p)
24601 {
24602 const char *eprefix, *esuffix;
24603
24604 ret = false;
24605 if (attr_p)
24606 {
24607 eprefix = "__attribute__((__target__(";
24608 esuffix = ")))";
24609 }
24610 else
24611 {
24612 eprefix = "#pragma GCC target ";
24613 esuffix = "";
24614 }
24615
24616 if (cpu_opt)
24617 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24618 q, esuffix);
24619 else if (not_valid_p)
24620 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24621 else
24622 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24623 }
24624 }
24625 }
24626
24627 else if (TREE_CODE (args) == TREE_LIST)
24628 {
24629 do
24630 {
24631 tree value = TREE_VALUE (args);
24632 if (value)
24633 {
24634 bool ret2 = rs6000_inner_target_options (value, attr_p);
24635 if (!ret2)
24636 ret = false;
24637 }
24638 args = TREE_CHAIN (args);
24639 }
24640 while (args != NULL_TREE);
24641 }
24642
24643 else
24644 {
24645 error ("attribute %<target%> argument not a string");
24646 return false;
24647 }
24648
24649 return ret;
24650 }
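/* Typical input this parser accepts (names taken from the tables
   above):

     __attribute__((__target__("cpu=power9,htm,no-vsx")))
     void f (void);

   which sets rs6000_cpu_index for power9, turns on OPTION_MASK_HTM,
   and turns off OPTION_MASK_VSX, recording each mask in
   rs6000_isa_flags_explicit.  */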
24651
24652 /* Print out the target options as a list for -mdebug=target. */
24653
24654 static void
24655 rs6000_debug_target_options (tree args, const char *prefix)
24656 {
24657 if (args == NULL_TREE)
24658 fprintf (stderr, "%s<NULL>", prefix);
24659
24660 else if (TREE_CODE (args) == STRING_CST)
24661 {
24662 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24663 char *q;
24664
24665 while ((q = strtok (p, ",")) != NULL)
24666 {
24667 p = NULL;
24668 fprintf (stderr, "%s\"%s\"", prefix, q);
24669 prefix = ", ";
24670 }
24671 }
24672
24673 else if (TREE_CODE (args) == TREE_LIST)
24674 {
24675 do
24676 {
24677 tree value = TREE_VALUE (args);
24678 if (value)
24679 {
24680 rs6000_debug_target_options (value, prefix);
24681 prefix = ", ";
24682 }
24683 args = TREE_CHAIN (args);
24684 }
24685 while (args != NULL_TREE);
24686 }
24687
24688 else
24689 gcc_unreachable ();
24690
24691 return;
24692 }
24693
24694 \f
24695 /* Hook to validate attribute((target("..."))). */
24696
24697 static bool
24698 rs6000_valid_attribute_p (tree fndecl,
24699 tree ARG_UNUSED (name),
24700 tree args,
24701 int flags)
24702 {
24703 struct cl_target_option cur_target;
24704 bool ret;
24705 tree old_optimize;
24706 tree new_target, new_optimize;
24707 tree func_optimize;
24708
24709 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24710
24711 if (TARGET_DEBUG_TARGET)
24712 {
24713 tree tname = DECL_NAME (fndecl);
24714 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24715 if (tname)
24716 fprintf (stderr, "function: %.*s\n",
24717 (int) IDENTIFIER_LENGTH (tname),
24718 IDENTIFIER_POINTER (tname));
24719 else
24720 fprintf (stderr, "function: unknown\n");
24721
24722 fprintf (stderr, "args:");
24723 rs6000_debug_target_options (args, " ");
24724 fprintf (stderr, "\n");
24725
24726 if (flags)
24727 fprintf (stderr, "flags: 0x%x\n", flags);
24728
24729 fprintf (stderr, "--------------------\n");
24730 }
24731
24732 /* attribute((target("default"))) does nothing, beyond
24733 affecting multi-versioning. */
24734 if (TREE_VALUE (args)
24735 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24736 && TREE_CHAIN (args) == NULL_TREE
24737 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24738 return true;
24739
24740 old_optimize = build_optimization_node (&global_options,
24741 &global_options_set);
24742 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24743
24744 /* If the function changed the optimization levels as well as setting target
24745 options, start with the optimizations specified. */
24746 if (func_optimize && func_optimize != old_optimize)
24747 cl_optimization_restore (&global_options, &global_options_set,
24748 TREE_OPTIMIZATION (func_optimize));
24749
24750 /* The target attributes may also change some optimization flags, so update
24751 the optimization options if necessary. */
24752 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24753 rs6000_cpu_index = rs6000_tune_index = -1;
24754 ret = rs6000_inner_target_options (args, true);
24755
24756 /* Set up any additional state. */
24757 if (ret)
24758 {
24759 ret = rs6000_option_override_internal (false);
24760 new_target = build_target_option_node (&global_options,
24761 &global_options_set);
24762 }
24763 else
24764 new_target = NULL;
24765
24766 new_optimize = build_optimization_node (&global_options,
24767 &global_options_set);
24768
24769 if (!new_target)
24770 ret = false;
24771
24772 else if (fndecl)
24773 {
24774 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24775
24776 if (old_optimize != new_optimize)
24777 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24778 }
24779
24780 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24781
24782 if (old_optimize != new_optimize)
24783 cl_optimization_restore (&global_options, &global_options_set,
24784 TREE_OPTIMIZATION (old_optimize));
24785
24786 return ret;
24787 }
24788
24789 \f
24790 /* Hook to validate the current #pragma GCC target and set the state, and
24791 update the macros based on what was changed. If ARGS is NULL, then
24792 POP_TARGET is used to reset the options. */
24793
24794 bool
24795 rs6000_pragma_target_parse (tree args, tree pop_target)
24796 {
24797 tree prev_tree = build_target_option_node (&global_options,
24798 &global_options_set);
24799 tree cur_tree;
24800 struct cl_target_option *prev_opt, *cur_opt;
24801 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24802
24803 if (TARGET_DEBUG_TARGET)
24804 {
24805 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24806 fprintf (stderr, "args:");
24807 rs6000_debug_target_options (args, " ");
24808 fprintf (stderr, "\n");
24809
24810 if (pop_target)
24811 {
24812 fprintf (stderr, "pop_target:\n");
24813 debug_tree (pop_target);
24814 }
24815 else
24816 fprintf (stderr, "pop_target: <NULL>\n");
24817
24818 fprintf (stderr, "--------------------\n");
24819 }
24820
24821 if (! args)
24822 {
24823 cur_tree = ((pop_target)
24824 ? pop_target
24825 : target_option_default_node);
24826 cl_target_option_restore (&global_options, &global_options_set,
24827 TREE_TARGET_OPTION (cur_tree));
24828 }
24829 else
24830 {
24831 rs6000_cpu_index = rs6000_tune_index = -1;
24832 if (!rs6000_inner_target_options (args, false)
24833 || !rs6000_option_override_internal (false)
24834 || (cur_tree = build_target_option_node (&global_options,
24835 &global_options_set))
24836 == NULL_TREE)
24837 {
24838 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24839 fprintf (stderr, "invalid pragma\n");
24840
24841 return false;
24842 }
24843 }
24844
24845 target_option_current_node = cur_tree;
24846 rs6000_activate_target_options (target_option_current_node);
24847
24848 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24849 change the macros that are defined. */
24850 if (rs6000_target_modify_macros_ptr)
24851 {
24852 prev_opt = TREE_TARGET_OPTION (prev_tree);
24853 prev_flags = prev_opt->x_rs6000_isa_flags;
24854
24855 cur_opt = TREE_TARGET_OPTION (cur_tree);
24856 cur_flags = cur_opt->x_rs6000_isa_flags;
24857
24858 diff_flags = (prev_flags ^ cur_flags);
24859
24860 if (diff_flags != 0)
24861 {
24862 /* Delete old macros. */
24863 rs6000_target_modify_macros_ptr (false,
24864 prev_flags & diff_flags);
24865
24866 /* Define new macros. */
24867 rs6000_target_modify_macros_ptr (true,
24868 cur_flags & diff_flags);
24869 }
24870 }
24871
24872 return true;
24873 }
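/* Usage sketch: after

     #pragma GCC target ("vsx")

   the inner parser enables VSX (and hence AltiVec), a new target
   option node becomes current, and the diff_flags logic above asks the
   preprocessor hook to define the corresponding macros (e.g. __VSX__,
   assuming the usual definitions in rs6000-c.cc).  */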
24874
24875 \f
24876 /* Remember the last target of rs6000_set_current_function. */
24877 static GTY(()) tree rs6000_previous_fndecl;
24878
24879 /* Restore target's globals from NEW_TREE and invalidate the
24880 rs6000_previous_fndecl cache. */
24881
24882 void
24883 rs6000_activate_target_options (tree new_tree)
24884 {
24885 cl_target_option_restore (&global_options, &global_options_set,
24886 TREE_TARGET_OPTION (new_tree));
24887 if (TREE_TARGET_GLOBALS (new_tree))
24888 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24889 else if (new_tree == target_option_default_node)
24890 restore_target_globals (&default_target_globals);
24891 else
24892 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24893 rs6000_previous_fndecl = NULL_TREE;
24894 }
24895
24896 /* Establish appropriate back-end context for processing the function
24897 FNDECL. The argument might be NULL to indicate processing at top
24898 level, outside of any function scope. */
24899 static void
24900 rs6000_set_current_function (tree fndecl)
24901 {
24902 if (TARGET_DEBUG_TARGET)
24903 {
24904 fprintf (stderr, "\n==================== rs6000_set_current_function");
24905
24906 if (fndecl)
24907 fprintf (stderr, ", fndecl %s (%p)",
24908 (DECL_NAME (fndecl)
24909 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24910 : "<unknown>"), (void *)fndecl);
24911
24912 if (rs6000_previous_fndecl)
24913 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24914
24915 fprintf (stderr, "\n");
24916 }
24917
24918 /* Only change the context if the function changes. This hook is called
24919 several times in the course of compiling a function, and we don't want to
24920 slow things down too much or call target_reinit when it isn't safe. */
24921 if (fndecl == rs6000_previous_fndecl)
24922 return;
24923
24924 tree old_tree;
24925 if (rs6000_previous_fndecl == NULL_TREE)
24926 old_tree = target_option_current_node;
24927 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24928 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24929 else
24930 old_tree = target_option_default_node;
24931
24932 tree new_tree;
24933 if (fndecl == NULL_TREE)
24934 {
24935 if (old_tree != target_option_current_node)
24936 new_tree = target_option_current_node;
24937 else
24938 new_tree = NULL_TREE;
24939 }
24940 else
24941 {
24942 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24943 if (new_tree == NULL_TREE)
24944 new_tree = target_option_default_node;
24945 }
24946
24947 if (TARGET_DEBUG_TARGET)
24948 {
24949 if (new_tree)
24950 {
24951 fprintf (stderr, "\nnew fndecl target specific options:\n");
24952 debug_tree (new_tree);
24953 }
24954
24955 if (old_tree)
24956 {
24957 fprintf (stderr, "\nold fndecl target specific options:\n");
24958 debug_tree (old_tree);
24959 }
24960
24961 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24962 fprintf (stderr, "--------------------\n");
24963 }
24964
24965 if (new_tree && old_tree != new_tree)
24966 rs6000_activate_target_options (new_tree);
24967
24968 if (fndecl)
24969 rs6000_previous_fndecl = fndecl;
24970 }
24971
24972 \f
24973 /* Save the current options */
24974
24975 static void
24976 rs6000_function_specific_save (struct cl_target_option *ptr,
24977 struct gcc_options *opts,
24978 struct gcc_options */* opts_set */)
24979 {
24980 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24981 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24982 }
24983
24984 /* Restore the current options */
24985
24986 static void
24987 rs6000_function_specific_restore (struct gcc_options *opts,
24988 struct gcc_options */* opts_set */,
24989 struct cl_target_option *ptr)
24990
24991 {
24992 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24993 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24994 (void) rs6000_option_override_internal (false);
24995 }
24996
24997 /* Print the current options */
24998
24999 static void
25000 rs6000_function_specific_print (FILE *file, int indent,
25001 struct cl_target_option *ptr)
25002 {
25003 rs6000_print_isa_options (file, indent, "Isa options set",
25004 ptr->x_rs6000_isa_flags);
25005
25006 rs6000_print_isa_options (file, indent, "Isa options explicit",
25007 ptr->x_rs6000_isa_flags_explicit);
25008 }
25009
25010 /* Helper function to print the current isa or misc options on a line. */
25011
25012 static void
25013 rs6000_print_options_internal (FILE *file,
25014 int indent,
25015 const char *string,
25016 HOST_WIDE_INT flags,
25017 const char *prefix,
25018 const struct rs6000_opt_mask *opts,
25019 size_t num_elements)
25020 {
25021 size_t i;
25022 size_t start_column = 0;
25023 size_t cur_column;
25024 size_t max_column = 120;
25025 size_t prefix_len = strlen (prefix);
25026 size_t comma_len = 0;
25027 const char *comma = "";
25028
25029 if (indent)
25030 start_column += fprintf (file, "%*s", indent, "");
25031
25032 if (!flags)
25033 {
25034 fprintf (file, DEBUG_FMT_S, string, "<none>");
25035 return;
25036 }
25037
25038 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
25039
25040 /* Print the various mask options. */
25041 cur_column = start_column;
25042 for (i = 0; i < num_elements; i++)
25043 {
25044 bool invert = opts[i].invert;
25045 const char *name = opts[i].name;
25046 const char *no_str = "";
25047 HOST_WIDE_INT mask = opts[i].mask;
25048 size_t len = comma_len + prefix_len + strlen (name);
25049
25050 if (!invert)
25051 {
25052 if ((flags & mask) == 0)
25053 {
25054 no_str = "no-";
25055 len += strlen ("no-");
25056 }
25057
25058 flags &= ~mask;
25059 }
25060
25061 else
25062 {
25063 if ((flags & mask) != 0)
25064 {
25065 no_str = "no-";
25066 len += strlen ("no-");
25067 }
25068
25069 flags |= mask;
25070 }
25071
25072 cur_column += len;
25073 if (cur_column > max_column)
25074 {
25075 fprintf (file, ", \\\n%*s", (int)start_column, "");
25076 cur_column = start_column + len;
25077 comma = "";
25078 }
25079
25080 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
25081 comma = ", ";
25082 comma_len = strlen (", ");
25083 }
25084
25085 fputs ("\n", file);
25086 }
25087
25088 /* Helper function to print the current isa options on a line. */
25089
25090 static void
25091 rs6000_print_isa_options (FILE *file, int indent, const char *string,
25092 HOST_WIDE_INT flags)
25093 {
25094 rs6000_print_options_internal (file, indent, string, flags, "-m",
25095 &rs6000_opt_masks[0],
25096 ARRAY_SIZE (rs6000_opt_masks));
25097 }
25098
25099 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
25100 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
25101 -mupper-regs-df, etc.).
25102
25103 If the user used -mno-power8-vector, we need to turn off all of the implicit
25104 ISA 2.07 and 3.0 options that relate to the vector unit.
25105
25106 If the user used -mno-power9-vector, we need to turn off all of the implicit
25107 ISA 3.0 options that relate to the vector unit.
25108
25109 This function does not handle explicit options such as the user specifying
25110 -mdirect-move. These are handled in rs6000_option_override_internal, and
25111 the appropriate error is given if needed.
25112
25113 We return a mask of all of the implicit options that should not be enabled
25114 by default. */
25115
25116 static HOST_WIDE_INT
25117 rs6000_disable_incompatible_switches (void)
25118 {
25119 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
25120 size_t i, j;
25121
25122 static const struct {
25123 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
25124 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
25125 const char *const name; /* name of the switch. */
25126 } flags[] = {
25127 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
25128 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
25129 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
25130 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
25131 };
25132
25133 for (i = 0; i < ARRAY_SIZE (flags); i++)
25134 {
25135 HOST_WIDE_INT no_flag = flags[i].no_flag;
25136
25137 if ((rs6000_isa_flags & no_flag) == 0
25138 && (rs6000_isa_flags_explicit & no_flag) != 0)
25139 {
25140 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
25141 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
25142 & rs6000_isa_flags
25143 & dep_flags);
25144
25145 if (set_flags)
25146 {
25147 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
25148 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
25149 {
25150 set_flags &= ~rs6000_opt_masks[j].mask;
25151 error ("%<-mno-%s%> turns off %<-m%s%>",
25152 flags[i].name,
25153 rs6000_opt_masks[j].name);
25154 }
25155
25156 gcc_assert (!set_flags);
25157 }
25158
25159 rs6000_isa_flags &= ~dep_flags;
25160 ignore_masks |= no_flag | dep_flags;
25161 }
25162 }
25163
25164 return ignore_masks;
25165 }
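
/* Illustrative sketch, not compiler code: given the table above, a command
   line such as

       gcc -mno-vsx -mpower8-vector foo.c

   explicitly disables VSX while explicitly enabling an option that depends
   on it, so the loop above emits

       error: '-mno-vsx' turns off '-mpower8-vector'

   (assuming power8-vector is among the OTHER_VSX_VECTOR_MASKS dependents),
   and the dependent masks are folded into the returned ignore_masks.  */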
25166
25167 \f
25168 /* Helper function for printing the function name when debugging. */
25169
25170 static const char *
25171 get_decl_name (tree fn)
25172 {
25173 tree name;
25174
25175 if (!fn)
25176 return "<null>";
25177
25178 name = DECL_NAME (fn);
25179 if (!name)
25180 return "<no-name>";
25181
25182 return IDENTIFIER_POINTER (name);
25183 }
25184
25185 /* Return the clone id of the target we are compiling code for in a target
25186 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
25187 the priority list for the target clones (ordered from lowest to
25188 highest). */
25189
25190 static int
25191 rs6000_clone_priority (tree fndecl)
25192 {
25193 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25194 HOST_WIDE_INT isa_masks;
25195 int ret = CLONE_DEFAULT;
25196 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
25197 const char *attrs_str = NULL;
25198
25199 attrs = TREE_VALUE (TREE_VALUE (attrs));
25200 attrs_str = TREE_STRING_POINTER (attrs);
25201
25202 /* Return priority zero for the default function. Return the ISA needed for the
25203 function if it is not the default. */
25204 if (strcmp (attrs_str, "default") != 0)
25205 {
25206 if (fn_opts == NULL_TREE)
25207 fn_opts = target_option_default_node;
25208
25209 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
25210 isa_masks = rs6000_isa_flags;
25211 else
25212 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
25213
25214 for (ret = CLONE_MAX - 1; ret != 0; ret--)
25215 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
25216 break;
25217 }
25218
25219 if (TARGET_DEBUG_TARGET)
25220 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
25221 get_decl_name (fndecl), ret);
25222
25223 return ret;
25224 }
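
/* Illustrative user-level sketch (not compiler code) of what produces these
   priorities; the attribute string and function are invented:

       __attribute__ ((target_clones ("cpu=power9,default")))
       long mult (long a, long b) { return a * b; }

   The "default" clone gets CLONE_DEFAULT (priority 0), while the power9
   clone's ISA mask matches a higher entry in rs6000_clone_map.  */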
25225
25226 /* This compares the priority of target features in function DECL1 and DECL2.
25227 It returns positive value if DECL1 is higher priority, negative value if
25228 DECL2 is higher priority and 0 if they are the same. Note, priorities are
25229 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25230
25231 static int
25232 rs6000_compare_version_priority (tree decl1, tree decl2)
25233 {
25234 int priority1 = rs6000_clone_priority (decl1);
25235 int priority2 = rs6000_clone_priority (decl2);
25236 int ret = priority1 - priority2;
25237
25238 if (TARGET_DEBUG_TARGET)
25239 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
25240 get_decl_name (decl1), get_decl_name (decl2), ret);
25241
25242 return ret;
25243 }
25244
25245 /* Make a dispatcher declaration for the multi-versioned function DECL.
25246 Calls to DECL function will be replaced with calls to the dispatcher
25247 by the front-end. Returns the decl of the dispatcher function. */
25248
25249 static tree
25250 rs6000_get_function_versions_dispatcher (void *decl)
25251 {
25252 tree fn = (tree) decl;
25253 struct cgraph_node *node = NULL;
25254 struct cgraph_node *default_node = NULL;
25255 struct cgraph_function_version_info *node_v = NULL;
25256 struct cgraph_function_version_info *first_v = NULL;
25257
25258 tree dispatch_decl = NULL;
25259
25260 struct cgraph_function_version_info *default_version_info = NULL;
25261 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
25262
25263 if (TARGET_DEBUG_TARGET)
25264 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
25265 get_decl_name (fn));
25266
25267 node = cgraph_node::get (fn);
25268 gcc_assert (node != NULL);
25269
25270 node_v = node->function_version ();
25271 gcc_assert (node_v != NULL);
25272
25273 if (node_v->dispatcher_resolver != NULL)
25274 return node_v->dispatcher_resolver;
25275
25276 /* Find the default version and make it the first node. */
25277 first_v = node_v;
25278 /* Go to the beginning of the chain. */
25279 while (first_v->prev != NULL)
25280 first_v = first_v->prev;
25281
25282 default_version_info = first_v;
25283 while (default_version_info != NULL)
25284 {
25285 const tree decl2 = default_version_info->this_node->decl;
25286 if (is_function_default_version (decl2))
25287 break;
25288 default_version_info = default_version_info->next;
25289 }
25290
25291 /* If there is no default node, just return NULL. */
25292 if (default_version_info == NULL)
25293 return NULL;
25294
25295 /* Make default info the first node. */
25296 if (first_v != default_version_info)
25297 {
25298 default_version_info->prev->next = default_version_info->next;
25299 if (default_version_info->next)
25300 default_version_info->next->prev = default_version_info->prev;
25301 first_v->prev = default_version_info;
25302 default_version_info->next = first_v;
25303 default_version_info->prev = NULL;
25304 }
25305
25306 default_node = default_version_info->this_node;
25307
25308 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25309 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25310 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25311 "exports hardware capability bits");
25312 #else
25313
25314 if (targetm.has_ifunc_p ())
25315 {
25316 struct cgraph_function_version_info *it_v = NULL;
25317 struct cgraph_node *dispatcher_node = NULL;
25318 struct cgraph_function_version_info *dispatcher_version_info = NULL;
25319
25320 /* Right now, the dispatching is done via ifunc. */
25321 dispatch_decl = make_dispatcher_decl (default_node->decl);
25322 TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);
25323
25324 dispatcher_node = cgraph_node::get_create (dispatch_decl);
25325 gcc_assert (dispatcher_node != NULL);
25326 dispatcher_node->dispatcher_function = 1;
25327 dispatcher_version_info
25328 = dispatcher_node->insert_new_function_version ();
25329 dispatcher_version_info->next = default_version_info;
25330 dispatcher_node->definition = 1;
25331
25332 /* Set the dispatcher for all the versions. */
25333 it_v = default_version_info;
25334 while (it_v != NULL)
25335 {
25336 it_v->dispatcher_resolver = dispatch_decl;
25337 it_v = it_v->next;
25338 }
25339 }
25340 else
25341 {
25342 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25343 "multiversioning needs %<ifunc%> which is not supported "
25344 "on this target");
25345 }
25346 #endif
25347
25348 return dispatch_decl;
25349 }
25350
25351 /* Make the resolver function decl to dispatch the versions of a multi-
25352 versioned function, DEFAULT_DECL. Create an empty basic block in the
25353 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25354 function. */
25355
25356 static tree
25357 make_resolver_func (const tree default_decl,
25358 const tree dispatch_decl,
25359 basic_block *empty_bb)
25360 {
25361 /* Make the resolver function static. The resolver function returns
25362 void *. */
25363 tree decl_name = clone_function_name (default_decl, "resolver");
25364 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
25365 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
25366 tree decl = build_fn_decl (resolver_name, type);
25367 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
25368
25369 DECL_NAME (decl) = decl_name;
25370 TREE_USED (decl) = 1;
25371 DECL_ARTIFICIAL (decl) = 1;
25372 DECL_IGNORED_P (decl) = 0;
25373 TREE_PUBLIC (decl) = 0;
25374 DECL_UNINLINABLE (decl) = 1;
25375
25376 /* Resolver is not external, body is generated. */
25377 DECL_EXTERNAL (decl) = 0;
25378 DECL_EXTERNAL (dispatch_decl) = 0;
25379
25380 DECL_CONTEXT (decl) = NULL_TREE;
25381 DECL_INITIAL (decl) = make_node (BLOCK);
25382 DECL_STATIC_CONSTRUCTOR (decl) = 0;
25383
25384 if (DECL_COMDAT_GROUP (default_decl)
25385 || TREE_PUBLIC (default_decl))
25386 {
25387 /* In this case, each translation unit with a call to this
25388 versioned function will put out a resolver. Ensure it
25389 is comdat to keep just one copy. */
25390 DECL_COMDAT (decl) = 1;
25391 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25392 }
25393 else
25394 TREE_PUBLIC (dispatch_decl) = 0;
25395
25396 /* Build result decl and add to function_decl. */
25397 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25398 DECL_CONTEXT (t) = decl;
25399 DECL_ARTIFICIAL (t) = 1;
25400 DECL_IGNORED_P (t) = 1;
25401 DECL_RESULT (decl) = t;
25402
25403 gimplify_function_tree (decl);
25404 push_cfun (DECL_STRUCT_FUNCTION (decl));
25405 *empty_bb = init_lowered_empty_function (decl, false,
25406 profile_count::uninitialized ());
25407
25408 cgraph_node::add_new_function (decl, true);
25409 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25410
25411 pop_cfun ();
25412
25413 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25414 DECL_ATTRIBUTES (dispatch_decl)
25415 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25416
25417 cgraph_node::create_same_body_alias (dispatch_decl, decl);
25418
25419 return decl;
25420 }
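
/* Conceptually, the resolver built here behaves like this hand-written C
   sketch (names and the feature string are illustrative; the per-version
   tests are added later by add_condition_to_bb):

       static void *foo_resolver (void)
       {
         if (__builtin_cpu_supports ("arch_3_00"))
           return (void *) foo_power9;
         return (void *) foo_default;
       }  */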
25421
25422 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25423 return a pointer to VERSION_DECL if we are running on a machine that
25424 supports the hardware architecture bits selected by index CLONE_ISA. This function will
25425 be called during version dispatch to decide which function version to
25426 execute. It returns the basic block at the end, to which more conditions
25427 can be added. */
25428
25429 static basic_block
25430 add_condition_to_bb (tree function_decl, tree version_decl,
25431 int clone_isa, basic_block new_bb)
25432 {
25433 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25434
25435 gcc_assert (new_bb != NULL);
25436 gimple_seq gseq = bb_seq (new_bb);
25437
25438
25439 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25440 build_fold_addr_expr (version_decl));
25441 tree result_var = create_tmp_var (ptr_type_node);
25442 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25443 gimple *return_stmt = gimple_build_return (result_var);
25444
25445 if (clone_isa == CLONE_DEFAULT)
25446 {
25447 gimple_seq_add_stmt (&gseq, convert_stmt);
25448 gimple_seq_add_stmt (&gseq, return_stmt);
25449 set_bb_seq (new_bb, gseq);
25450 gimple_set_bb (convert_stmt, new_bb);
25451 gimple_set_bb (return_stmt, new_bb);
25452 pop_cfun ();
25453 return new_bb;
25454 }
25455
25456 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25457 tree cond_var = create_tmp_var (bool_int_type_node);
25458 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25459 const char *arg_str = rs6000_clone_map[clone_isa].name;
25460 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25461 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25462 gimple_call_set_lhs (call_cond_stmt, cond_var);
25463
25464 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25465 gimple_set_bb (call_cond_stmt, new_bb);
25466 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25467
25468 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25469 NULL_TREE, NULL_TREE);
25470 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25471 gimple_set_bb (if_else_stmt, new_bb);
25472 gimple_seq_add_stmt (&gseq, if_else_stmt);
25473
25474 gimple_seq_add_stmt (&gseq, convert_stmt);
25475 gimple_seq_add_stmt (&gseq, return_stmt);
25476 set_bb_seq (new_bb, gseq);
25477
25478 basic_block bb1 = new_bb;
25479 edge e12 = split_block (bb1, if_else_stmt);
25480 basic_block bb2 = e12->dest;
25481 e12->flags &= ~EDGE_FALLTHRU;
25482 e12->flags |= EDGE_TRUE_VALUE;
25483
25484 edge e23 = split_block (bb2, return_stmt);
25485 gimple_set_bb (convert_stmt, bb2);
25486 gimple_set_bb (return_stmt, bb2);
25487
25488 basic_block bb3 = e23->dest;
25489 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25490
25491 remove_edge (e23);
25492 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25493
25494 pop_cfun ();
25495 return bb3;
25496 }
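
/* The GIMPLE appended for one non-default version looks roughly like this
   sketch (variable names invented):

       cond = __builtin_cpu_supports ("<clone name>");   <-- bb1
       if (cond != 0) goto bb2; else goto bb3;
       bb2: tmp = (void *) &version_fn; return tmp;
       bb3: <the next version's test is chained here>

   matching the bb1/bb2/bb3 split and edges constructed above.  */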
25497
25498 /* This function generates the dispatch function for multi-versioned functions.
25499 DISPATCH_DECL is the function which will contain the dispatch logic.
25500 FNDECLS holds the function choices for dispatch and is passed as a vector.
25501 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25502 code is generated. */
25503
25504 static int
25505 dispatch_function_versions (tree dispatch_decl,
25506 void *fndecls_p,
25507 basic_block *empty_bb)
25508 {
25509 int ix;
25510 tree ele;
25511 vec<tree> *fndecls;
25512 tree clones[CLONE_MAX];
25513
25514 if (TARGET_DEBUG_TARGET)
25515 fputs ("dispatch_function_versions, top\n", stderr);
25516
25517 gcc_assert (dispatch_decl != NULL
25518 && fndecls_p != NULL
25519 && empty_bb != NULL);
25520
25521 /* fndecls_p is actually a vector. */
25522 fndecls = static_cast<vec<tree> *> (fndecls_p);
25523
25524 /* At least one more version other than the default. */
25525 gcc_assert (fndecls->length () >= 2);
25526
25527 /* The first version in the vector is the default decl. */
25528 memset ((void *) clones, '\0', sizeof (clones));
25529 clones[CLONE_DEFAULT] = (*fndecls)[0];
25530
25531 /* On the PowerPC, we do not need to call __builtin_cpu_init, since it is a
25532 NOP here (on x86_64 it is not). The builtin function
25533 __builtin_cpu_supports ensures that the TOC fields are set up by requiring
25534 a recent glibc. If we ever need to call __builtin_cpu_init, we would need
25535 to insert the code here to do the call. */
25536
25537 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25538 {
25539 int priority = rs6000_clone_priority (ele);
25540 if (!clones[priority])
25541 clones[priority] = ele;
25542 }
25543
25544 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25545 if (clones[ix])
25546 {
25547 if (TARGET_DEBUG_TARGET)
25548 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25549 ix, get_decl_name (clones[ix]));
25550
25551 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25552 *empty_bb);
25553 }
25554
25555 return 0;
25556 }
25557
25558 /* Generate the dispatching code body to dispatch multi-versioned function
25559 DECL. The target hook is called to process the "target" attributes and
25560 provide the code to dispatch the right function at run-time. NODE points
25561 to the dispatcher decl whose body will be created. */
25562
25563 static tree
25564 rs6000_generate_version_dispatcher_body (void *node_p)
25565 {
25566 tree resolver;
25567 basic_block empty_bb;
25568 struct cgraph_node *node = (cgraph_node *) node_p;
25569 struct cgraph_function_version_info *ninfo = node->function_version ();
25570
25571 if (ninfo->dispatcher_resolver)
25572 return ninfo->dispatcher_resolver;
25573
25574 /* node is going to be an alias, so remove the finalized bit. */
25575 node->definition = false;
25576
25577 /* The first version in the chain corresponds to the default version. */
25578 ninfo->dispatcher_resolver = resolver
25579 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25580
25581 if (TARGET_DEBUG_TARGET)
25582 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
25583 get_decl_name (resolver));
25584
25585 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25586 auto_vec<tree, 2> fn_ver_vec;
25587
25588 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25589 vinfo;
25590 vinfo = vinfo->next)
25591 {
25592 struct cgraph_node *version = vinfo->this_node;
25593 /* Check for virtual functions here again, as by this time it should
25594 have been determined if this function needs a vtable index or
25595 not. This happens for methods in derived classes that override
25596 virtual methods in base classes but are not explicitly marked as
25597 virtual. */
25598 if (DECL_VINDEX (version->decl))
25599 sorry ("Virtual function multiversioning not supported");
25600
25601 fn_ver_vec.safe_push (version->decl);
25602 }
25603
25604 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25605 cgraph_edge::rebuild_edges ();
25606 pop_cfun ();
25607 return resolver;
25608 }
25609
25610 /* Hook to decide if we need to scan function gimple statements to
25611 collect target specific information for inlining, and update the
25612 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25613 to predict which ISA feature is used at this time. Return true
25614 if we need to scan, otherwise return false. */
25615
25616 static bool
25617 rs6000_need_ipa_fn_target_info (const_tree decl,
25618 unsigned int &info ATTRIBUTE_UNUSED)
25619 {
25620 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25621 if (!target)
25622 target = target_option_default_node;
25623 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25624
25625 /* See PR102059; we only handle HTM for now, so we only do the
25626 subsequent scanning when the HTM feature is enabled. */
25627 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25628 return true;
25629
25630 return false;
25631 }
25632
25633 /* Hook to update target specific information INFO for inlining by
25634 checking the given STMT. Return false if we don't need to scan
25635 any more, otherwise return true. */
25636
25637 static bool
25638 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25639 {
25640 /* Assume inline asm can use any instruction features. */
25641 if (gimple_code (stmt) == GIMPLE_ASM)
25642 {
25643 /* Should set any bits we are concerned with; for now OPTION_MASK_HTM
25644 is the only bit we care about. */
25645 info |= RS6000_FN_TARGET_INFO_HTM;
25646 return false;
25647 }
25648 else if (gimple_code (stmt) == GIMPLE_CALL)
25649 {
25650 tree fndecl = gimple_call_fndecl (stmt);
25651 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25652 {
25653 enum rs6000_gen_builtins fcode
25654 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25655 /* HTM bifs definitely exploit HTM insns. */
25656 if (bif_is_htm (rs6000_builtin_info[fcode]))
25657 {
25658 info |= RS6000_FN_TARGET_INFO_HTM;
25659 return false;
25660 }
25661 }
25662 }
25663
25664 return true;
25665 }
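
/* For example (a sketch, not compiler code), scanning a callee such as

       void f (void) { __builtin_tbegin (0); }

   takes the GIMPLE_CALL arm: __builtin_tbegin is an HTM bif, so
   RS6000_FN_TARGET_INFO_HTM is set and scanning stops early, just as an
   asm ("tbegin. 0") would via the GIMPLE_ASM arm.  */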
25666
25667 /* Hook to determine if one function can safely inline another. */
25668
25669 static bool
25670 rs6000_can_inline_p (tree caller, tree callee)
25671 {
25672 bool ret = false;
25673 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25674 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25675
25676 /* If the callee has no option attributes, then it is ok to inline. */
25677 if (!callee_tree)
25678 ret = true;
25679
25680 else
25681 {
25682 HOST_WIDE_INT caller_isa;
25683 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25684 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25685 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25686
25687 /* If the caller has option attributes, then use them.
25688 Otherwise, use the command line options. */
25689 if (caller_tree)
25690 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
25691 else
25692 caller_isa = rs6000_isa_flags;
25693
25694 cgraph_node *callee_node = cgraph_node::get (callee);
25695 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25696 {
25697 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25698 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25699 {
25700 callee_isa &= ~OPTION_MASK_HTM;
25701 explicit_isa &= ~OPTION_MASK_HTM;
25702 }
25703 }
25704
25705 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25706 purposes. */
25707 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25708 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25709
25710 /* The callee's options must be a subset of the caller's options, i.e.
25711 a vsx function may inline an altivec function, but a no-vsx function
25712 must not inline a vsx function. However, for those options that the
25713 callee has explicitly enabled or disabled, then we must enforce that
25714 the callee's and caller's options match exactly; see PR70010. */
25715 if (((caller_isa & callee_isa) == callee_isa)
25716 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25717 ret = true;
25718 }
25719
25720 if (TARGET_DEBUG_TARGET)
25721 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25722 get_decl_name (caller), get_decl_name (callee),
25723 (ret ? "can" : "cannot"));
25724
25725 return ret;
25726 }
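
/* Illustrative sketch of the subset rule (user code, names invented):

       __attribute__ ((target ("altivec"))) static int callee (int x);
       __attribute__ ((target ("vsx"))) int caller (int x)
       { return callee (x); }    /* OK: VSX implies Altivec.  */

   whereas a callee explicitly marked target ("vsx") must not be inlined
   into a no-vsx caller, and explicitly set options must match exactly
   (see PR70010).  */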
25727 \f
25728 /* Allocate a stack temp and fix up the address so it meets the particular
25729 memory requirements (either offsettable or REG+REG addressing). */
25730
25731 rtx
25732 rs6000_allocate_stack_temp (machine_mode mode,
25733 bool offsettable_p,
25734 bool reg_reg_p)
25735 {
25736 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25737 rtx addr = XEXP (stack, 0);
25738 int strict_p = reload_completed;
25739
25740 if (!legitimate_indirect_address_p (addr, strict_p))
25741 {
25742 if (offsettable_p
25743 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25744 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25745
25746 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25747 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25748 }
25749
25750 return stack;
25751 }
25752
25753 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25754 convert to such a form to deal with memory reference instructions
25755 like STFIWX and LDBRX that only take reg+reg addressing. */
25756
25757 rtx
25758 rs6000_force_indexed_or_indirect_mem (rtx x)
25759 {
25760 machine_mode mode = GET_MODE (x);
25761
25762 gcc_assert (MEM_P (x));
25763 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25764 {
25765 rtx addr = XEXP (x, 0);
25766 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25767 {
25768 rtx reg = XEXP (addr, 0);
25769 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25770 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25771 gcc_assert (REG_P (reg));
25772 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25773 addr = reg;
25774 }
25775 else if (GET_CODE (addr) == PRE_MODIFY)
25776 {
25777 rtx reg = XEXP (addr, 0);
25778 rtx expr = XEXP (addr, 1);
25779 gcc_assert (REG_P (reg));
25780 gcc_assert (GET_CODE (expr) == PLUS);
25781 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25782 addr = reg;
25783 }
25784
25785 if (GET_CODE (addr) == PLUS)
25786 {
25787 rtx op0 = XEXP (addr, 0);
25788 rtx op1 = XEXP (addr, 1);
25789 op0 = force_reg (Pmode, op0);
25790 op1 = force_reg (Pmode, op1);
25791 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25792 }
25793 else
25794 x = replace_equiv_address (x, force_reg (Pmode, addr));
25795 }
25796
25797 return x;
25798 }
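
/* For example (RTL sketch, register numbers invented), an address like

       (mem:DI (pre_inc (reg 9)))

   is handled by emitting an add of the mode size to r9 and then using
   (mem:DI (reg 9)), while (mem:DI (plus (reg 9) (const_int 16))) has both
   operands forced into registers to yield a reg+reg (indexed) address.  */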
25799
25800 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25801
25802 On the RS/6000, all integer constants are acceptable, most won't be valid
25803 for particular insns, though. Only easy FP constants are acceptable. */
25804
25805 static bool
25806 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25807 {
25808 if (TARGET_ELF && tls_referenced_p (x))
25809 return false;
25810
25811 if (CONST_DOUBLE_P (x))
25812 return easy_fp_constant (x, mode);
25813
25814 if (GET_CODE (x) == CONST_VECTOR)
25815 return easy_vector_constant (x, mode);
25816
25817 return true;
25818 }
25819
25820 #if TARGET_AIX_OS
25821 /* Implement TARGET_PRECOMPUTE_TLS_P.
25822
25823 On AIX, TLS symbols are in the TOC, which is maintained in the
25824 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25825 must be considered legitimate constants. */
25826
25827 static bool
25828 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25829 {
25830 return tls_referenced_p (x);
25831 }
25832 #endif
25833
25834 \f
25835 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25836
25837 static bool
25838 chain_already_loaded (rtx_insn *last)
25839 {
25840 for (; last != NULL; last = PREV_INSN (last))
25841 {
25842 if (NONJUMP_INSN_P (last))
25843 {
25844 rtx patt = PATTERN (last);
25845
25846 if (GET_CODE (patt) == SET)
25847 {
25848 rtx lhs = XEXP (patt, 0);
25849
25850 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25851 return true;
25852 }
25853 }
25854 }
25855 return false;
25856 }
25857
25858 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25859
25860 void
25861 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25862 {
25863 rtx func = func_desc;
25864 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25865 rtx toc_load = NULL_RTX;
25866 rtx toc_restore = NULL_RTX;
25867 rtx func_addr;
25868 rtx abi_reg = NULL_RTX;
25869 rtx call[5];
25870 int n_call;
25871 rtx insn;
25872 bool is_pltseq_longcall;
25873
25874 if (global_tlsarg)
25875 tlsarg = global_tlsarg;
25876
25877 /* Handle longcall attributes. */
25878 is_pltseq_longcall = false;
25879 if ((INTVAL (cookie) & CALL_LONG) != 0
25880 && GET_CODE (func_desc) == SYMBOL_REF)
25881 {
25882 func = rs6000_longcall_ref (func_desc, tlsarg);
25883 if (TARGET_PLTSEQ)
25884 is_pltseq_longcall = true;
25885 }
25886
25887 /* Handle indirect calls. */
25888 if (!SYMBOL_REF_P (func)
25889 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25890 {
25891 if (!rs6000_pcrel_p ())
25892 {
25893 /* Save the TOC into its reserved slot before the call,
25894 and prepare to restore it after the call. */
25895 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25896 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25897 gen_rtvec (1, stack_toc_offset),
25898 UNSPEC_TOCSLOT);
25899 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25900
25901 /* Can we optimize saving the TOC in the prologue or
25902 do we need to do it at every call? */
25903 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25904 cfun->machine->save_toc_in_prologue = true;
25905 else
25906 {
25907 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25908 rtx stack_toc_mem = gen_frame_mem (Pmode,
25909 gen_rtx_PLUS (Pmode, stack_ptr,
25910 stack_toc_offset));
25911 MEM_VOLATILE_P (stack_toc_mem) = 1;
25912 if (is_pltseq_longcall)
25913 {
25914 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25915 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25916 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25917 }
25918 else
25919 emit_move_insn (stack_toc_mem, toc_reg);
25920 }
25921 }
25922
25923 if (DEFAULT_ABI == ABI_ELFv2)
25924 {
25925 /* A function pointer in the ELFv2 ABI is just a plain address, but
25926 the ABI requires it to be loaded into r12 before the call. */
25927 func_addr = gen_rtx_REG (Pmode, 12);
25928 emit_move_insn (func_addr, func);
25929 abi_reg = func_addr;
25930 /* Indirect calls via CTR are strongly preferred over indirect
25931 calls via LR, so move the address there. Needed to mark
25932 this insn for linker plt sequence editing too. */
25933 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25934 if (is_pltseq_longcall)
25935 {
25936 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25937 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25938 emit_insn (gen_rtx_SET (func_addr, mark_func));
25939 v = gen_rtvec (2, func_addr, func_desc);
25940 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25941 }
25942 else
25943 emit_move_insn (func_addr, abi_reg);
25944 }
25945 else
25946 {
25947 /* A function pointer under AIX is a pointer to a data area whose
25948 first word contains the actual address of the function, whose
25949 second word contains a pointer to its TOC, and whose third word
25950 contains a value to place in the static chain register (r11).
25951 Note that if we load the static chain, our "trampoline" need
25952 not have any executable code. */
25953
25954 /* Load up address of the actual function. */
25955 func = force_reg (Pmode, func);
25956 func_addr = gen_reg_rtx (Pmode);
25957 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25958
25959 /* Indirect calls via CTR are strongly preferred over indirect
25960 calls via LR, so move the address there. */
25961 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25962 emit_move_insn (ctr_reg, func_addr);
25963 func_addr = ctr_reg;
25964
25965 /* Prepare to load the TOC of the called function. Note that the
25966 TOC load must happen immediately before the actual call so
25967 that unwinding the TOC registers works correctly. See the
25968 comment in frob_update_context. */
25969 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25970 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25971 gen_rtx_PLUS (Pmode, func,
25972 func_toc_offset));
25973 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25974
25975 /* If we have a static chain, load it up. But, if the call was
25976 originally direct, the 3rd word has not been written since no
25977 trampoline has been built, so we ought not to load it, lest we
25978 overwrite a static chain value. */
25979 if (!(GET_CODE (func_desc) == SYMBOL_REF
25980 && SYMBOL_REF_FUNCTION_P (func_desc))
25981 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25982 && !chain_already_loaded (get_current_sequence ()->next->last))
25983 {
25984 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25985 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25986 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25987 gen_rtx_PLUS (Pmode, func,
25988 func_sc_offset));
25989 emit_move_insn (sc_reg, func_sc_mem);
25990 abi_reg = sc_reg;
25991 }
25992 }
25993 }
25994 else
25995 {
25996 /* No TOC register needed for calls from PC-relative callers. */
25997 if (!rs6000_pcrel_p ())
25998 /* Direct calls use the TOC: for local calls, the callee will
25999 assume the TOC register is set; for non-local calls, the
26000 PLT stub needs the TOC register. */
26001 abi_reg = toc_reg;
26002 func_addr = func;
26003 }
26004
26005 /* Create the call. */
26006 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26007 if (value != NULL_RTX)
26008 call[0] = gen_rtx_SET (value, call[0]);
26009 call[1] = gen_rtx_USE (VOIDmode, cookie);
26010 n_call = 2;
26011
26012 if (toc_load)
26013 call[n_call++] = toc_load;
26014 if (toc_restore)
26015 call[n_call++] = toc_restore;
26016
26017 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26018
26019 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
26020 insn = emit_call_insn (insn);
26021
26022 /* Mention all registers defined by the ABI to hold information
26023 as uses in CALL_INSN_FUNCTION_USAGE. */
26024 if (abi_reg)
26025 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26026 }
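
/* For a typical ELFv2 indirect call, the RTL built here corresponds to
   assembly roughly like this sketch (the save slot is shown symbolically
   as the ABI's RS6000_TOC_SAVE_SLOT):

       std 2,<toc-slot>(1)    # save TOC, unless done once in the prologue
       mr 12,<ptr>            # ELFv2 requires the address in r12
       mtctr 12
       bctrl                  # indirect call via CTR
       ld 2,<toc-slot>(1)     # toc_restore emitted as part of the call  */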
26027
26028 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
26029
26030 void
26031 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26032 {
26033 rtx call[2];
26034 rtx insn;
26035 rtx r12 = NULL_RTX;
26036 rtx func_addr = func_desc;
26037
26038 if (global_tlsarg)
26039 tlsarg = global_tlsarg;
26040
26041 /* Handle longcall attributes. */
26042 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
26043 {
26044 /* PCREL can do a sibling call to a longcall function
26045 because we don't need to restore the TOC register. */
26046 gcc_assert (rs6000_pcrel_p ());
26047 func_desc = rs6000_longcall_ref (func_desc, tlsarg);
26048 }
26049 else
26050 gcc_assert (INTVAL (cookie) == 0);
26051
26052 /* For ELFv2, r12 and CTR need to hold the function address
26053 for an indirect call. */
26054 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
26055 {
26056 r12 = gen_rtx_REG (Pmode, 12);
26057 emit_move_insn (r12, func_desc);
26058 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26059 emit_move_insn (func_addr, r12);
26060 }
26061
26062 /* Create the call. */
26063 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26064 if (value != NULL_RTX)
26065 call[0] = gen_rtx_SET (value, call[0]);
26066
26067 call[1] = simple_return_rtx;
26068
26069 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
26070 insn = emit_call_insn (insn);
26071
26072 /* Note use of the TOC register. */
26073 if (!rs6000_pcrel_p ())
26074 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
26075 gen_rtx_REG (Pmode, TOC_REGNUM));
26076
26077 /* Note use of r12. */
26078 if (r12)
26079 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
26080 }
26081
26082 /* Expand code to perform a call under the SYSV4 ABI. */
26083
26084 void
26085 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26086 {
26087 rtx func = func_desc;
26088 rtx func_addr;
26089 rtx call[4];
26090 rtx insn;
26091 rtx abi_reg = NULL_RTX;
26092 int n;
26093
26094 if (global_tlsarg)
26095 tlsarg = global_tlsarg;
26096
26097 /* Handle longcall attributes. */
26098 if ((INTVAL (cookie) & CALL_LONG) != 0
26099 && GET_CODE (func_desc) == SYMBOL_REF)
26100 {
26101 func = rs6000_longcall_ref (func_desc, tlsarg);
26102 /* If the longcall was implemented as an inline PLT call using
26103 PLT unspecs then func will be REG:r11. If not, func will be
26104 a pseudo reg. The inline PLT call sequence supports lazy
26105 linking (and longcalls to functions in dlopen'd libraries).
26106 The other style of longcall doesn't. The lazy linking entry
26107 to the dynamic symbol resolver requires r11 be the function
26108 address (as it is for linker generated PLT stubs). Ensure
26109 r11 stays valid to the bctrl by marking r11 used by the call. */
26110 if (TARGET_PLTSEQ)
26111 abi_reg = func;
26112 }
26113
26114 /* Handle indirect calls. */
26115 if (GET_CODE (func) != SYMBOL_REF)
26116 {
26117 func = force_reg (Pmode, func);
26118
26119 /* Indirect calls via CTR are strongly preferred over indirect
26120 calls via LR, so move the address there. That can't be left
26121 to reload because we want to mark every instruction in an
26122 inline PLT call sequence with a reloc, enabling the linker to
26123 edit the sequence back to a direct call when that makes sense. */
26124 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26125 if (abi_reg)
26126 {
26127 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26128 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26129 emit_insn (gen_rtx_SET (func_addr, mark_func));
26130 v = gen_rtvec (2, func_addr, func_desc);
26131 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26132 }
26133 else
26134 emit_move_insn (func_addr, func);
26135 }
26136 else
26137 func_addr = func;
26138
26139 /* Create the call. */
26140 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26141 if (value != NULL_RTX)
26142 call[0] = gen_rtx_SET (value, call[0]);
26143
26144 call[1] = gen_rtx_USE (VOIDmode, cookie);
26145 n = 2;
26146 if (TARGET_SECURE_PLT
26147 && flag_pic
26148 && GET_CODE (func_addr) == SYMBOL_REF
26149 && !SYMBOL_REF_LOCAL_P (func_addr))
26150 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
26151
26152 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26153
26154 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
26155 insn = emit_call_insn (insn);
26156 if (abi_reg)
26157 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26158 }
26159
26160 /* Expand code to perform a sibling call under the SysV4 ABI. */
26161
26162 void
26163 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26164 {
26165 rtx func = func_desc;
26166 rtx func_addr;
26167 rtx call[3];
26168 rtx insn;
26169 rtx abi_reg = NULL_RTX;
26170
26171 if (global_tlsarg)
26172 tlsarg = global_tlsarg;
26173
26174 /* Handle longcall attributes. */
26175 if ((INTVAL (cookie) & CALL_LONG) != 0
26176 && GET_CODE (func_desc) == SYMBOL_REF)
26177 {
26178 func = rs6000_longcall_ref (func_desc, tlsarg);
26179 /* If the longcall was implemented as an inline PLT call using
26180 PLT unspecs then func will be REG:r11. If not, func will be
26181 a pseudo reg. The inline PLT call sequence supports lazy
26182 linking (and longcalls to functions in dlopen'd libraries).
26183 The other style of longcall doesn't. The lazy linking entry
26184 to the dynamic symbol resolver requires r11 be the function
26185 address (as it is for linker generated PLT stubs). Ensure
26186 r11 stays valid to the bctr by marking r11 used by the call. */
26187 if (TARGET_PLTSEQ)
26188 abi_reg = func;
26189 }
26190
26191 /* Handle indirect calls. */
26192 if (GET_CODE (func) != SYMBOL_REF)
26193 {
26194 func = force_reg (Pmode, func);
26195
26196 /* Indirect sibcalls must go via CTR. That can't be left to
26197 reload because we want to mark every instruction in an inline
26198 PLT call sequence with a reloc, enabling the linker to edit
26199 the sequence back to a direct call when that makes sense. */
26200 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26201 if (abi_reg)
26202 {
26203 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26204 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26205 emit_insn (gen_rtx_SET (func_addr, mark_func));
26206 v = gen_rtvec (2, func_addr, func_desc);
26207 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26208 }
26209 else
26210 emit_move_insn (func_addr, func);
26211 }
26212 else
26213 func_addr = func;
26214
26215 /* Create the call. */
26216 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26217 if (value != NULL_RTX)
26218 call[0] = gen_rtx_SET (value, call[0]);
26219
26220 call[1] = gen_rtx_USE (VOIDmode, cookie);
26221 call[2] = simple_return_rtx;
26222
26223 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26224 insn = emit_call_insn (insn);
26225 if (abi_reg)
26226 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26227 }
26228
26229 #if TARGET_MACHO
26230
26231 /* Expand code to perform a call under the Darwin ABI.
26232 Modulo handling of mlongcall, this is much the same as sysv.
26233 If/when the longcall optimisation is removed, we could drop this
26234 code and use the sysv case (taking care to avoid the tls stuff).
26235
26236 We can use this for sibcalls too, if needed. */
26237
26238 void
26239 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
26240 rtx cookie, bool sibcall)
26241 {
26242 rtx func = func_desc;
26243 rtx func_addr;
26244 rtx call[3];
26245 rtx insn;
26246 int cookie_val = INTVAL (cookie);
26247 bool make_island = false;
26248
26249 /* Handle longcall attributes; there are two cases for Darwin:
26250 1) Newer linkers are capable of synthesising any branch islands needed.
26251 2) We need a helper branch island synthesised by the compiler.
26252 The second case has mostly been retired and we don't use it for m64.
26253 In fact, it is an optimisation; we could just indirect as sysv does,
26254 but we keep it for backwards compatibility for now.
26255 If we're going to use this, then we need to keep the CALL_LONG bit set,
26256 so that we can pick up the special insn form later. */
26257 if ((cookie_val & CALL_LONG) != 0
26258 && GET_CODE (func_desc) == SYMBOL_REF)
26259 {
26260 /* FIXME: the longcall opt should not hang off this flag, it is most
26261 likely incorrect for kernel-mode code-generation. */
26262 if (darwin_symbol_stubs && TARGET_32BIT)
26263 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
26264 else
26265 {
26266 /* The linker is capable of doing this, but the user explicitly
26267 asked for -mlongcall, so we'll do the 'normal' version. */
26268 func = rs6000_longcall_ref (func_desc, NULL_RTX);
26269 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
26270 }
26271 }
26272
26273 /* Handle indirect calls. */
26274 if (GET_CODE (func) != SYMBOL_REF)
26275 {
26276 func = force_reg (Pmode, func);
26277
26278 /* Indirect calls via CTR are strongly preferred over indirect
26279 calls via LR, and are required for indirect sibcalls, so move
26280 the address there. */
26281 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26282 emit_move_insn (func_addr, func);
26283 }
26284 else
26285 func_addr = func;
26286
26287 /* Create the call. */
26288 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26289 if (value != NULL_RTX)
26290 call[0] = gen_rtx_SET (value, call[0]);
26291
26292 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
26293
26294 if (sibcall)
26295 call[2] = simple_return_rtx;
26296 else
26297 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26298
26299 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26300 insn = emit_call_insn (insn);
26301 /* Now we have the debug info in the insn, we can set up the branch island
26302 if we're using one. */
26303 if (make_island)
26304 {
26305 tree funname = get_identifier (XSTR (func_desc, 0));
26306
26307 if (no_previous_def (funname))
26308 {
26309 rtx label_rtx = gen_label_rtx ();
26310 char *label_buf, temp_buf[256];
26311 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
26312 CODE_LABEL_NUMBER (label_rtx));
26313 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
26314 tree labelname = get_identifier (label_buf);
26315 add_compiler_branch_island (labelname, funname,
26316 insn_line ((const rtx_insn*)insn));
26317 }
26318 }
26319 }
26320 #endif
26321
26322 void
26323 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26324 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26325 {
26326 #if TARGET_MACHO
26327 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
26328 #else
26329 gcc_unreachable ();
26330 #endif
26331 }
26332
26333
26334 void
26335 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26336 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26337 {
26338 #if TARGET_MACHO
26339 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
26340 #else
26341 gcc_unreachable ();
26342 #endif
26343 }
26344
26345 /* Return whether we should generate PC-relative code for FNDECL. */
26346 bool
26347 rs6000_fndecl_pcrel_p (const_tree fndecl)
26348 {
26349 if (DEFAULT_ABI != ABI_ELFv2)
26350 return false;
26351
26352 struct cl_target_option *opts = target_opts_for_fn (fndecl);
26353
26354 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26355 && TARGET_CMODEL == CMODEL_MEDIUM);
26356 }
26357
26358 /* Return whether we should generate PC-relative code for *FN. */
26359 bool
26360 rs6000_function_pcrel_p (struct function *fn)
26361 {
26362 if (DEFAULT_ABI != ABI_ELFv2)
26363 return false;
26364
26365 /* Optimize the usual case. */
26366 if (fn == cfun)
26367 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26368 && TARGET_CMODEL == CMODEL_MEDIUM);
26369
26370 return rs6000_fndecl_pcrel_p (fn->decl);
26371 }
26372
26373 /* Return whether we should generate PC-relative code for the current
26374 function. */
26375 bool
26376 rs6000_pcrel_p ()
26377 {
26378 return (DEFAULT_ABI == ABI_ELFv2
26379 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26380 && TARGET_CMODEL == CMODEL_MEDIUM);
26381 }
26382
26383 \f
26384 /* Given an address (ADDR), a mode (MODE), and what the format of the
26385 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26386 for the address. */
26387
26388 enum insn_form
26389 address_to_insn_form (rtx addr,
26390 machine_mode mode,
26391 enum non_prefixed_form non_prefixed_format)
26392 {
26393 /* Single register is easy. */
26394 if (REG_P (addr) || SUBREG_P (addr))
26395 return INSN_FORM_BASE_REG;
26396
26397 /* If the non-prefixed instruction format doesn't support offset addressing,
26398 make sure only indexed addressing is allowed.
26399
26400 We special case SDmode so that the register allocator does not try to move
26401 SDmode through GPR registers, but instead uses the 32-bit integer load and
26402 store instructions for the floating point registers. */
26403 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26404 {
26405 if (GET_CODE (addr) != PLUS)
26406 return INSN_FORM_BAD;
26407
26408 rtx op0 = XEXP (addr, 0);
26409 rtx op1 = XEXP (addr, 1);
26410 if (!REG_P (op0) && !SUBREG_P (op0))
26411 return INSN_FORM_BAD;
26412
26413 if (!REG_P (op1) && !SUBREG_P (op1))
26414 return INSN_FORM_BAD;
26415
26416 return INSN_FORM_X;
26417 }
26418
26419 /* Deal with update forms. */
26420 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26421 return INSN_FORM_UPDATE;
26422
26423 /* Handle PC-relative symbols and labels. Check for both local and
26424 external symbols. Assume labels are always local. TLS symbols
26425 are not PC-relative for rs6000. */
26426 if (TARGET_PCREL)
26427 {
26428 if (LABEL_REF_P (addr))
26429 return INSN_FORM_PCREL_LOCAL;
26430
26431 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26432 {
26433 if (!SYMBOL_REF_LOCAL_P (addr))
26434 return INSN_FORM_PCREL_EXTERNAL;
26435 else
26436 return INSN_FORM_PCREL_LOCAL;
26437 }
26438 }
26439
26440 if (GET_CODE (addr) == CONST)
26441 addr = XEXP (addr, 0);
26442
26443 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26444 if (GET_CODE (addr) == LO_SUM)
26445 return INSN_FORM_LO_SUM;
26446
26447 /* Everything below must be an offset address of some form. */
26448 if (GET_CODE (addr) != PLUS)
26449 return INSN_FORM_BAD;
26450
26451 rtx op0 = XEXP (addr, 0);
26452 rtx op1 = XEXP (addr, 1);
26453
26454 /* Check for indexed addresses. */
26455 if (REG_P (op1) || SUBREG_P (op1))
26456 {
26457 if (REG_P (op0) || SUBREG_P (op0))
26458 return INSN_FORM_X;
26459
26460 return INSN_FORM_BAD;
26461 }
26462
26463 if (!CONST_INT_P (op1))
26464 return INSN_FORM_BAD;
26465
26466 HOST_WIDE_INT offset = INTVAL (op1);
26467 if (!SIGNED_INTEGER_34BIT_P (offset))
26468 return INSN_FORM_BAD;
26469
26470 /* Check for local and external PC-relative addresses. Labels are always
26471 local. TLS symbols are not PC-relative for rs6000. */
26472 if (TARGET_PCREL)
26473 {
26474 if (LABEL_REF_P (op0))
26475 return INSN_FORM_PCREL_LOCAL;
26476
26477 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26478 {
26479 if (!SYMBOL_REF_LOCAL_P (op0))
26480 return INSN_FORM_PCREL_EXTERNAL;
26481 else
26482 return INSN_FORM_PCREL_LOCAL;
26483 }
26484 }
26485
26486 /* If it isn't PC-relative, the address must use a base register. */
26487 if (!REG_P (op0) && !SUBREG_P (op0))
26488 return INSN_FORM_BAD;
26489
26490 /* Large offsets must be prefixed. */
26491 if (!SIGNED_INTEGER_16BIT_P (offset))
26492 {
26493 if (TARGET_PREFIXED)
26494 return INSN_FORM_PREFIXED_NUMERIC;
26495
26496 return INSN_FORM_BAD;
26497 }
26498
26499 /* We have a 16-bit offset; see what default instruction format to use. */
26500 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26501 {
26502 unsigned size = GET_MODE_SIZE (mode);
26503
26504 /* On 64-bit systems, assume 64-bit integers need to use DS form
26505 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26506 (for LXV and STXV). TImode is problematic in that its normal usage
26507 is expected to be in GPRs, where it wants a DS instruction format, but if
26508 it goes into the vector registers, it wants a DQ instruction
26509 format. */
26510 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26511 non_prefixed_format = NON_PREFIXED_DS;
26512
26513 else if (TARGET_VSX && size >= 16
26514 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26515 non_prefixed_format = NON_PREFIXED_DQ;
26516
26517 else
26518 non_prefixed_format = NON_PREFIXED_D;
26519 }
26520
26521 /* Classify the D/DS/DQ-form addresses. */
26522 switch (non_prefixed_format)
26523 {
26524 /* Instruction format D, all 16 bits are valid. */
26525 case NON_PREFIXED_D:
26526 return INSN_FORM_D;
26527
26528 /* Instruction format DS, bottom 2 bits must be 0. */
26529 case NON_PREFIXED_DS:
26530 if ((offset & 3) == 0)
26531 return INSN_FORM_DS;
26532
26533 else if (TARGET_PREFIXED)
26534 return INSN_FORM_PREFIXED_NUMERIC;
26535
26536 else
26537 return INSN_FORM_BAD;
26538
26539 /* Instruction format DQ, bottom 4 bits must be 0. */
26540 case NON_PREFIXED_DQ:
26541 if ((offset & 15) == 0)
26542 return INSN_FORM_DQ;
26543
26544 else if (TARGET_PREFIXED)
26545 return INSN_FORM_PREFIXED_NUMERIC;
26546
26547 else
26548 return INSN_FORM_BAD;
26549
26550 default:
26551 break;
26552 }
26553
26554 return INSN_FORM_BAD;
26555 }
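
/* A few illustrative classifications (RTL sketches, register numbers
   invented):

       (reg 3)                            -> INSN_FORM_BASE_REG
       (plus (reg 3) (reg 4))             -> INSN_FORM_X
       (plus (reg 3) (const_int 8))       -> INSN_FORM_D/DS/DQ, per format
       (plus (reg 3) (const_int 100000))  -> INSN_FORM_PREFIXED_NUMERIC if
                                             TARGET_PREFIXED, else bad
       (symbol_ref "x") with PC-relative  -> INSN_FORM_PCREL_LOCAL or
                                             _EXTERNAL, per locality  */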
26556
26557 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26558 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26559 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26560 a D-form or DS-form instruction. X-form and base_reg are always
26561 allowed. */
26562 bool
26563 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26564 enum non_prefixed_form non_prefixed_format)
26565 {
26566 enum insn_form result_form;
26567
26568 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26569
26570 switch (non_prefixed_format)
26571 {
26572 case NON_PREFIXED_D:
26573 switch (result_form)
26574 {
26575 case INSN_FORM_X:
26576 case INSN_FORM_D:
26577 case INSN_FORM_DS:
26578 case INSN_FORM_BASE_REG:
26579 return true;
26580 default:
26581 return false;
26582 }
26583 break;
26584 case NON_PREFIXED_DS:
26585 switch (result_form)
26586 {
26587 case INSN_FORM_X:
26588 case INSN_FORM_DS:
26589 case INSN_FORM_BASE_REG:
26590 return true;
26591 default:
26592 return false;
26593 }
26594 break;
26595 default:
26596 break;
26597 }
26598 return false;
26599 }
26600
26601 /* Return true if a REG with a given MODE is loaded from or stored into a MEM
26602 location that uses a non-prefixed D/DS/DQ-form address. This is used to validate
26603 the load or store with the PCREL_OPT optimization to make sure it is an
26604 instruction that can be optimized.
26605
26606 We need to specify the MODE separately from the REG to allow for loads that
26607 include zero/sign/float extension. */
26608
26609 bool
26610 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26611 {
26612 /* If the instruction is indexed-only, like LFIWAX/LXSIWAX, we cannot do the
26613 PCREL_OPT optimization. */
26614 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26615 if (non_prefixed == NON_PREFIXED_X)
26616 return false;
26617
26618 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26619 rtx addr = XEXP (mem, 0);
26620 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26621 return (iform == INSN_FORM_BASE_REG
26622 || iform == INSN_FORM_D
26623 || iform == INSN_FORM_DS
26624 || iform == INSN_FORM_DQ);
26625 }
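
/* Background sketch for PCREL_OPT: the optimization marks a GOT load such
   as "pld 9,x@got@pcrel" together with a dependent access like
   "lwz 3,0(9)" so the linker may rewrite the pair into a direct
   PC-relative access (e.g. "plwz 3,x@pcrel" plus a nop).  Only a
   base-reg or D/DS/DQ-form second instruction can be rewritten that way,
   which is what this predicate checks.  */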
26626
26627 /* Helper function to see if we're potentially looking at lfs/stfs.
26628 - PARALLEL containing a SET and a CLOBBER
26629 - stfs:
26630 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26631 - CLOBBER is a V4SF
26632 - lfs:
26633 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26634 - CLOBBER is a DI
26635 */
26636
26637 static bool
26638 is_lfs_stfs_insn (rtx_insn *insn)
26639 {
26640 rtx pattern = PATTERN (insn);
26641 if (GET_CODE (pattern) != PARALLEL)
26642 return false;
26643
26644 /* This should be a parallel with exactly one set and one clobber. */
26645 if (XVECLEN (pattern, 0) != 2)
26646 return false;
26647
26648 rtx set = XVECEXP (pattern, 0, 0);
26649 if (GET_CODE (set) != SET)
26650 return false;
26651
26652 rtx clobber = XVECEXP (pattern, 0, 1);
26653 if (GET_CODE (clobber) != CLOBBER)
26654 return false;
26655
26656 /* All we care about is that the destination of the SET is a mem:SI,
26657 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26658 should be a scratch:V4SF. */
26659
26660 rtx dest = SET_DEST (set);
26661 rtx src = SET_SRC (set);
26662 rtx scratch = SET_DEST (clobber);
26663
26664 if (GET_CODE (src) != UNSPEC)
26665 return false;
26666
26667 /* stfs case. */
26668 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26669 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26670 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26671 return true;
26672
26673 /* lfs case. */
26674 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26675 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26676 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26677 return true;
26678
26679 return false;
26680 }
26681
26682 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26683 instruction format (D/DS/DQ) used for offset memory. */
26684
26685 enum non_prefixed_form
26686 reg_to_non_prefixed (rtx reg, machine_mode mode)
26687 {
26688 /* If it isn't a register, use the defaults. */
26689 if (!REG_P (reg) && !SUBREG_P (reg))
26690 return NON_PREFIXED_DEFAULT;
26691
26692 unsigned int r = reg_or_subregno (reg);
26693
26694 /* If we have a pseudo, use the default instruction format. */
26695 if (!HARD_REGISTER_NUM_P (r))
26696 return NON_PREFIXED_DEFAULT;
26697
26698 unsigned size = GET_MODE_SIZE (mode);
26699
26700 /* FPR registers use D-form addresses for scalars, and DQ-form for vectors,
26701 IEEE 128-bit floating point, and 128-bit integers. Before power9, only
26702 indexed addressing was available for vectors. */
26703 if (FP_REGNO_P (r))
26704 {
26705 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26706 return NON_PREFIXED_D;
26707
26708 else if (size < 8)
26709 return NON_PREFIXED_X;
26710
26711 else if (TARGET_VSX && size >= 16
26712 && (VECTOR_MODE_P (mode)
26713 || VECTOR_ALIGNMENT_P (mode)
26714 || mode == TImode || mode == CTImode))
26715 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26716
26717 else
26718 return NON_PREFIXED_DEFAULT;
26719 }
26720
26721 /* Altivec registers use DS-form addresses for scalars, and DQ-form for
26722 vectors, IEEE 128-bit floating point, and 128-bit integers. Before power9,
26723 only indexed addressing was available. */
26724 else if (ALTIVEC_REGNO_P (r))
26725 {
26726 if (!TARGET_P9_VECTOR)
26727 return NON_PREFIXED_X;
26728
26729 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26730 return NON_PREFIXED_DS;
26731
26732 else if (size < 8)
26733 return NON_PREFIXED_X;
26734
26735 else if (TARGET_VSX && size >= 16
26736 && (VECTOR_MODE_P (mode)
26737 || VECTOR_ALIGNMENT_P (mode)
26738 || mode == TImode || mode == CTImode))
26739 return NON_PREFIXED_DQ;
26740
26741 else
26742 return NON_PREFIXED_DEFAULT;
26743 }
26744
26745 /* GPR registers use DS-form addresses for 64-bit items on 64-bit systems,
26746 and D-form otherwise. Assume that any other register, such as LR, CRs,
26747 etc., will go through the GPR registers for memory operations. */
26748 else if (TARGET_POWERPC64 && size >= 8)
26749 return NON_PREFIXED_DS;
26750
26751 return NON_PREFIXED_D;
26752 }
26753
26754 \f
26755 /* Whether a load instruction is a prefixed instruction. This is called from
26756 the prefixed attribute processing. */
26757
26758 bool
26759 prefixed_load_p (rtx_insn *insn)
26760 {
26761 /* Validate the insn to make sure it is a normal load insn. */
26762 extract_insn_cached (insn);
26763 if (recog_data.n_operands < 2)
26764 return false;
26765
26766 rtx reg = recog_data.operand[0];
26767 rtx mem = recog_data.operand[1];
26768
26769 if (!REG_P (reg) && !SUBREG_P (reg))
26770 return false;
26771
26772 if (!MEM_P (mem))
26773 return false;
26774
26775 /* Prefixed load instructions do not support update or indexed forms. */
26776 if (get_attr_indexed (insn) == INDEXED_YES
26777 || get_attr_update (insn) == UPDATE_YES)
26778 return false;
26779
26780 /* LWA uses the DS format instead of the D format that LWZ uses. */
26781 enum non_prefixed_form non_prefixed;
26782 machine_mode reg_mode = GET_MODE (reg);
26783 machine_mode mem_mode = GET_MODE (mem);
26784
26785 if (mem_mode == SImode && reg_mode == DImode
26786 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26787 non_prefixed = NON_PREFIXED_DS;
26788
26789 else
26790 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26791
26792 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26793 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26794 else
26795 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26796 }
26797
26798 /* Whether a store instruction is a prefixed instruction. This is called from
26799 the prefixed attribute processing. */
26800
26801 bool
26802 prefixed_store_p (rtx_insn *insn)
26803 {
26804 /* Validate the insn to make sure it is a normal store insn. */
26805 extract_insn_cached (insn);
26806 if (recog_data.n_operands < 2)
26807 return false;
26808
26809 rtx mem = recog_data.operand[0];
26810 rtx reg = recog_data.operand[1];
26811
26812 if (!REG_P (reg) && !SUBREG_P (reg))
26813 return false;
26814
26815 if (!MEM_P (mem))
26816 return false;
26817
26818 /* Prefixed store instructions do not support update or indexed forms. */
26819 if (get_attr_indexed (insn) == INDEXED_YES
26820 || get_attr_update (insn) == UPDATE_YES)
26821 return false;
26822
26823 machine_mode mem_mode = GET_MODE (mem);
26824 rtx addr = XEXP (mem, 0);
26825 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26826
26827 /* Need to make sure we aren't looking at a stfs which doesn't look
26828 like the other things reg_to_non_prefixed/address_is_prefixed
26829 looks for. */
26830 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26831 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26832 else
26833 return address_is_prefixed (addr, mem_mode, non_prefixed);
26834 }
26835
26836 /* Whether a load immediate or add instruction is a prefixed instruction. This
26837 is called from the prefixed attribute processing. */
26838
26839 bool
26840 prefixed_paddi_p (rtx_insn *insn)
26841 {
26842 rtx set = single_set (insn);
26843 if (!set)
26844 return false;
26845
26846 rtx dest = SET_DEST (set);
26847 rtx src = SET_SRC (set);
26848
26849 if (!REG_P (dest) && !SUBREG_P (dest))
26850 return false;
26851
26852 /* Is this a load immediate that can't be done with a simple ADDI or
26853 ADDIS? */
26854 if (CONST_INT_P (src))
26855 return (satisfies_constraint_eI (src)
26856 && !satisfies_constraint_I (src)
26857 && !satisfies_constraint_L (src));
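
  /* As an illustrative aside (assuming the usual rs6000 constraint meanings:
     "I" is a signed 16-bit constant, "L" a signed 16-bit constant shifted
     left 16 bits, and "eI" a signed 34-bit constant): a value such as
     0x12345 satisfies eI but neither I nor L, so it needs a prefixed
     PLI/PADDI. */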
26858
26859 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26860 ADDIS? */
26861 if (GET_CODE (src) == PLUS)
26862 {
26863 rtx op1 = XEXP (src, 1);
26864
26865 return (CONST_INT_P (op1)
26866 && satisfies_constraint_eI (op1)
26867 && !satisfies_constraint_I (op1)
26868 && !satisfies_constraint_L (op1));
26869 }
26870
26871 /* If not, is it a load of a PC-relative address? */
26872 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26873 return false;
26874
26875 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26876 return false;
26877
26878 enum insn_form iform = address_to_insn_form (src, Pmode,
26879 NON_PREFIXED_DEFAULT);
26880
26881 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26882 }
26883
26884 /* Whether the next instruction needs a 'p' prefix issued before the
26885 instruction is printed out. */
26886 static bool prepend_p_to_next_insn;
26887
26888 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26889 outputting the assembler code. On the PowerPC, we remember if the current
26890 insn is a prefixed insn where we need to emit a 'p' before the insn.
26891
26892 In addition, if the insn is part of a PC-relative reference to an external
26893 label optimization (PCREL_OPT), this is also recorded. */
26894 void
26895 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26896 {
26897 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26898 == MAYBE_PREFIXED_YES
26899 && get_attr_prefixed (insn) == PREFIXED_YES);
26900 return;
26901 }
26902
26903 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26904 We use it to emit a 'p' for prefixed insns that is set in
26905 FINAL_PRESCAN_INSN. */
26906 void
26907 rs6000_asm_output_opcode (FILE *stream)
26908 {
26909 if (prepend_p_to_next_insn)
26910 {
26911 fprintf (stream, "p");
26912
26913 /* Reset the flag in the case where there are separate insn lines in the
26914 sequence, so the 'p' is only emitted for the first line. This shows up
26915 when we are doing the PCREL_OPT optimization, where otherwise the label
26916 created with %r<n> would be printed with a leading 'p'. */
26917 prepend_p_to_next_insn = false;
26918 }
26919
26920 return;
26921 }
26922
26923 /* Emit the relocation to tie the next instruction to a previous instruction
26924 that loads up an external address. This is used to do the PCREL_OPT
26925 optimization. Note, the label is generated after the PLD of the got
26926 pc-relative address to allow for the assembler to insert NOPs before the PLD
26927 instruction. The operand is a constant integer that is the label
26928 number. */
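
/* For example, with label number 7 the emitted sequence looks roughly like
   this (an illustrative sketch; the register numbers and the following
   instruction depend on the surrounding code):

	pld 9,sym@got@pcrel(0),1
   .Lpcrel7:
	.reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
	lwz 3,0(9)  */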
26929
26930 void
26931 output_pcrel_opt_reloc (rtx label_num)
26932 {
26933 rtx operands[1] = { label_num };
26934 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26935 operands);
26936 }
26937
26938 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26939 should be adjusted to reflect any required changes. This macro is used when
26940 there is some systematic length adjustment required that would be difficult
26941 to express in the length attribute.
26942
26943 On the PowerPC, we use this to adjust the length of an instruction if one or
26944 more prefixed instructions are generated, using the attribute
26945 max_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26946 hardware requires that a prefixed instruction does not cross a 64-byte
26947 boundary. This means the compiler has to assume the length of the first
26948 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26949 already set for the non-prefixed instruction, we just need to update for the
26950 difference. */
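
/* As a worked instance of the adjustment below: a non-prefixed length of 4
   with one prefixed instruction becomes 4 + 4 * (1 + 1) = 12 bytes, i.e. the
   8-byte prefixed instruction plus the possible 4-byte NOP the assembler may
   insert to keep it from crossing a 64-byte boundary. */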
26951
26952 int
26953 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26954 {
26955 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26956 {
26957 rtx pattern = PATTERN (insn);
26958 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26959 && get_attr_prefixed (insn) == PREFIXED_YES)
26960 {
26961 int num_prefixed = get_attr_max_prefixed_insns (insn);
26962 length += 4 * (num_prefixed + 1);
26963 }
26964 }
26965
26966 return length;
26967 }
26968
26969 \f
26970 #ifdef HAVE_GAS_HIDDEN
26971 # define USE_HIDDEN_LINKONCE 1
26972 #else
26973 # define USE_HIDDEN_LINKONCE 0
26974 #endif
26975
26976 /* Fills in the label name that should be used for a 476 link stack thunk. */
26977
26978 void
26979 get_ppc476_thunk_name (char name[32])
26980 {
26981 gcc_assert (TARGET_LINK_STACK);
26982
26983 if (USE_HIDDEN_LINKONCE)
26984 sprintf (name, "__ppc476.get_thunk");
26985 else
26986 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26987 }
26988
26989 /* This function emits the simple thunk routine that is used to preserve
26990 the link stack on the 476 cpu. */
26991
26992 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26993 static void
26994 rs6000_code_end (void)
26995 {
26996 char name[32];
26997 tree decl;
26998
26999 if (!TARGET_LINK_STACK)
27000 return;
27001
27002 get_ppc476_thunk_name (name);
27003
27004 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
27005 build_function_type_list (void_type_node, NULL_TREE));
27006 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
27007 NULL_TREE, void_type_node);
27008 TREE_PUBLIC (decl) = 1;
27009 TREE_STATIC (decl) = 1;
27010
27011 #if RS6000_WEAK
27012 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
27013 {
27014 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
27015 targetm.asm_out.unique_section (decl, 0);
27016 switch_to_section (get_named_section (decl, NULL, 0));
27017 DECL_WEAK (decl) = 1;
27018 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
27019 targetm.asm_out.globalize_label (asm_out_file, name);
27020 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
27021 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
27022 }
27023 else
27024 #endif
27025 {
27026 switch_to_section (text_section);
27027 ASM_OUTPUT_LABEL (asm_out_file, name);
27028 }
27029
27030 DECL_INITIAL (decl) = make_node (BLOCK);
27031 current_function_decl = decl;
27032 allocate_struct_function (decl, false);
27033 init_function_start (decl);
27034 first_function_block_is_cold = false;
27035 /* Make sure unwind info is emitted for the thunk if needed. */
27036 final_start_function (emit_barrier (), asm_out_file, 1);
27037
27038 fputs ("\tblr\n", asm_out_file);
27039
27040 final_end_function ();
27041 init_insn_lengths ();
27042 free_after_compilation (cfun);
27043 set_cfun (NULL);
27044 current_function_decl = NULL;
27045 }
27046
27047 /* Add r30 to hard reg set if the prologue sets it up and it is not
27048 pic_offset_table_rtx. */
27049
27050 static void
27051 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
27052 {
27053 if (!TARGET_SINGLE_PIC_BASE
27054 && TARGET_TOC
27055 && TARGET_MINIMAL_TOC
27056 && !constant_pool_empty_p ())
27057 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27058 if (cfun->machine->split_stack_argp_used)
27059 add_to_hard_reg_set (&set->set, Pmode, 12);
27060
27061 /* Make sure the hard reg set doesn't include r2, which was possibly added
27062 via PIC_OFFSET_TABLE_REGNUM. */
27063 if (TARGET_TOC)
27064 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
27065 }
27066
27067 \f
27068 /* Helper function for rs6000_split_logical to emit a logical instruction after
27069 splitting the operation into single GPR registers.
27070
27071 DEST is the destination register.
27072 OP1 and OP2 are the input source registers.
27073 CODE is the base operation (AND, IOR, XOR, NOT).
27074 MODE is the machine mode.
27075 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27076 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27077 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
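
/* E.g., with CODE == AND and COMPLEMENT_OP2_P set, the emitted RTL computes
   dest = op1 & ~op2, matching the ANDC-style machine patterns (an
   illustrative reading of the flags above). */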
27078
27079 static void
27080 rs6000_split_logical_inner (rtx dest,
27081 rtx op1,
27082 rtx op2,
27083 enum rtx_code code,
27084 machine_mode mode,
27085 bool complement_final_p,
27086 bool complement_op1_p,
27087 bool complement_op2_p)
27088 {
27089 rtx bool_rtx;
27090
27091 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
27092 if (op2 && CONST_INT_P (op2)
27093 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
27094 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27095 {
27096 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
27097 HOST_WIDE_INT value = INTVAL (op2) & mask;
27098
27099 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
27100 if (code == AND)
27101 {
27102 if (value == 0)
27103 {
27104 emit_insn (gen_rtx_SET (dest, const0_rtx));
27105 return;
27106 }
27107
27108 else if (value == mask)
27109 {
27110 if (!rtx_equal_p (dest, op1))
27111 emit_insn (gen_rtx_SET (dest, op1));
27112 return;
27113 }
27114 }
27115
27116 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
27117 into separate ORI/ORIS or XORI/XORIS instructions. */
27118 else if (code == IOR || code == XOR)
27119 {
27120 if (value == 0)
27121 {
27122 if (!rtx_equal_p (dest, op1))
27123 emit_insn (gen_rtx_SET (dest, op1));
27124 return;
27125 }
27126 }
27127 }
27128
27129 if (code == AND && mode == SImode
27130 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27131 {
27132 emit_insn (gen_andsi3 (dest, op1, op2));
27133 return;
27134 }
27135
27136 if (complement_op1_p)
27137 op1 = gen_rtx_NOT (mode, op1);
27138
27139 if (complement_op2_p)
27140 op2 = gen_rtx_NOT (mode, op2);
27141
27142 /* For canonical RTL, if only one arm is inverted it is the first. */
27143 if (!complement_op1_p && complement_op2_p)
27144 std::swap (op1, op2);
27145
27146 bool_rtx = ((code == NOT)
27147 ? gen_rtx_NOT (mode, op1)
27148 : gen_rtx_fmt_ee (code, mode, op1, op2));
27149
27150 if (complement_final_p)
27151 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
27152
27153 emit_insn (gen_rtx_SET (dest, bool_rtx));
27154 }
27155
27156 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27157 operations are split immediately during RTL generation to allow for more
27158 optimizations of the AND/IOR/XOR.
27159
27160 OPERANDS is an array containing the destination and two input operands.
27161 CODE is the base operation (AND, IOR, XOR, NOT).
27162 MODE is the machine mode.
27163 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27164 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27165 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
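
/* An illustrative example: on a 32-bit target,

     x |= 0x12345678ULL;

   is handled as two SImode operations.  0x12345678 is not a single logical
   constant, so the low-word IOR is split further below into an IOR of
   0x12340000 (an ORIS) followed by an IOR of 0x5678 (an ORI). */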
27168
27169 static void
27170 rs6000_split_logical_di (rtx operands[3],
27171 enum rtx_code code,
27172 bool complement_final_p,
27173 bool complement_op1_p,
27174 bool complement_op2_p)
27175 {
27176 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C (0xffffffff);
27177 const HOST_WIDE_INT upper_32bits = ~lower_32bits;
27178 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C (0x80000000);
27179 enum hi_lo { hi = 0, lo = 1 };
27180 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
27181 size_t i;
27182
27183 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
27184 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
27185 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
27186 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
27187
27188 if (code == NOT)
27189 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
27190 else
27191 {
27192 if (!CONST_INT_P (operands[2]))
27193 {
27194 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
27195 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
27196 }
27197 else
27198 {
27199 HOST_WIDE_INT value = INTVAL (operands[2]);
27200 HOST_WIDE_INT value_hi_lo[2];
27201
27202 gcc_assert (!complement_final_p);
27203 gcc_assert (!complement_op1_p);
27204 gcc_assert (!complement_op2_p);
27205
27206 value_hi_lo[hi] = value >> 32;
27207 value_hi_lo[lo] = value & lower_32bits;
27208
27209 for (i = 0; i < 2; i++)
27210 {
27211 HOST_WIDE_INT sub_value = value_hi_lo[i];
27212
27213 if (sub_value & sign_bit)
27214 sub_value |= upper_32bits;
27215
27216 op2_hi_lo[i] = GEN_INT (sub_value);
27217
27218 /* If this is an AND instruction, check to see if we need to load
27219 the value in a register. */
27220 if (code == AND && sub_value != -1 && sub_value != 0
27221 && !and_operand (op2_hi_lo[i], SImode))
27222 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
27223 }
27224 }
27225 }
27226
27227 for (i = 0; i < 2; i++)
27228 {
27229 /* Split large IOR/XOR operations. */
27230 if ((code == IOR || code == XOR)
27231 && CONST_INT_P (op2_hi_lo[i])
27232 && !complement_final_p
27233 && !complement_op1_p
27234 && !complement_op2_p
27235 && !logical_const_operand (op2_hi_lo[i], SImode))
27236 {
27237 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
27238 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C (0xffff0000);
27239 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C (0x0000ffff);
27240 rtx tmp = gen_reg_rtx (SImode);
27241
27242 /* Make sure the constant is sign extended. */
27243 if ((hi_16bits & sign_bit) != 0)
27244 hi_16bits |= upper_32bits;
27245
27246 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
27247 code, SImode, false, false, false);
27248
27249 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
27250 code, SImode, false, false, false);
27251 }
27252 else
27253 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
27254 code, SImode, complement_final_p,
27255 complement_op1_p, complement_op2_p);
27256 }
27257
27258 return;
27259 }
27260
27261 /* Split the insns that make up boolean operations operating on multiple GPR
27262 registers. The boolean MD patterns ensure that the inputs either are
27263 exactly the same as the output registers, or there is no overlap.
27264
27265 OPERANDS is an array containing the destination and two input operands.
27266 CODE is the base operation (AND, IOR, XOR, NOT).
27267 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27268 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27269 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27270
27271 void
27272 rs6000_split_logical (rtx operands[3],
27273 enum rtx_code code,
27274 bool complement_final_p,
27275 bool complement_op1_p,
27276 bool complement_op2_p)
27277 {
27278 machine_mode mode = GET_MODE (operands[0]);
27279 machine_mode sub_mode;
27280 rtx op0, op1, op2;
27281 int sub_size, regno0, regno1, nregs, i;
27282
27283 /* If this is DImode, use the specialized version that can run before
27284 register allocation. */
27285 if (mode == DImode && !TARGET_POWERPC64)
27286 {
27287 rs6000_split_logical_di (operands, code, complement_final_p,
27288 complement_op1_p, complement_op2_p);
27289 return;
27290 }
27291
27292 op0 = operands[0];
27293 op1 = operands[1];
27294 op2 = (code == NOT) ? NULL_RTX : operands[2];
27295 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
27296 sub_size = GET_MODE_SIZE (sub_mode);
27297 regno0 = REGNO (op0);
27298 regno1 = REGNO (op1);
27299
27300 gcc_assert (reload_completed);
27301 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27302 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27303
27304 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
27305 gcc_assert (nregs > 1);
27306
27307 if (op2 && REG_P (op2))
27308 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
27309
27310 for (i = 0; i < nregs; i++)
27311 {
27312 int offset = i * sub_size;
27313 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
27314 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
27315 rtx sub_op2 = ((code == NOT)
27316 ? NULL_RTX
27317 : simplify_subreg (sub_mode, op2, mode, offset));
27318
27319 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
27320 complement_final_p, complement_op1_p,
27321 complement_op2_p);
27322 }
27323
27324 return;
27325 }
27326
27327 /* Emit instructions to move SRC to DST. Called by splitters for
27328 multi-register moves. It will emit at most one instruction for
27329 each register that is accessed; that is, it won't emit li/lis pairs
27330 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27331 register. */
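
/* For instance, a TImode move between GPRs on a 64-bit target is split into
   two DImode register moves, and a __vector_pair (OOmode) copy between VSX
   registers into two V1TImode moves (illustrative of the reg_mode selection
   below). */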
27332
27333 void
27334 rs6000_split_multireg_move (rtx dst, rtx src)
27335 {
27336 /* The register number of the first register being moved. */
27337 int reg;
27338 /* The mode that is to be moved. */
27339 machine_mode mode;
27340 /* The mode that the move is being done in, and its size. */
27341 machine_mode reg_mode;
27342 int reg_mode_size;
27343 /* The number of registers that will be moved. */
27344 int nregs;
27345
27346 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
27347 mode = GET_MODE (dst);
27348 nregs = hard_regno_nregs (reg, mode);
27349
27350 /* If we have a vector quad register for MMA or a DMR register for dense math,
27351 and this is a load or store, see if we can use vector paired
27352 load/stores. */
27353 if ((mode == XOmode || mode == TDOmode) && TARGET_MMA
27354 && (MEM_P (dst) || MEM_P (src)))
27355 {
27356 reg_mode = OOmode;
27357 nregs /= 2;
27358 }
27359 /* If we have a vector pair/quad mode, split it into two/four separate
27360 vectors. */
27361 else if (mode == OOmode || mode == XOmode || mode == TDOmode)
27362 reg_mode = V1TImode;
27363 else if (FP_REGNO_P (reg))
27364 reg_mode = (DECIMAL_FLOAT_MODE_P (mode) ? DDmode
27365 : (TARGET_HARD_FLOAT ? DFmode : SFmode));
27366 else if (ALTIVEC_REGNO_P (reg))
27367 reg_mode = V16QImode;
27368 else
27369 reg_mode = word_mode;
27370 reg_mode_size = GET_MODE_SIZE (reg_mode);
27371
27372 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
27373
27374 /* TDmode residing in FP registers is special, since the ISA requires that
27375 the lower-numbered word of a register pair is always the most significant
27376 word, even in little-endian mode. This does not match the usual subreg
27377 semantics, so we cannot use simplify_gen_subreg in those cases. Access
27378 the appropriate constituent registers "by hand" in little-endian mode.
27379
27380 Note we do not need to check for destructive overlap here since TDmode
27381 can only reside in even/odd register pairs. */
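
/* For example, with nregs == 2 a TDmode value in fr10/fr11 keeps its most
   significant word in fr10 even on little-endian, so iteration i of the loop
   below pairs subreg byte offset i * reg_mode_size with hard register
   REGNO + nregs - 1 - i (an illustrative reading of the code, not a new
   rule). */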
27382 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
27383 {
27384 rtx p_src, p_dst;
27385 int i;
27386
27387 for (i = 0; i < nregs; i++)
27388 {
27389 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27390 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27391 else
27392 p_src = simplify_gen_subreg (reg_mode, src, mode,
27393 i * reg_mode_size);
27394
27395 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27396 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27397 else
27398 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27399 i * reg_mode_size);
27400
27401 emit_insn (gen_rtx_SET (p_dst, p_src));
27402 }
27403
27404 return;
27405 }
27406
27407 /* The __vector_pair, __vector_quad, and __dmr modes are multi-register
27408 modes, so if we have to load or store the registers, we have to be careful
27409 below to swap them properly if we're in little-endian mode. This means
27410 the last register gets the first memory location. We also need to be
27411 careful of using the right register numbers if we are splitting XO to
27412 OO. */
27413 if (mode == OOmode || mode == XOmode || mode == TDOmode)
27414 {
27415 nregs = hard_regno_nregs (reg, mode);
27416 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27417 if (MEM_P (dst))
27418 {
27419 unsigned offset = 0;
27420 unsigned size = GET_MODE_SIZE (reg_mode);
27421
27422 /* If we are reading an accumulator register, we have to
27423 deprime it before we can access it. */
27424 if (TARGET_MMA && !TARGET_DENSE_MATH
27425 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27426 emit_insn (gen_mma_xxmfacc (src, src));
27427
27428 for (int i = 0; i < nregs; i += reg_mode_nregs)
27429 {
27430 unsigned subreg
27431 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27432 rtx dst2 = adjust_address (dst, reg_mode, offset);
27433 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27434 offset += size;
27435 emit_insn (gen_rtx_SET (dst2, src2));
27436 }
27437
27438 return;
27439 }
27440
27441 if (MEM_P (src))
27442 {
27443 unsigned offset = 0;
27444 unsigned size = GET_MODE_SIZE (reg_mode);
27445
27446 for (int i = 0; i < nregs; i += reg_mode_nregs)
27447 {
27448 unsigned subreg
27449 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27450 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27451 rtx src2 = adjust_address (src, reg_mode, offset);
27452 offset += size;
27453 emit_insn (gen_rtx_SET (dst2, src2));
27454 }
27455
27456 /* If we are writing an accumulator register that overlaps with the
27457 FPR registers, we have to prime it after we've written it. */
27458 if (TARGET_MMA && !TARGET_DENSE_MATH
27459 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27460 emit_insn (gen_mma_xxmtacc (dst, dst));
27461
27462 return;
27463 }
27464
27465 if (GET_CODE (src) == UNSPEC
27466 || GET_CODE (src) == UNSPEC_VOLATILE)
27467 {
27468 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27469 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27470 gcc_assert (REG_P (dst));
27471 if (GET_MODE (src) == XOmode)
27472 gcc_assert ((TARGET_DENSE_MATH
27473 ? VSX_REGNO_P (REGNO (dst))
27474 : FP_REGNO_P (REGNO (dst))));
27475 if (GET_MODE (src) == OOmode)
27476 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27477
27478 int nvecs = XVECLEN (src, 0);
27479 for (int i = 0; i < nvecs; i++)
27480 {
27481 rtx op;
27482 int regno = reg + i;
27483
27484 if (WORDS_BIG_ENDIAN)
27485 {
27486 op = XVECEXP (src, 0, i);
27487
27488 /* If we are loading an even VSX register and the memory location
27489 is adjacent to the next register's memory location (if any),
27490 then we can load them both with one LXVP instruction. */
27491 if ((regno & 1) == 0)
27492 {
27493 rtx op2 = XVECEXP (src, 0, i + 1);
27494 if (adjacent_mem_locations (op, op2) == op)
27495 {
27496 op = adjust_address (op, OOmode, 0);
27497 /* Skip the next register, since we're going to
27498 load it together with this register. */
27499 i++;
27500 }
27501 }
27502 }
27503 else
27504 {
27505 op = XVECEXP (src, 0, nvecs - i - 1);
27506
27507 /* If we are loading an even VSX register and the memory location
27508 is adjacent to the next register's memory location (if any),
27509 then we can load them both with one LXVP instruction. */
27510 if ((regno & 1) == 0)
27511 {
27512 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27513 if (adjacent_mem_locations (op2, op) == op2)
27514 {
27515 op = adjust_address (op2, OOmode, 0);
27516 /* Skip the next register, since we're going to
27517 load it together with this register. */
27518 i++;
27519 }
27520 }
27521 }
27522
27523 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27524 emit_insn (gen_rtx_SET (dst_i, op));
27525 }
27526
27527 /* On systems without dense math where accumulators overlap with the
27528 vector registers, we have to prime it after we've written it. */
27529 if (GET_MODE (src) == XOmode && !TARGET_DENSE_MATH)
27530 emit_insn (gen_mma_xxmtacc (dst, dst));
27531
27532 return;
27533 }
27534
27535 /* Register -> register moves can use common code. */
27536 }
27537
27538 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27539 {
27540 /* If we are reading an accumulator register and we don't have dense
27541 math, we have to deprime it before we can access it. */
27542 if (TARGET_MMA && !TARGET_DENSE_MATH
27543 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27544 emit_insn (gen_mma_xxmfacc (src, src));
27545
27546 /* Move register range backwards, if we might have destructive
27547 overlap. */
27548 int i;
27549 /* XO/OO are opaque so cannot use subregs. */
27550 if (mode == OOmode || mode == XOmode || mode == TDOmode)
27551 {
27552 for (i = nregs - 1; i >= 0; i--)
27553 {
27554 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27555 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27556 emit_insn (gen_rtx_SET (dst_i, src_i));
27557 }
27558 }
27559 else
27560 {
27561 for (i = nregs - 1; i >= 0; i--)
27562 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27563 i * reg_mode_size),
27564 simplify_gen_subreg (reg_mode, src, mode,
27565 i * reg_mode_size)));
27566 }
27567
27568 /* If we are writing an accumulator register, we have to
27569 prime it after we've written it. */
27570 if (TARGET_MMA && !TARGET_DENSE_MATH
27571 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27572 emit_insn (gen_mma_xxmtacc (dst, dst));
27573 }
27574 else
27575 {
27576 int i;
27577 int j = -1;
27578 bool used_update = false;
27579 rtx restore_basereg = NULL_RTX;
27580
27581 if (MEM_P (src) && INT_REGNO_P (reg))
27582 {
27583 rtx breg;
27584
27585 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27586 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27587 {
27588 rtx delta_rtx;
27589 breg = XEXP (XEXP (src, 0), 0);
27590 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27591 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27592 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27593 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27594 src = replace_equiv_address (src, breg);
27595 }
27596 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27597 {
27598 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27599 {
27600 rtx basereg = XEXP (XEXP (src, 0), 0);
27601 if (TARGET_UPDATE)
27602 {
27603 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27604 emit_insn (gen_rtx_SET (ndst,
27605 gen_rtx_MEM (reg_mode,
27606 XEXP (src, 0))));
27607 used_update = true;
27608 }
27609 else
27610 emit_insn (gen_rtx_SET (basereg,
27611 XEXP (XEXP (src, 0), 1)));
27612 src = replace_equiv_address (src, basereg);
27613 }
27614 else
27615 {
27616 rtx basereg = gen_rtx_REG (Pmode, reg);
27617 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27618 src = replace_equiv_address (src, basereg);
27619 }
27620 }
27621
27622 breg = XEXP (src, 0);
27623 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27624 breg = XEXP (breg, 0);
27625
27626 /* If the base register we are using to address memory is
27627 also a destination reg, then change that register last. */
27628 if (REG_P (breg)
27629 && REGNO (breg) >= REGNO (dst)
27630 && REGNO (breg) < REGNO (dst) + nregs)
27631 j = REGNO (breg) - REGNO (dst);
27632 }
27633 else if (MEM_P (dst) && INT_REGNO_P (reg))
27634 {
27635 rtx breg;
27636
27637 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27638 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27639 {
27640 rtx delta_rtx;
27641 breg = XEXP (XEXP (dst, 0), 0);
27642 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27643 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27644 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27645
27646 /* We have to update the breg before doing the store.
27647 Use store with update, if available. */
27648
27649 if (TARGET_UPDATE)
27650 {
27651 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27652 emit_insn (TARGET_32BIT
27653 ? (TARGET_POWERPC64
27654 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27655 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27656 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27657 used_update = true;
27658 }
27659 else
27660 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27661 dst = replace_equiv_address (dst, breg);
27662 }
27663 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27664 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27665 {
27666 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27667 {
27668 rtx basereg = XEXP (XEXP (dst, 0), 0);
27669 if (TARGET_UPDATE)
27670 {
27671 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27672 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27673 XEXP (dst, 0)),
27674 nsrc));
27675 used_update = true;
27676 }
27677 else
27678 emit_insn (gen_rtx_SET (basereg,
27679 XEXP (XEXP (dst, 0), 1)));
27680 dst = replace_equiv_address (dst, basereg);
27681 }
27682 else
27683 {
27684 rtx basereg = XEXP (XEXP (dst, 0), 0);
27685 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27686 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27687 && REG_P (basereg)
27688 && REG_P (offsetreg)
27689 && REGNO (basereg) != REGNO (offsetreg));
27690 if (REGNO (basereg) == 0)
27691 std::swap (basereg, offsetreg);
27696 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27697 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27698 dst = replace_equiv_address (dst, basereg);
27699 }
27700 }
27701 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27702 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27703 }
27704
27705 /* If we are reading an accumulator register, we have to
27706 deprime it before we can access it. */
27707 if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (src)
27708 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27709 emit_insn (gen_mma_xxmfacc (src, src));
27710
27711 for (i = 0; i < nregs; i++)
27712 {
27713 /* Calculate index to next subword. */
27714 ++j;
27715 if (j == nregs)
27716 j = 0;
27717
27718 /* If compiler already emitted move of first word by
27719 store with update, no need to do anything. */
27720 if (j == 0 && used_update)
27721 continue;
27722
27723 /* XO/OO are opaque so cannot use subregs. */
27724 if (mode == OOmode || mode == XOmode || mode == TDOmode)
27725 {
27726 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27727 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27728 emit_insn (gen_rtx_SET (dst_i, src_i));
27729 }
27730 else
27731 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27732 j * reg_mode_size),
27733 simplify_gen_subreg (reg_mode, src, mode,
27734 j * reg_mode_size)));
27735 }
27736
27737 /* If we are writing an accumulator register, we have to
27738 prime it after we've written it. */
27739 if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (dst)
27740 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27741 emit_insn (gen_mma_xxmtacc (dst, dst));
27742
27743 if (restore_basereg != NULL_RTX)
27744 emit_insn (restore_basereg);
27745 }
27746 }
27747 \f
27748 /* Return true if the peephole2 can combine a load involving a combination of
27749 an addis instruction and a load with an offset that can be fused together on
27750 a power8. */
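
/* An illustrative candidate pair (the register numbers are arbitrary):

	addis 9,2,.LC0@toc@ha
	lwz 9,.LC0@toc@l(9)

   Here both instructions use the same register, the simplest case the checks
   below accept. */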
27751
27752 bool
27753 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27754 rtx addis_value, /* addis value. */
27755 rtx target, /* target register that is loaded. */
27756 rtx mem) /* bottom part of the memory addr. */
27757 {
27758 rtx addr;
27759 rtx base_reg;
27760
27761 /* Validate arguments. */
27762 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27763 return false;
27764
27765 if (!base_reg_operand (target, GET_MODE (target)))
27766 return false;
27767
27768 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27769 return false;
27770
27771 /* Allow sign/zero extension. */
27772 if (GET_CODE (mem) == ZERO_EXTEND
27773 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27774 mem = XEXP (mem, 0);
27775
27776 if (!MEM_P (mem))
27777 return false;
27778
27779 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27780 return false;
27781
27782 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27783 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27784 return false;
27785
27786 /* Validate that the register used to load the high value is either the
27787 register being loaded, or we can safely replace its use.
27788
27789 This function is only called from the peephole2 pass and we assume that
27790 there are 2 instructions in the peephole (addis and load), so we check
27791 that the target register is not used in the memory address and that the
27792 register holding the addis result is dead after the peephole. */
27793 if (REGNO (addis_reg) != REGNO (target))
27794 {
27795 if (reg_mentioned_p (target, mem))
27796 return false;
27797
27798 if (!peep2_reg_dead_p (2, addis_reg))
27799 return false;
27800
27801 /* If the target register being loaded is the stack pointer, we must
27802 avoid loading any other value into it, even temporarily. */
27803 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27804 return false;
27805 }
27806
27807 base_reg = XEXP (addr, 0);
27808 return REGNO (addis_reg) == REGNO (base_reg);
27809 }
27810
27811 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27812 sequence. We adjust the addis register to use the target register. If the
27813 load sign extends, we adjust the code to do a zero-extending load and an
27814 explicit sign extension afterwards, since the fusion only covers
27815 zero-extending loads.
27816
27817 The operands are:
27818 operands[0] register set with addis (to be replaced with target)
27819 operands[1] value set via addis
27820 operands[2] target register being loaded
27821 operands[3] D-form memory reference using operands[0]. */
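
/* For example, a fused sign-extending halfword load is expanded as a
   zero-extending load wrapped in UNSPEC_FUSION_GPR, followed by an explicit
   sign_extend of the loaded value, as the code below shows (the exact insns
   depend on the modes involved). */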
27822
27823 void
27824 expand_fusion_gpr_load (rtx *operands)
27825 {
27826 rtx addis_value = operands[1];
27827 rtx target = operands[2];
27828 rtx orig_mem = operands[3];
27829 rtx new_addr, new_mem, orig_addr, offset;
27830 enum rtx_code plus_or_lo_sum;
27831 machine_mode target_mode = GET_MODE (target);
27832 machine_mode extend_mode = target_mode;
27833 machine_mode ptr_mode = Pmode;
27834 enum rtx_code extend = UNKNOWN;
27835
27836 if (GET_CODE (orig_mem) == ZERO_EXTEND
27837 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27838 {
27839 extend = GET_CODE (orig_mem);
27840 orig_mem = XEXP (orig_mem, 0);
27841 target_mode = GET_MODE (orig_mem);
27842 }
27843
27844 gcc_assert (MEM_P (orig_mem));
27845
27846 orig_addr = XEXP (orig_mem, 0);
27847 plus_or_lo_sum = GET_CODE (orig_addr);
27848 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27849
27850 offset = XEXP (orig_addr, 1);
27851 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27852 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27853
27854 if (extend != UNKNOWN)
27855 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27856
27857 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27858 UNSPEC_FUSION_GPR);
27859 emit_insn (gen_rtx_SET (target, new_mem));
27860
27861 if (extend == SIGN_EXTEND)
27862 {
27863 int sub_off = ((BYTES_BIG_ENDIAN)
27864 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27865 : 0);
27866 rtx sign_reg
27867 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27868
27869 emit_insn (gen_rtx_SET (target,
27870 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27871 }
27872
27873 return;
27874 }
27875
27876 /* Emit the addis instruction that will be part of a fused instruction
27877 sequence. */
27878
27879 void
27880 emit_fusion_addis (rtx target, rtx addis_value)
27881 {
27882 rtx fuse_ops[10];
27883 const char *addis_str = NULL;
27884
27885 /* Emit the addis instruction. */
27886 fuse_ops[0] = target;
27887 if (satisfies_constraint_L (addis_value))
27888 {
27889 fuse_ops[1] = addis_value;
27890 addis_str = "lis %0,%v1";
27891 }
27892
27893 else if (GET_CODE (addis_value) == PLUS)
27894 {
27895 rtx op0 = XEXP (addis_value, 0);
27896 rtx op1 = XEXP (addis_value, 1);
27897
27898 if (REG_P (op0) && CONST_INT_P (op1)
27899 && satisfies_constraint_L (op1))
27900 {
27901 fuse_ops[1] = op0;
27902 fuse_ops[2] = op1;
27903 addis_str = "addis %0,%1,%v2";
27904 }
27905 }
27906
27907 else if (GET_CODE (addis_value) == HIGH)
27908 {
27909 rtx value = XEXP (addis_value, 0);
27910 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27911 {
27912 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27913 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27914 if (TARGET_ELF)
27915 addis_str = "addis %0,%2,%1@toc@ha";
27916
27917 else if (TARGET_XCOFF)
27918 addis_str = "addis %0,%1@u(%2)";
27919
27920 else
27921 gcc_unreachable ();
27922 }
27923
27924 else if (GET_CODE (value) == PLUS)
27925 {
27926 rtx op0 = XEXP (value, 0);
27927 rtx op1 = XEXP (value, 1);
27928
27929 if (GET_CODE (op0) == UNSPEC
27930 && XINT (op0, 1) == UNSPEC_TOCREL
27931 && CONST_INT_P (op1))
27932 {
27933 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
27934 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
27935 fuse_ops[3] = op1;
27936 if (TARGET_ELF)
27937 addis_str = "addis %0,%2,%1+%3@toc@ha";
27938
27939 else if (TARGET_XCOFF)
27940 addis_str = "addis %0,%1+%3@u(%2)";
27941
27942 else
27943 gcc_unreachable ();
27944 }
27945 }
27946
27947 else if (satisfies_constraint_L (value))
27948 {
27949 fuse_ops[1] = value;
27950 addis_str = "lis %0,%v1";
27951 }
27952
27953 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
27954 {
27955 fuse_ops[1] = value;
27956 addis_str = "lis %0,%1@ha";
27957 }
27958 }
27959
27960 if (!addis_str)
27961 fatal_insn ("Could not generate addis value for fusion", addis_value);
27962
27963 output_asm_insn (addis_str, fuse_ops);
27964 }
27965
27966 /* Emit a D-form load or store instruction that is the second instruction
27967 of a fusion sequence. */
27968
27969 static void
27970 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
27971 {
27972 rtx fuse_ops[10];
27973 char insn_template[80];
27974
27975 fuse_ops[0] = load_reg;
27976 fuse_ops[1] = addis_reg;
27977
27978 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
27979 {
27980 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
27981 fuse_ops[2] = offset;
27982 output_asm_insn (insn_template, fuse_ops);
27983 }
27984
27985 else if (GET_CODE (offset) == UNSPEC
27986 && XINT (offset, 1) == UNSPEC_TOCREL)
27987 {
27988 if (TARGET_ELF)
27989 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
27990
27991 else if (TARGET_XCOFF)
27992 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27993
27994 else
27995 gcc_unreachable ();
27996
27997 fuse_ops[2] = XVECEXP (offset, 0, 0);
27998 output_asm_insn (insn_template, fuse_ops);
27999 }
28000
28001 else if (GET_CODE (offset) == PLUS
28002 && GET_CODE (XEXP (offset, 0)) == UNSPEC
28003 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
28004 && CONST_INT_P (XEXP (offset, 1)))
28005 {
28006 rtx tocrel_unspec = XEXP (offset, 0);
28007 if (TARGET_ELF)
28008 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
28009
28010 else if (TARGET_XCOFF)
28011 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
28012
28013 else
28014 gcc_unreachable ();
28015
28016 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
28017 fuse_ops[3] = XEXP (offset, 1);
28018 output_asm_insn (insn_template, fuse_ops);
28019 }
28020
28021 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
28022 {
28023 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
28024
28025 fuse_ops[2] = offset;
28026 output_asm_insn (insn_template, fuse_ops);
28027 }
28028
28029 else
28030 fatal_insn ("Unable to generate load/store offset for fusion", offset);
28031
28032 return;
28033 }
28034
28035 /* Given an address, convert it into the addis and load offset parts. Addresses
28036 created during the peephole2 process look like:
28037 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
28038 (unspec [(...)] UNSPEC_TOCREL)) */
28039
28040 static void
28041 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
28042 {
28043 rtx hi, lo;
28044
28045 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
28046 {
28047 hi = XEXP (addr, 0);
28048 lo = XEXP (addr, 1);
28049 }
28050 else
28051 gcc_unreachable ();
28052
28053 *p_hi = hi;
28054 *p_lo = lo;
28055 }
28056
28057 /* Return a string to fuse an addis instruction with a GPR load into the same
28058 register that the addis instruction set up. The address that is used
28059 is the logical address that was formed during peephole2:
28060 (lo_sum (high) (low-part))
28061
28062 The code is complicated, so we call output_asm_insn directly, and just
28063 return "". */
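
/* For a DImode load of a TOC-relative symbol on ELF targets, the emitted
   pair looks roughly like this (an illustrative sketch):

	addis 10,2,sym@toc@ha
	ld 10,sym@toc@l(10)  */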
28064
28065 const char *
28066 emit_fusion_gpr_load (rtx target, rtx mem)
28067 {
28068 rtx addis_value;
28069 rtx addr;
28070 rtx load_offset;
28071 const char *load_str = NULL;
28072 machine_mode mode;
28073
28074 if (GET_CODE (mem) == ZERO_EXTEND)
28075 mem = XEXP (mem, 0);
28076
28077 gcc_assert (REG_P (target) && MEM_P (mem));
28078
28079 addr = XEXP (mem, 0);
28080 fusion_split_address (addr, &addis_value, &load_offset);
28081
28082 /* Now emit the load instruction to the same register. */
28083 mode = GET_MODE (mem);
28084 switch (mode)
28085 {
28086 case E_QImode:
28087 load_str = "lbz";
28088 break;
28089
28090 case E_HImode:
28091 load_str = "lhz";
28092 break;
28093
28094 case E_SImode:
28095 case E_SFmode:
28096 load_str = "lwz";
28097 break;
28098
28099 case E_DImode:
28100 case E_DFmode:
28101 gcc_assert (TARGET_POWERPC64);
28102 load_str = "ld";
28103 break;
28104
28105 default:
28106 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
28107 }
28108
28109 /* Emit the addis instruction. */
28110 emit_fusion_addis (target, addis_value);
28111
28112 /* Emit the D-form load instruction. */
28113 emit_fusion_load (target, target, load_offset, load_str);
28114
28115 return "";
28116 }
28117 \f
28118 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
28119 would ignore it there. */
28120 static GTY(()) tree atomic_hold_decl;
28121 static GTY(()) tree atomic_clear_decl;
28122 static GTY(()) tree atomic_update_decl;
28123
28124 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
28125 static void
28126 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
28127 {
28128 if (!TARGET_HARD_FLOAT)
28129 {
28130 #ifdef RS6000_GLIBC_ATOMIC_FENV
28131 if (atomic_hold_decl == NULL_TREE)
28132 {
28133 atomic_hold_decl
28134 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28135 get_identifier ("__atomic_feholdexcept"),
28136 build_function_type_list (void_type_node,
28137 double_ptr_type_node,
28138 NULL_TREE));
28139 TREE_PUBLIC (atomic_hold_decl) = 1;
28140 DECL_EXTERNAL (atomic_hold_decl) = 1;
28141 }
28142
28143 if (atomic_clear_decl == NULL_TREE)
28144 {
28145 atomic_clear_decl
28146 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28147 get_identifier ("__atomic_feclearexcept"),
28148 build_function_type_list (void_type_node,
28149 NULL_TREE));
28150 TREE_PUBLIC (atomic_clear_decl) = 1;
28151 DECL_EXTERNAL (atomic_clear_decl) = 1;
28152 }
28153
28154 tree const_double = build_qualified_type (double_type_node,
28155 TYPE_QUAL_CONST);
28156 tree const_double_ptr = build_pointer_type (const_double);
28157 if (atomic_update_decl == NULL_TREE)
28158 {
28159 atomic_update_decl
28160 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28161 get_identifier ("__atomic_feupdateenv"),
28162 build_function_type_list (void_type_node,
28163 const_double_ptr,
28164 NULL_TREE));
28165 TREE_PUBLIC (atomic_update_decl) = 1;
28166 DECL_EXTERNAL (atomic_update_decl) = 1;
28167 }
28168
28169 tree fenv_var = create_tmp_var_raw (double_type_node);
28170 TREE_ADDRESSABLE (fenv_var) = 1;
28171 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
28172 build4 (TARGET_EXPR, double_type_node, fenv_var,
28173 void_node, NULL_TREE, NULL_TREE));
28174
28175 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
28176 *clear = build_call_expr (atomic_clear_decl, 0);
28177 *update = build_call_expr (atomic_update_decl, 1,
28178 fold_convert (const_double_ptr, fenv_addr));
28179 #endif
28180 return;
28181 }
28182
28183 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
28184 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
28185 tree call_mffs = build_call_expr (mffs, 0);
28186
28187 /* Generates the equivalent of feholdexcept (&fenv_var)
28188
28189 *fenv_var = __builtin_mffs ();
28190 double fenv_hold;
28191 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
28192 __builtin_mtfsf (0xff, fenv_hold); */
28193
28194 /* Mask to clear everything except for the rounding modes and non-IEEE
28195 arithmetic flag. */
28196 const unsigned HOST_WIDE_INT hold_exception_mask
28197 = HOST_WIDE_INT_C (0xffffffff00000007);
28198
28199 tree fenv_var = create_tmp_var_raw (double_type_node);
28200
28201 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
28202 NULL_TREE, NULL_TREE);
28203
28204 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
28205 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28206 build_int_cst (uint64_type_node,
28207 hold_exception_mask));
28208
28209 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28210 fenv_llu_and);
28211
28212 tree hold_mtfsf = build_call_expr (mtfsf, 2,
28213 build_int_cst (unsigned_type_node, 0xff),
28214 fenv_hold_mtfsf);
28215
28216 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
28217
28218 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28219
28220 double fenv_clear = __builtin_mffs ();
28221 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
28222 __builtin_mtfsf (0xff, fenv_clear); */
28223
28224 /* Mask to clear the entire FPSCR image in the lower 32 bits, including
28225 the exception bits and the rounding modes. */
28226 const unsigned HOST_WIDE_INT clear_exception_mask
28227 = HOST_WIDE_INT_C (0xffffffff00000000);
28228
28229 tree fenv_clear = create_tmp_var_raw (double_type_node);
28230
28231 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
28232 call_mffs, NULL_TREE, NULL_TREE);
28233
28234 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
28235 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
28236 fenv_clean_llu,
28237 build_int_cst (uint64_type_node,
28238 clear_exception_mask));
28239
28240 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28241 fenv_clear_llu_and);
28242
28243 tree clear_mtfsf = build_call_expr (mtfsf, 2,
28244 build_int_cst (unsigned_type_node, 0xff),
28245 fenv_clear_mtfsf);
28246
28247 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
28248
28249 /* Generates the equivalent of feupdateenv (&fenv_var)
28250
28251 double old_fenv = __builtin_mffs ();
28252 double fenv_update;
28253 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
28254 (*(uint64_t*)fenv_var & 0x1ff80fff);
28255 __builtin_mtfsf (0xff, fenv_update); */
28256
28257 const unsigned HOST_WIDE_INT update_exception_mask
28258 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28259 const unsigned HOST_WIDE_INT new_exception_mask
28260 = HOST_WIDE_INT_C (0x1ff80fff);
28261
28262 tree old_fenv = create_tmp_var_raw (double_type_node);
28263 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
28264 call_mffs, NULL_TREE, NULL_TREE);
28265
28266 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
28267 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
28268 build_int_cst (uint64_type_node,
28269 update_exception_mask));
28270
28271 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28272 build_int_cst (uint64_type_node,
28273 new_exception_mask));
28274
28275 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
28276 old_llu_and, new_llu_and);
28277
28278 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28279 new_llu_mask);
28280
28281 tree update_mtfsf = build_call_expr (mtfsf, 2,
28282 build_int_cst (unsigned_type_node, 0xff),
28283 fenv_update_mtfsf);
28284
28285 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
28286 }
28287
28288 void
28289 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
28290 {
28291 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28292
28293 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28294 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28295
28296 /* The destination of the vmrgew instruction layout is:
28297 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28298 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28299 vmrgew instruction will be correct. */
28300 if (BYTES_BIG_ENDIAN)
28301 {
28302 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
28303 GEN_INT (0)));
28304 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
28305 GEN_INT (3)));
28306 }
28307 else
28308 {
28309 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
28310 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
28311 }
28312
28313 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28314 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28315
28316 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
28317 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
28318
28319 if (BYTES_BIG_ENDIAN)
28320 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28321 else
28322 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28323 }
28324
28325 void
28326 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
28327 {
28328 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28329
28330 rtx_tmp0 = gen_reg_rtx (V2DImode);
28331 rtx_tmp1 = gen_reg_rtx (V2DImode);
28332
28333 /* The destination of the vmrgew instruction layout is:
28334 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28335 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28336 vmrgew instruction will be correct. */
28337 if (BYTES_BIG_ENDIAN)
28338 {
28339 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
28340 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
28341 }
28342 else
28343 {
28344 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
28345 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
28346 }
28347
28348 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28349 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28350
28351 if (signed_convert)
28352 {
28353 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
28354 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
28355 }
28356 else
28357 {
28358 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
28359 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
28360 }
28361
28362 if (BYTES_BIG_ENDIAN)
28363 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28364 else
28365 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28366 }
28367
28368 void
28369 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
28370 rtx src2)
28371 {
28372 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28373
28374 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28375 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28376
28377 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
28378 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
28379
28380 rtx_tmp2 = gen_reg_rtx (V4SImode);
28381 rtx_tmp3 = gen_reg_rtx (V4SImode);
28382
28383 if (signed_convert)
28384 {
28385 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28386 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28387 }
28388 else
28389 {
28390 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28391 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28392 }
28393
28394 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
28395 }
28396
28397 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28398
28399 static bool
28400 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28401 optimization_type opt_type)
28402 {
28403 switch (op)
28404 {
28405 case rsqrt_optab:
28406 return (opt_type == OPTIMIZE_FOR_SPEED
28407 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28408
28409 default:
28410 return true;
28411 }
28412 }
28413
28414 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28415
28416 static HOST_WIDE_INT
28417 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28418 {
28419 if (TREE_CODE (exp) == STRING_CST
28420 && (STRICT_ALIGNMENT || !optimize_size))
28421 return MAX (align, BITS_PER_WORD);
28422 return align;
28423 }
28424
28425 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28426
28427 static HOST_WIDE_INT
28428 rs6000_starting_frame_offset (void)
28429 {
28430 if (FRAME_GROWS_DOWNWARD)
28431 return 0;
28432 return RS6000_STARTING_FRAME_OFFSET;
28433 }
28434 \f
28435 /* Internal function to return the built-in function id for the complex
28436 multiply operation for a given mode. */
28437
28438 static inline built_in_function
28439 complex_multiply_builtin_code (machine_mode mode)
28440 {
28441 return (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + mode
28442 - MIN_MODE_COMPLEX_FLOAT);
28443 }
28444
28445 /* Internal function to return the built-in function id for the complex divide
28446 operation for a given mode. */
28447
28448 static inline built_in_function
28449 complex_divide_builtin_code (machine_mode mode)
28450 {
28451 return (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + mode
28452 - MIN_MODE_COMPLEX_FLOAT);
28453 }
28454
28455 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
28456 function names from <foo>l to <foo>f128 if the default long double type is
28457 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28458 include file switches the names on systems that support long double as IEEE
28459 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28460 In the future, glibc will export names like __ieee128_sinf128 and we can
28461 switch to using those instead of using sinf128, which pollutes the user's
28462 namespace.
28463
28464 This will switch the names for Fortran math functions as well (which doesn't
28465 use math.h). However, Fortran needs other changes to the compiler and
28466 library before you can switch the real*16 type at compile time.
28467
28468 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28469 only do this transformation if the __float128 type is enabled. This
28470 prevents us from doing the transformation on older 32-bit ports that might
28471 have enabled using IEEE 128-bit floating point as the default long double
28472 type.
28473
28474 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28475 function names used for complex multiply and divide to the appropriate
28476 names. */
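
/* Some illustrative mappings produced by the rules below when long double is
   IEEE 128-bit: the assembler name "sinl" becomes "__sinieee128", "printf"
   becomes "__printfieee128", and "scanf" becomes "__isoc99_scanfieee128". */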
28477
28478 static tree
28479 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28480 {
28481 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28482 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28483 if (TARGET_FLOAT128_TYPE
28484 && TREE_CODE (decl) == FUNCTION_DECL
28485 && DECL_IS_UNDECLARED_BUILTIN (decl)
28486 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28487 {
28488 built_in_function id = DECL_FUNCTION_CODE (decl);
28489 const char *newname = NULL;
28490
28491 if (id == complex_multiply_builtin_code (KCmode))
28492 newname = "__mulkc3";
28493
28494 else if (id == complex_multiply_builtin_code (ICmode))
28495 newname = "__multc3";
28496
28497 else if (id == complex_multiply_builtin_code (TCmode))
28498 newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";
28499
28500 else if (id == complex_divide_builtin_code (KCmode))
28501 newname = "__divkc3";
28502
28503 else if (id == complex_divide_builtin_code (ICmode))
28504 newname = "__divtc3";
28505
28506 else if (id == complex_divide_builtin_code (TCmode))
28507 newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";
28508
28509 if (newname)
28510 {
28511 if (TARGET_DEBUG_BUILTIN)
28512 fprintf (stderr, "Map complex mul/div => %s\n", newname);
28513
28514 return get_identifier (newname);
28515 }
28516 }
28517
28518 /* Map long double built-in functions if long double is IEEE 128-bit. */
28519 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28520 && TREE_CODE (decl) == FUNCTION_DECL
28521 && DECL_IS_UNDECLARED_BUILTIN (decl)
28522 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28523 {
28524 size_t len = IDENTIFIER_LENGTH (id);
28525 const char *name = IDENTIFIER_POINTER (id);
28526 char *newname = NULL;
28527
28528 /* See if it is one of the built-in functions with an unusual name. */
28529 switch (DECL_FUNCTION_CODE (decl))
28530 {
28531 case BUILT_IN_DREML:
28532 newname = xstrdup ("__remainderieee128");
28533 break;
28534
28535 case BUILT_IN_GAMMAL:
28536 newname = xstrdup ("__lgammaieee128");
28537 break;
28538
28539 case BUILT_IN_GAMMAL_R:
28540 case BUILT_IN_LGAMMAL_R:
28541 newname = xstrdup ("__lgammaieee128_r");
28542 break;
28543
28544 case BUILT_IN_NEXTTOWARD:
28545 newname = xstrdup ("__nexttoward_to_ieee128");
28546 break;
28547
28548 case BUILT_IN_NEXTTOWARDF:
28549 newname = xstrdup ("__nexttowardf_to_ieee128");
28550 break;
28551
28552 case BUILT_IN_NEXTTOWARDL:
28553 newname = xstrdup ("__nexttowardieee128");
28554 break;
28555
28556 case BUILT_IN_POW10L:
28557 newname = xstrdup ("__exp10ieee128");
28558 break;
28559
28560 case BUILT_IN_SCALBL:
28561 newname = xstrdup ("__scalbieee128");
28562 break;
28563
28564 case BUILT_IN_SIGNIFICANDL:
28565 newname = xstrdup ("__significandieee128");
28566 break;
28567
28568 case BUILT_IN_SINCOSL:
28569 newname = xstrdup ("__sincosieee128");
28570 break;
28571
28572 default:
28573 break;
28574 }
28575
28576 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28577 if (!newname)
28578 {
28579 size_t printf_len = strlen ("printf");
28580 size_t scanf_len = strlen ("scanf");
28581 size_t printf_chk_len = strlen ("printf_chk");
28582
28583 if (len >= printf_len
28584 && strcmp (name + len - printf_len, "printf") == 0)
28585 newname = xasprintf ("__%sieee128", name);
28586
28587 else if (len >= scanf_len
28588 && strcmp (name + len - scanf_len, "scanf") == 0)
28589 newname = xasprintf ("__isoc99_%sieee128", name);
28590
28591 else if (len >= printf_chk_len
28592 && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28593 newname = xasprintf ("%sieee128", name);
28594
28595 else if (name[len - 1] == 'l')
28596 {
28597 bool uses_ieee128_p = false;
28598 tree type = TREE_TYPE (decl);
28599 machine_mode ret_mode = TYPE_MODE (type);
28600
28601 /* See if the function returns an IEEE 128-bit floating point type or
28602 complex type. */
28603 if (ret_mode == TFmode || ret_mode == TCmode)
28604 uses_ieee128_p = true;
28605 else
28606 {
28607 function_args_iterator args_iter;
28608 tree arg;
28609
28610 /* See if the function passes an IEEE 128-bit floating point type
28611 or complex type. */
28612 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28613 {
28614 machine_mode arg_mode = TYPE_MODE (arg);
28615 if (arg_mode == TFmode || arg_mode == TCmode)
28616 {
28617 uses_ieee128_p = true;
28618 break;
28619 }
28620 }
28621 }
28622
28623 /* If we passed or returned an IEEE 128-bit floating point type,
28624 change the name. Use __<name>ieee128 instead of <name>l. */
28625 if (uses_ieee128_p)
28626 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28627 }
28628 }
28629
28630 if (newname)
28631 {
28632 if (TARGET_DEBUG_BUILTIN)
28633 fprintf (stderr, "Map %s => %s\n", name, newname);
28634
28635 id = get_identifier (newname);
28636 free (newname);
28637 }
28638 }
28639
28640 return id;
28641 }
28642
28643 /* Predict whether the given loop in gimple will be transformed in the RTL
28644 doloop_optimize pass. */
28645
28646 static bool
28647 rs6000_predict_doloop_p (struct loop *loop)
28648 {
28649 gcc_assert (loop);
28650
28651 /* On rs6000, targetm.can_use_doloop_p is actually
28652 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28653 if (loop->inner != NULL)
28654 {
28655 if (dump_file && (dump_flags & TDF_DETAILS))
28656 fprintf (dump_file, "Predict doloop failure due to"
28657 " loop nesting.\n");
28658 return false;
28659 }
28660
28661 return true;
28662 }
28663
28664 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28665
28666 static machine_mode
28667 rs6000_preferred_doloop_mode (machine_mode)
28668 {
28669 return word_mode;
28670 }
28671
28672 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28673
28674 static bool
28675 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28676 {
28677 gcc_assert (MEM_P (mem));
28678
28679 /* curr_insn_transform()'s handling of subregs cannot handle Altivec
28680 AND-style addresses, so don't allow MEMs with those address types to be
28681 substituted as an equivalent expression. See PR93974 for details. */
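/* For example, an Altivec load such as lvx computes its effective address
   as (and (reg) (const_int -16)), masking off the low four bits to force
   16-byte alignment.  */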
28682 if (GET_CODE (XEXP (mem, 0)) == AND)
28683 return true;
28684
28685 return false;
28686 }
28687
28688 /* Implement TARGET_INVALID_CONVERSION. */
28689
28690 static const char *
28691 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28692 {
28693 /* Make sure we're working with the canonical types. */
28694 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28695 fromtype = TYPE_CANONICAL (fromtype);
28696 if (TYPE_CANONICAL (totype) != NULL_TREE)
28697 totype = TYPE_CANONICAL (totype);
28698
28699 machine_mode frommode = TYPE_MODE (fromtype);
28700 machine_mode tomode = TYPE_MODE (totype);
28701
28702 if (frommode != tomode)
28703 {
28704 /* Do not allow conversions to/from XOmode, OOmode, and TDOmode
28705 types. */
28706 if (frommode == XOmode)
28707 return N_("invalid conversion from type %<__vector_quad%>");
28708 if (tomode == XOmode)
28709 return N_("invalid conversion to type %<__vector_quad%>");
28710 if (frommode == OOmode)
28711 return N_("invalid conversion from type %<__vector_pair%>");
28712 if (tomode == OOmode)
28713 return N_("invalid conversion to type %<__vector_pair%>");
28714 if (frommode == TDOmode)
28715 return N_("invalid conversion from type %<__dmr%>");
28716 if (tomode == TDOmode)
28717 return N_("invalid conversion to type %<__dmr%>");
28718 }
28719
28720 /* Conversion allowed. */
28721 return NULL;
28722 }
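
/* For example, a cast between __vector_quad and vector double is diagnosed
   with "invalid conversion from type __vector_quad"; XOmode and OOmode
   values may only be manipulated through the MMA built-ins.  */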
28723
28724 /* Convert an SFmode constant to the integer bit pattern. */
28725
28726 long
28727 rs6000_const_f32_to_i32 (rtx operand)
28728 {
28729 long value;
28730 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28731
28732 gcc_assert (GET_MODE (operand) == SFmode);
28733 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28734 return value;
28735 }
28736
28737 void
28738 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28739 {
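/* VALUE is an SFmode bit pattern; an exponent field of 0 with a non-zero
   mantissa denotes a subnormal, for which the hardware leaves the
   XXSPLTIDP result undefined.  */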
28740 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28741 inform (input_location,
28742 "the result for the xxspltidp instruction "
28743 "is undefined for subnormal input values");
28744 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28745 }
28746
28747 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28748
28749 static bool
28750 rs6000_gen_pic_addr_diff_vec (void)
28751 {
28752 return rs6000_relative_jumptables;
28753 }
28754
28755 void
28756 rs6000_output_addr_vec_elt (FILE *file, int value)
28757 {
28758 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28759 char buf[100];
28760
28761 fprintf (file, "%s", directive);
28762 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28763 assemble_name (file, buf);
28764 fprintf (file, "\n");
28765 }
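
/* For example, a call with VALUE == 42 on a 64-bit ELF target emits the
   double-int directive followed by the internal label, typically ".L42"
   (the exact label prefix is configuration dependent).  */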
28766
28767 \f
28768 /* Copy an integer constant to the vector constant structure. */
28769
28770 static void
28771 constant_int_to_128bit_vector (rtx op,
28772 machine_mode mode,
28773 size_t byte_num,
28774 vec_const_128bit_type *info)
28775 {
28776 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28777 unsigned bitsize = GET_MODE_BITSIZE (mode);
28778
28779 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28780 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28781 }
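
/* E.g. a 16-bit constant 0x1234 is stored most significant byte first:
   bytes[byte_num] gets 0x12 and bytes[byte_num + 1] gets 0x34.  */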
28782
28783 /* Copy a floating point constant to the vector constant structure. */
28784
28785 static void
28786 constant_fp_to_128bit_vector (rtx op,
28787 machine_mode mode,
28788 size_t byte_num,
28789 vec_const_128bit_type *info)
28790 {
28791 unsigned bitsize = GET_MODE_BITSIZE (mode);
28792 unsigned num_words = bitsize / 32;
28793 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28794 long real_words[VECTOR_128BIT_WORDS];
28795
28796 /* Make sure we don't overflow the real_words array and that it is
28797 filled completely. */
28798 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28799
28800 real_to_target (real_words, rtype, mode);
28801
28802 /* Iterate over each 32-bit word in the floating point constant. The
28803 real_to_target function puts out words in target endian fashion. We need
28804 to arrange the order so that the bytes are written in big endian order. */
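/* A sketch for DFmode on a little-endian target: real_to_target returns
   { low word, high word }, so ENDIAN_NUM walks the array backwards and the
   high word's bytes are emitted first.  */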
28805 for (unsigned num = 0; num < num_words; num++)
28806 {
28807 unsigned endian_num = (BYTES_BIG_ENDIAN
28808 ? num
28809 : num_words - 1 - num);
28810
28811 unsigned uvalue = real_words[endian_num];
28812 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28813 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28814 }
28815
28816 /* Mark that this constant involves floating point. */
28817 info->fp_constant_p = true;
28818 }
28819
28820 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28821 structure INFO.
28822
28823 Break the constant out into bytes, half words, words, and double words.
28824 Return true if we have successfully converted the constant.
28825
28826 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28827 constants. Integer and floating point scalar constants are splatted to fill
28828 out the vector. */
28829
28830 bool
28831 vec_const_128bit_to_bytes (rtx op,
28832 machine_mode mode,
28833 vec_const_128bit_type *info)
28834 {
28835 /* Initialize the constant structure. */
28836 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28837
28838 /* Assume CONST_INTs are DImode. */
28839 if (mode == VOIDmode)
28840 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28841
28842 if (mode == VOIDmode)
28843 return false;
28844
28845 unsigned size = GET_MODE_SIZE (mode);
28846 bool splat_p = false;
28847
28848 if (size > VECTOR_128BIT_BYTES)
28849 return false;
28850
28851 /* Set up the bits. */
28852 switch (GET_CODE (op))
28853 {
28854 /* Integer constants, default to double word. */
28855 case CONST_INT:
28856 {
28857 constant_int_to_128bit_vector (op, mode, 0, info);
28858 splat_p = true;
28859 break;
28860 }
28861
28862 /* Floating point constants. */
28863 case CONST_DOUBLE:
28864 {
28865 /* Fail if the floating point constant is the wrong mode. */
28866 if (GET_MODE (op) != mode)
28867 return false;
28868
28869 /* SFmode scalars are stored in DFmode format. */
28870 if (mode == SFmode)
28871 {
28872 mode = DFmode;
28873 size = GET_MODE_SIZE (DFmode);
28874 }
28875
28876 constant_fp_to_128bit_vector (op, mode, 0, info);
28877 splat_p = true;
28878 break;
28879 }
28880
28881 /* Vector constants, iterate over each element. On little endian
28882 systems, we have to reverse the element numbers. */
28883 case CONST_VECTOR:
28884 {
28885 /* Fail if the vector constant is the wrong mode or size. */
28886 if (GET_MODE (op) != mode
28887 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28888 return false;
28889
28890 machine_mode ele_mode = GET_MODE_INNER (mode);
28891 size_t ele_size = GET_MODE_SIZE (ele_mode);
28892 size_t nunits = GET_MODE_NUNITS (mode);
28893
28894 for (size_t num = 0; num < nunits; num++)
28895 {
28896 rtx ele = CONST_VECTOR_ELT (op, num);
28897 size_t byte_num = (BYTES_BIG_ENDIAN
28898 ? num
28899 : nunits - 1 - num) * ele_size;
28900
28901 if (CONST_INT_P (ele))
28902 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28903 else if (CONST_DOUBLE_P (ele))
28904 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28905 else
28906 return false;
28907 }
28908
28909 break;
28910 }
28911
28912 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28913 Since we are duplicating the element, we don't have to worry about
28914 endian issues. */
28915 case VEC_DUPLICATE:
28916 {
28917 /* Fail if the vector duplicate is the wrong mode or size. */
28918 if (GET_MODE (op) != mode
28919 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28920 return false;
28921
28922 machine_mode ele_mode = GET_MODE_INNER (mode);
28923 size_t ele_size = GET_MODE_SIZE (ele_mode);
28924 rtx ele = XEXP (op, 0);
28925 size_t nunits = GET_MODE_NUNITS (mode);
28926
28927 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
28928 return false;
28929
28930 for (size_t num = 0; num < nunits; num++)
28931 {
28932 size_t byte_num = num * ele_size;
28933
28934 if (CONST_INT_P (ele))
28935 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28936 else
28937 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28938 }
28939
28940 break;
28941 }
28942
28943 /* Anything else, just return failure. */
28944 default:
28945 return false;
28946 }
28947
28948 /* Splat the constant to fill 128 bits if desired. */
28949 if (splat_p && size < VECTOR_128BIT_BYTES)
28950 {
28951 if ((VECTOR_128BIT_BYTES % size) != 0)
28952 return false;
28953
28954 for (size_t offset = size;
28955 offset < VECTOR_128BIT_BYTES;
28956 offset += size)
28957 memcpy ((void *) &info->bytes[offset],
28958 (void *) &info->bytes[0],
28959 size);
28960 }
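
/* E.g. an 8-byte DImode constant fills bytes 0..7 and is then copied into
   bytes 8..15, giving the same layout as a V2DI vector whose two elements
   are equal.  */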
28961
28962 /* Remember original size. */
28963 info->original_size = size;
28964
28965 /* Determine if the bytes are all the same. */
28966 unsigned char first_byte = info->bytes[0];
28967 info->all_bytes_same = true;
28968 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
28969 if (first_byte != info->bytes[i])
28970 {
28971 info->all_bytes_same = false;
28972 break;
28973 }
28974
28975 /* Pack half words together & determine if all of the half words are the
28976 same. */
28977 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
28978 info->half_words[i] = ((info->bytes[i * 2] << 8)
28979 | info->bytes[(i * 2) + 1]);
28980
28981 unsigned short first_hword = info->half_words[0];
28982 info->all_half_words_same = true;
28983 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
28984 if (first_hword != info->half_words[i])
28985 {
28986 info->all_half_words_same = false;
28987 break;
28988 }
28989
28990 /* Pack words together & determine if all of the words are the same. */
28991 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
28992 info->words[i] = ((info->bytes[i * 4] << 24)
28993 | (info->bytes[(i * 4) + 1] << 16)
28994 | (info->bytes[(i * 4) + 2] << 8)
28995 | info->bytes[(i * 4) + 3]);
28996
28997 info->all_words_same
28998 = (info->words[0] == info->words[1]
29000 && info->words[0] == info->words[2]
29001 && info->words[0] == info->words[3]);
29002
29003 /* Pack double words together & determine if all of the double words are the
29004 same. */
29005 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
29006 {
29007 unsigned HOST_WIDE_INT d_word = 0;
29008 for (size_t j = 0; j < 8; j++)
29009 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
29010
29011 info->double_words[i] = d_word;
29012 }
29013
29014 info->all_double_words_same
29015 = (info->double_words[0] == info->double_words[1]);
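
/* At this point, a constant whose four words are all 0x01020304 has
   all_words_same set, half_words[0] == 0x0102, double_words[0] ==
   0x0102030401020304, and all_half_words_same clear (0x0102 != 0x0304).  */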
29016
29017 return true;
29018 }
29019
29020 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
29021 if the LXVKQ instruction cannot be used. Otherwise return the immediate
29022 value to be used with the LXVKQ instruction. */
29023
29024 unsigned
29025 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
29026 {
29027 /* The instruction is only supported if power10 code generation, IEEE 128-bit
29028 floating point hardware, and VSX registers are all available. */
29029 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
29030 || !TARGET_VSX)
29031 return 0;
29032
29033 /* All of the constants that can be generated by LXVKQ have the bottom 3
29034 words equal to 0. */
29035 if (vsx_const->words[1] != 0
29036 || vsx_const->words[2] != 0
29037 || vsx_const->words[3] != 0)
29038 return 0;
29039
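/* The encodings below mirror the LXVKQ immediate table; e.g. 0x3FFF0000 is
   the top word of IEEE 128-bit +1.0 (sign 0, biased exponent 0x3FFF, zero
   mantissa), which is loaded with immediate 1.  */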
29040 /* See if we have a match for the first word. */
29041 switch (vsx_const->words[0])
29042 {
29043 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
29044 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
29045 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
29046 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
29047 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
29048 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
29049 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
29050 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
29051 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
29052 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
29053 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
29054 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
29055 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
29056 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
29057 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
29058 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
29059 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
29060 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
29061
29062 /* Anything else cannot be loaded. */
29063 default:
29064 break;
29065 }
29066
29067 return 0;
29068 }
29069
29070 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
29071 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
29072 value to be used with the XXSPLTIW instruction. */
29073
29074 unsigned
29075 constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
29076 {
29077 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29078 return 0;
29079
29080 if (!vsx_const->all_words_same)
29081 return 0;
29082
29083 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
29084 if (vsx_const->all_bytes_same)
29085 return 0;
29086
29087 /* See if we can use VSPLTISH or VSPLTISW. */
29088 if (vsx_const->all_half_words_same)
29089 {
29090 short sign_h_word = vsx_const->half_words[0];
29091 if (EASY_VECTOR_15 (sign_h_word))
29092 return 0;
29093 }
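
/* EASY_VECTOR_15 accepts the -16..15 range that VSPLTIS[BHW] handle
   directly; e.g. a splat of the word 0x0000000C is left to vspltisw 12
   rather than the prefixed XXSPLTIW.  */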
29094
29095 int sign_word = vsx_const->words[0];
29096 if (EASY_VECTOR_15 (sign_word))
29097 return 0;
29098
29099 return vsx_const->words[0];
29100 }
29101
29102 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
29103 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
29104 value to be used with the XXSPLTIDP instruction. */
29105
29106 unsigned
29107 constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
29108 {
29109 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29110 return 0;
29111
29112 /* Reject if the two 64-bit segments are not the same. */
29113 if (!vsx_const->all_double_words_same)
29114 return 0;
29115
29116 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
29117 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
29118 if (vsx_const->all_bytes_same
29119 || vsx_const->all_half_words_same
29120 || vsx_const->all_words_same)
29121 return 0;
29122
29123 unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
29124
29125 /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
29126 pattern and the signaling NaN bit pattern. Recognize infinity and
29127 negative infinity. */
29128
29129 /* Bit representation of DFmode normal quiet NaN. */
29130 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
29131
29132 /* Bit representation of DFmode normal signaling NaN. */
29133 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
29134
29135 /* Bit representation of DFmode positive infinity. */
29136 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
29137
29138 /* Bit representation of DFmode negative infinity. */
29139 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
29140
29141 if (value != RS6000_CONST_DF_NAN
29142 && value != RS6000_CONST_DF_NANS
29143 && value != RS6000_CONST_DF_INF
29144 && value != RS6000_CONST_DF_NEG_INF)
29145 {
29146 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
29147 the exponent, and 52 bits for the mantissa (not counting the hidden
29148 bit used for normal numbers). NaN values have the exponent set to all
29149 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
29150
29151 int df_exponent = (value >> 52) & 0x7ff;
29152 unsigned HOST_WIDE_INT
29153 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
29154
29155 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */
29156 return 0;
29157
29158 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
29159 the exponent all 0 bits, and the mantissa non-zero. If the value is
29160 subnormal, then the hidden bit in the mantissa is not set. */
29161 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */
29162 return 0;
29163 }
29164
29165 /* Change the representation to a DFmode constant. */
29166 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
29167
29168 /* real_from_target takes the target words in target order. */
29169 if (!BYTES_BIG_ENDIAN)
29170 std::swap (df_words[0], df_words[1]);
29171
29172 REAL_VALUE_TYPE rv_type;
29173 real_from_target (&rv_type, df_words, DFmode);
29174
29175 const REAL_VALUE_TYPE *rv = &rv_type;
29176
29177 /* Validate that the number can be stored as an SFmode value. */
29178 if (!exact_real_truncate (SFmode, rv))
29179 return 0;
29180
29181 /* Validate that the number is not an SFmode subnormal value (exponent is 0,
29182 mantissa field is non-zero), which is undefined for the XXSPLTIDP
29183 instruction. */
29184 long sf_value;
29185 real_to_target (&sf_value, rv, SFmode);
29186
29187 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
29188 and 23 bits for the mantissa. Subnormal numbers have the exponent all
29189 0 bits, and the mantissa non-zero. */
29190 long sf_exponent = (sf_value >> 23) & 0xFF;
29191 long sf_mantissa = sf_value & 0x7FFFFF;
29192
29193 if (sf_exponent == 0 && sf_mantissa != 0)
29194 return 0;
29195
29196 /* Return the immediate to be used. */
29197 return sf_value;
29198 }
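
/* E.g. a V2DF splat of 1.5 (double words 0x3FF8000000000000) passes the
   checks above, and since 1.5 truncates to SFmode exactly, the SFmode bit
   pattern 0x3FC00000 is returned as the XXSPLTIDP immediate.  */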
29199
29200 /* There are currently only two opaque types: the __vector_quad and
29201 __vector_pair built-in types. They are target specific and
29202 only available when MMA is supported. With MMA supported, this
29203 function simply returns false. Otherwise it checks whether the given
29204 gimple STMT is an assignment, asm or call stmt that uses either of
29205 these two opaque types unexpectedly; if so, it raises an error
29206 message and returns true, otherwise it returns false. */
29207
29208 bool
29209 rs6000_opaque_type_invalid_use_p (gimple *stmt)
29210 {
29211 if (TARGET_MMA)
29212 return false;
29213
29214 /* If the given TYPE is one MMA opaque type, emit the corresponding
29215 error messages and return true, otherwise return false. */
29216 auto check_and_error_invalid_use = [](tree type)
29217 {
29218 tree mv = TYPE_MAIN_VARIANT (type);
29219 if (mv == vector_quad_type_node)
29220 {
29221 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29222 return true;
29223 }
29224 else if (mv == vector_pair_type_node)
29225 {
29226 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29227 return true;
29228 }
29229 return false;
29230 };
29231
29232 if (stmt)
29233 {
29234 /* The usage of MMA opaque types is very limited for now;
29235 checking gassign, gasm and gcall statements is enough so far. */
29236 if (gassign *ga = dyn_cast<gassign *> (stmt))
29237 {
29238 tree lhs = gimple_assign_lhs (ga);
29239 tree type = TREE_TYPE (lhs);
29240 if (check_and_error_invalid_use (type))
29241 return true;
29242 }
29243 else if (gasm *gs = dyn_cast<gasm *> (stmt))
29244 {
29245 unsigned ninputs = gimple_asm_ninputs (gs);
29246 for (unsigned i = 0; i < ninputs; i++)
29247 {
29248 tree op = gimple_asm_input_op (gs, i);
29249 tree val = TREE_VALUE (op);
29250 tree type = TREE_TYPE (val);
29251 if (check_and_error_invalid_use (type))
29252 return true;
29253 }
29254 unsigned noutputs = gimple_asm_noutputs (gs);
29255 for (unsigned i = 0; i < noutputs; i++)
29256 {
29257 tree op = gimple_asm_output_op (gs, i);
29258 tree val = TREE_VALUE (op);
29259 tree type = TREE_TYPE (val);
29260 if (check_and_error_invalid_use (type))
29261 return true;
29262 }
29263 }
29264 else if (gcall *gc = dyn_cast<gcall *> (stmt))
29265 {
29266 unsigned nargs = gimple_call_num_args (gc);
29267 for (unsigned i = 0; i < nargs; i++)
29268 {
29269 tree arg = gimple_call_arg (gc, i);
29270 tree type = TREE_TYPE (arg);
29271 if (check_and_error_invalid_use (type))
29272 return true;
29273 }
29274 }
29275 }
29276
29277 return false;
29278 }
29279
29280 struct gcc_target targetm = TARGET_INITIALIZER;
29281
29282 #include "gt-rs6000.h"