// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimple-walk.h"
#include "ssa.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last. */
#include "target-def.h"

extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined. */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it. */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup. */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux. */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode. */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64bit AIX TLS. */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer. */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers. */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned. */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned. */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable. */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode. */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode. */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class. */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes. */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches. */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector. */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for. */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically. */
enum rs6000_recip_mask {
  RECIP_SF_DIV = 0x001,         /* Use divide estimate. */
  RECIP_DF_DIV = 0x002,
  RECIP_V4SF_DIV = 0x004,
  RECIP_V2DF_DIV = 0x008,

  RECIP_SF_RSQRT = 0x010,       /* Use reciprocal sqrt estimate. */
  RECIP_DF_RSQRT = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx. */
  RECIP_NONE = 0,
  RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
               | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough. */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options. */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all", RECIP_ALL },
  { "none", RECIP_NONE },
  { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
            | RECIP_V2DF_DIV) },
  { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
              | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

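/* For example (per the GCC manual), -mrecip=all,!rsqrtd enables all of the
   estimates except the double precision reciprocal square root; the option
   takes a comma separated list of the names above, each optionally negated
   with a leading '!'. */
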
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA. */
enum {
  CLONE_DEFAULT = 0,            /* default clone. */
  CLONE_ISA_2_05,               /* ISA 2.05 (power6). */
  CLONE_ISA_2_06,               /* ISA 2.06 (power7). */
  CLONE_ISA_2_07,               /* ISA 2.07 (power8). */
  CLONE_ISA_3_00,               /* ISA 3.0 (power9). */
  CLONE_ISA_3_1,                /* ISA 3.1 (power10). */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names. */
struct clone_map {
  HOST_WIDE_INT isa_mask;       /* rs6000_isa mask */
  const char *name;             /* name to use in __builtin_cpu_supports. */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },                                    /* Default options. */
  { OPTION_MASK_CMPB, "arch_2_05" },            /* ISA 2.05 (power6). */
  { OPTION_MASK_POPCNTD, "arch_2_06" },         /* ISA 2.06 (power7). */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },       /* ISA 2.07 (power8). */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },       /* ISA 3.0 (power9). */
  { OPTION_MASK_POWER10, "arch_3_1" },          /* ISA 3.1 (power10). */
};


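/* Illustrative (hypothetical) use: for a function declared with
   __attribute__ ((target_clones ("cpu=power9", "default"))), the generated
   resolver tests __builtin_cpu_supports ("arch_3_00") -- the CLONE_ISA_3_00
   entry above -- to select the POWER9 clone at run time. */
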
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC. */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin. */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly. */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx). */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type. */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers). */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes. */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers. */
  RELOAD_REG_FPR,               /* Traditional floating point regs. */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers. */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks. */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits. */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class. */
struct reload_reg_map_type {
  const char *name;             /* Register class name. */
  int reg;                      /* Register in the register class. */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },           /* RELOAD_REG_GPR. */
  { "Fpr", FIRST_FPR_REGNO },           /* RELOAD_REG_FPR. */
  { "VMX", FIRST_ALTIVEC_REGNO },       /* RELOAD_REG_VMX. */
  { "Any", -1 },                        /* RELOAD_REG_ANY. */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two. */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register. */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers. */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing. */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing. */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid. */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid. */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing. */
#define RELOAD_REG_QUAD_OFFSET  0x80    /* Quad offset is limited. */

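/* For instance, an addr_mask entry of
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET)
   says the mode is valid in that register class with both reg+reg and
   reg+offset addressing, but without the pre-increment/pre-modify forms. */
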
/* Masks of the valid addressing modes, indexed by register type. */
struct rs6000_reg_addr {
  enum insn_code reload_load;           /* INSN to reload for loading. */
  enum insn_code reload_store;          /* INSN to reload for storing. */
  enum insn_code reload_fpr_gpr;        /* INSN to move from FPR to GPR. */
  enum insn_code reload_gpr_vsx;        /* INSN to move from GPR to VSX. */
  enum insn_code reload_vsx_gpr;        /* INSN to move from VSX to GPR. */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
  bool scalar_in_vmx_p;                 /* Scalar value can go in VMX. */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY. */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}

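/* On PowerPC these correspond (roughly) to the load/store-with-update
   instructions, e.g. lwzu/stwu for PRE_INC/PRE_DEC and the indexed update
   forms such as stwux for PRE_MODIFY. */
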
/* Return true if we have D-form addressing in altivec registers. */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary. */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}

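/* E.g. the ISA 3.0 lxv/stxv instructions use a DQ-form displacement, so only
   offsets that are a multiple of 16 can be encoded directly. */
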
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected. */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
        {
          out_set = single_set (out_insn);
          if (!out_set)
            {
              out_pat = PATTERN (out_insn);
              if (GET_CODE (out_pat) == PARALLEL)
                {
                  for (i = 0; i < XVECLEN (out_pat, 0); i++)
                    {
                      out_exp = XVECEXP (out_pat, 0, i);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
        return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
        {
          in_exp = XVECEXP (in_pat, 0, i);
          if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
            continue;
          else if (GET_CODE (in_exp) != SET)
            return false;

          if (MEM_P (SET_DEST (in_exp)))
            {
              out_set = single_set (out_insn);
              if (!out_set)
                {
                  out_pat = PATTERN (out_insn);
                  if (GET_CODE (out_pat) != PARALLEL)
                    return false;
                  for (j = 0; j < XVECLEN (out_pat, 0); j++)
                    {
                      out_exp = XVECEXP (out_pat, 0, j);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  return store_data_bypass_p (out_insn, in_insn);
}

\f
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

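/* Note that COSTS_N_INSNS (1) is the cost of one add; the macro expands to
   (N) * 4 (see rtl.h), which is why the Cell entries below can express
   half-insn costs, e.g. COSTS_N_INSNS (9/2)+2 == 18 == 4.5 adds. */
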
/* Instruction size costs on 32-bit processors. */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64-bit processors. */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors. */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors. */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors. */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors. */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors. */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors. */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors. */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors. */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors. */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors. */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors. */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors. */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor. */
/* COSTS_N_INSNS (1) ~ one add. */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,        /* mulsi */
  COSTS_N_INSNS (6/2),          /* mulsi_const */
  COSTS_N_INSNS (6/2),          /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,       /* muldi */
  COSTS_N_INSNS (38/2),         /* divsi */
  COSTS_N_INSNS (70/2),         /* divdi */
  COSTS_N_INSNS (10/2),         /* fp */
  COSTS_N_INSNS (10/2),         /* dmul */
  COSTS_N_INSNS (74/2),         /* sdiv */
  COSTS_N_INSNS (74/2),         /* ddiv */
  128,                          /* cache line size */
  32,                           /* l1 cache */
  512,                          /* l2 cache */
  6,                            /* streams */
  0,                            /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors. */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors. */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors. */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores. */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors. */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors. */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors. */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors. */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors. */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors. */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors. */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors. */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors. */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors. */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER10 processors. */
static const
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (2),    /* fp */
  COSTS_N_INSNS (2),    /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (13),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  16,                   /* prefetch streams */
  COSTS_N_INSNS (2),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors. */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use. */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
                                                  reg_class_t,
                                                  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
                                                machine_mode,
                                                reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries. */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;


\f
/* Default register names. */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes. */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec", 1, 1, false, true, false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall", 0, 0, false, true, true, false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true, true, false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
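
/* Illustrative note: the AltiVec keywords are implemented on top of the
   "altivec" attribute above; rs6000-c.cc defines macros along the lines of
   #define __vector __attribute__ ((altivec (vector__))). */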
1281 \f
1282 #ifndef TARGET_PROFILE_KERNEL
1283 #define TARGET_PROFILE_KERNEL 0
1284 #endif
1285 \f
1286 /* Initialize the GCC target structure. */
1287 #undef TARGET_ATTRIBUTE_TABLE
1288 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1289 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1290 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1291 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1292 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1293
1294 #undef TARGET_ASM_ALIGNED_DI_OP
1295 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1296
1297 /* Default unaligned ops are only provided for ELF. Find the ops needed
1298 for non-ELF systems. */
1299 #ifndef OBJECT_FORMAT_ELF
1300 #if TARGET_XCOFF
1301 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1302 64-bit targets. */
1303 #undef TARGET_ASM_UNALIGNED_HI_OP
1304 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1305 #undef TARGET_ASM_UNALIGNED_SI_OP
1306 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1307 #undef TARGET_ASM_UNALIGNED_DI_OP
1308 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1309 #else
1310 /* For Darwin. */
1311 #undef TARGET_ASM_UNALIGNED_HI_OP
1312 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1313 #undef TARGET_ASM_UNALIGNED_SI_OP
1314 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1315 #undef TARGET_ASM_UNALIGNED_DI_OP
1316 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1317 #undef TARGET_ASM_ALIGNED_DI_OP
1318 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1319 #endif
1320 #endif
1321
1322 /* This hook deals with fixups for relocatable code and DI-mode objects
1323 in 64-bit code. */
1324 #undef TARGET_ASM_INTEGER
1325 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1326
1327 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1328 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1329 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1330 #endif
1331
1332 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1333 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1334 rs6000_print_patchable_function_entry
1335
1336 #undef TARGET_SET_UP_BY_PROLOGUE
1337 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1338
1339 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1340 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1341 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1342 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1343 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1344 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1345 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1346 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1347 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1348 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1349 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1350 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1351
1352 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1353 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1354
1355 #undef TARGET_INTERNAL_ARG_POINTER
1356 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1357
1358 #undef TARGET_HAVE_TLS
1359 #define TARGET_HAVE_TLS HAVE_AS_TLS
1360
1361 #undef TARGET_CANNOT_FORCE_CONST_MEM
1362 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1363
1364 #undef TARGET_DELEGITIMIZE_ADDRESS
1365 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1366
1367 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1368 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1369
1370 #undef TARGET_LEGITIMATE_COMBINED_INSN
1371 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1372
1373 #undef TARGET_ASM_FUNCTION_PROLOGUE
1374 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1375 #undef TARGET_ASM_FUNCTION_EPILOGUE
1376 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1377
1378 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1379 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1380
1381 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1382 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1383
1384 #undef TARGET_LEGITIMIZE_ADDRESS
1385 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1386
1387 #undef TARGET_SCHED_VARIABLE_ISSUE
1388 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1389
1390 #undef TARGET_SCHED_ISSUE_RATE
1391 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1392 #undef TARGET_SCHED_ADJUST_COST
1393 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1394 #undef TARGET_SCHED_ADJUST_PRIORITY
1395 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1396 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1397 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1398 #undef TARGET_SCHED_INIT
1399 #define TARGET_SCHED_INIT rs6000_sched_init
1400 #undef TARGET_SCHED_FINISH
1401 #define TARGET_SCHED_FINISH rs6000_sched_finish
1402 #undef TARGET_SCHED_REORDER
1403 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1404 #undef TARGET_SCHED_REORDER2
1405 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1406
1407 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1408 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1409
1410 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1411 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1412
1413 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1414 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1415 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1416 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1417 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1418 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1419 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1420 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1421
1422 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1423 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1424
1425 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1426 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1427 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1428 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1429 rs6000_builtin_support_vector_misalignment
1430 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1431 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1432 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1433 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1434 rs6000_builtin_vectorization_cost
1435 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1436 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1437 rs6000_preferred_simd_mode
1438 #undef TARGET_VECTORIZE_CREATE_COSTS
1439 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1440
1441 #undef TARGET_LOOP_UNROLL_ADJUST
1442 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1443
1444 #undef TARGET_INIT_BUILTINS
1445 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1446 #undef TARGET_BUILTIN_DECL
1447 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1448
1449 #undef TARGET_FOLD_BUILTIN
1450 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1451 #undef TARGET_GIMPLE_FOLD_BUILTIN
1452 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1453
1454 #undef TARGET_EXPAND_BUILTIN
1455 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1456
1457 #undef TARGET_MANGLE_TYPE
1458 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1459
1460 #undef TARGET_INIT_LIBFUNCS
1461 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1462
1463 #if TARGET_MACHO
1464 #undef TARGET_BINDS_LOCAL_P
1465 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1466 #endif
1467
1468 #undef TARGET_MS_BITFIELD_LAYOUT_P
1469 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1470
1471 #undef TARGET_ASM_OUTPUT_MI_THUNK
1472 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1473
1474 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1475 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1476
1477 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1478 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1479
1480 #undef TARGET_REGISTER_MOVE_COST
1481 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1482 #undef TARGET_MEMORY_MOVE_COST
1483 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1484 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1485 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1486 rs6000_ira_change_pseudo_allocno_class
1487 #undef TARGET_CANNOT_COPY_INSN_P
1488 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1489 #undef TARGET_RTX_COSTS
1490 #define TARGET_RTX_COSTS rs6000_rtx_costs
1491 #undef TARGET_ADDRESS_COST
1492 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1493 #undef TARGET_INSN_COST
1494 #define TARGET_INSN_COST rs6000_insn_cost
1495
1496 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1497 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1498
1499 #undef TARGET_PROMOTE_FUNCTION_MODE
1500 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1501
1502 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1503 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1504
1505 #undef TARGET_RETURN_IN_MEMORY
1506 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1507
1508 #undef TARGET_RETURN_IN_MSB
1509 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1510
1511 #undef TARGET_SETUP_INCOMING_VARARGS
1512 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1513
1514 /* Always strict argument naming on rs6000. */
1515 #undef TARGET_STRICT_ARGUMENT_NAMING
1516 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1517 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1518 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1519 #undef TARGET_SPLIT_COMPLEX_ARG
1520 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1521 #undef TARGET_MUST_PASS_IN_STACK
1522 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1523 #undef TARGET_PASS_BY_REFERENCE
1524 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1525 #undef TARGET_ARG_PARTIAL_BYTES
1526 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1527 #undef TARGET_FUNCTION_ARG_ADVANCE
1528 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1529 #undef TARGET_FUNCTION_ARG
1530 #define TARGET_FUNCTION_ARG rs6000_function_arg
1531 #undef TARGET_FUNCTION_ARG_PADDING
1532 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1533 #undef TARGET_FUNCTION_ARG_BOUNDARY
1534 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1535
1536 #undef TARGET_BUILD_BUILTIN_VA_LIST
1537 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1538
1539 #undef TARGET_EXPAND_BUILTIN_VA_START
1540 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1541
1542 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1543 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1544
1545 #undef TARGET_EH_RETURN_FILTER_MODE
1546 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1547
1548 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1549 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1550
1551 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1552 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1553 rs6000_libgcc_floating_mode_supported_p
1554
1555 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1556 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1557
1558 #undef TARGET_FLOATN_MODE
1559 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1560
1561 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1562 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1563
1564 #undef TARGET_MD_ASM_ADJUST
1565 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1566
1567 #undef TARGET_OPTION_OVERRIDE
1568 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1569
1570 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1571 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1572 rs6000_builtin_vectorized_function
1573
1574 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1575 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1576 rs6000_builtin_md_vectorized_function
1577
1578 #undef TARGET_STACK_PROTECT_GUARD
1579 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1580
1581 #if !TARGET_MACHO
1582 #undef TARGET_STACK_PROTECT_FAIL
1583 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1584 #endif
1585
1586 #ifdef HAVE_AS_TLS
1587 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1588 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1589 #endif
1590
1591 /* Use a 32-bit anchor range. This leads to sequences like:
1592
1593 addis tmp,anchor,high
1594 add dest,tmp,low
1595
1596 where tmp itself acts as an anchor, and can be shared between
1597 accesses to the same 64k page. */
1598 #undef TARGET_MIN_ANCHOR_OFFSET
1599 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1600 #undef TARGET_MAX_ANCHOR_OFFSET
1601 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
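/* INT32_MIN is spelled -0x7fffffff - 1 above because the literal 0x80000000
   does not fit in a 32-bit int: it would be given an unsigned type, changing
   the signedness of the whole expression.  */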
1602 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1603 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1604 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1605 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1606
1607 #undef TARGET_BUILTIN_RECIPROCAL
1608 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1609
1610 #undef TARGET_SECONDARY_RELOAD
1611 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1612 #undef TARGET_SECONDARY_MEMORY_NEEDED
1613 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1614 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1615 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1616
1617 #undef TARGET_LEGITIMATE_ADDRESS_P
1618 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1619
1620 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1621 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1622
1623 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1624 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1625
1626 #undef TARGET_CAN_ELIMINATE
1627 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1628
1629 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1630 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1631
1632 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1633 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1634
1635 #undef TARGET_TRAMPOLINE_INIT
1636 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1637
1638 #undef TARGET_FUNCTION_VALUE
1639 #define TARGET_FUNCTION_VALUE rs6000_function_value
1640
1641 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1642 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1643
1644 #undef TARGET_OPTION_SAVE
1645 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1646
1647 #undef TARGET_OPTION_RESTORE
1648 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1649
1650 #undef TARGET_OPTION_PRINT
1651 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1652
1653 #undef TARGET_CAN_INLINE_P
1654 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1655
1656 #undef TARGET_SET_CURRENT_FUNCTION
1657 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1658
1659 #undef TARGET_LEGITIMATE_CONSTANT_P
1660 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1661
1662 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1663 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1664
1665 #undef TARGET_CAN_USE_DOLOOP_P
1666 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1667
1668 #undef TARGET_PREDICT_DOLOOP_P
1669 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1670
1671 #undef TARGET_HAVE_COUNT_REG_DECR_P
1672 #define TARGET_HAVE_COUNT_REG_DECR_P true
1673
1674 /* 1000000000 is treated as an infinite cost in IVOPTs.  */
1675 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1676 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1677
1678 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1679 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1680
1681 #undef TARGET_PREFERRED_DOLOOP_MODE
1682 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1683
1684 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1685 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1686
1687 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1688 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1689 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1690 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1691 #undef TARGET_UNWIND_WORD_MODE
1692 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1693
1694 #undef TARGET_OFFLOAD_OPTIONS
1695 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1696
1697 #undef TARGET_C_MODE_FOR_SUFFIX
1698 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1699
1700 #undef TARGET_INVALID_BINARY_OP
1701 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1702
1703 #undef TARGET_OPTAB_SUPPORTED_P
1704 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1705
1706 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1707 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1708
1709 #undef TARGET_COMPARE_VERSION_PRIORITY
1710 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1711
1712 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1713 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1714 rs6000_generate_version_dispatcher_body
1715
1716 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1717 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1718 rs6000_get_function_versions_dispatcher
1719
1720 #undef TARGET_OPTION_FUNCTION_VERSIONS
1721 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1722
1723 #undef TARGET_HARD_REGNO_NREGS
1724 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1725 #undef TARGET_HARD_REGNO_MODE_OK
1726 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1727
1728 #undef TARGET_MODES_TIEABLE_P
1729 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1730
1731 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1732 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1733 rs6000_hard_regno_call_part_clobbered
1734
1735 #undef TARGET_SLOW_UNALIGNED_ACCESS
1736 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1737
1738 #undef TARGET_CAN_CHANGE_MODE_CLASS
1739 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1740
1741 #undef TARGET_CONSTANT_ALIGNMENT
1742 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1743
1744 #undef TARGET_STARTING_FRAME_OFFSET
1745 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1746
1747 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1748 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1749
1750 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1751 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1752
1753 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1754 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1755 rs6000_cannot_substitute_mem_equiv_p
1756
1757 #undef TARGET_INVALID_CONVERSION
1758 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1759
1760 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1761 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1762
1763 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1764 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1765 \f
1766
1767 /* Processor table. */
1768 struct rs6000_ptt
1769 {
1770 const char *const name; /* Canonical processor name. */
1771 const enum processor_type processor; /* Processor type enum value. */
1772 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1773 };
1774
1775 static struct rs6000_ptt const processor_target_table[] =
1776 {
1777 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1778 #include "rs6000-cpus.def"
1779 #undef RS6000_CPU
1780 };
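/* rs6000-cpus.def is a classic X-macro file: it consists of
   RS6000_CPU (NAME, CPU, FLAGS) entries, so the temporary definition above
   expands each entry into a table initializer, e.g. (illustrative entry)
   { "power9", PROCESSOR_POWER9, <mask of power9 ISA flags> }.  */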
1781
1782 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1783 name is invalid. */
1784
1785 static int
1786 rs6000_cpu_name_lookup (const char *name)
1787 {
1788 size_t i;
1789
1790 if (name != NULL)
1791 {
1792 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1793 if (! strcmp (name, processor_target_table[i].name))
1794 return (int)i;
1795 }
1796
1797 return -1;
1798 }
1799
1800 \f
1801 /* Return number of consecutive hard regs needed starting at reg REGNO
1802 to hold something of mode MODE.
1803 This is ordinarily the length in words of a value of mode MODE
1804 but can be less for certain modes in special long registers.
1805
1806 POWER and PowerPC GPRs hold 32 bits worth;
1807 PowerPC64 GPRs and FPRs hold 64 bits worth.  */
1808
1809 static int
1810 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1811 {
1812 unsigned HOST_WIDE_INT reg_size;
1813
1814 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1815 128-bit floating point that can go in vector registers, which has VSX
1816 memory addressing. */
1817 if (FP_REGNO_P (regno))
1818 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1819 ? UNITS_PER_VSX_WORD
1820 : UNITS_PER_FP_WORD);
1821
1822 else if (ALTIVEC_REGNO_P (regno))
1823 reg_size = UNITS_PER_ALTIVEC_WORD;
1824
1825 else
1826 reg_size = UNITS_PER_WORD;
1827
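/* Round up; e.g. a 16-byte vector mode held in 8-byte FP words needs
   (16 + 8 - 1) / 8 = 2 registers.  */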
1828 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1829 }
1830
1831 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1832 MODE. */
1833 static int
1834 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1835 {
1836 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1837
1838 if (COMPLEX_MODE_P (mode))
1839 mode = GET_MODE_INNER (mode);
1840
1841 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1842 registers. */
1843 if (mode == OOmode)
1844 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1845
1846 /* MMA accumulator modes need FPR registers divisible by 4. */
1847 if (mode == XOmode)
1848 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1849
1850 /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
1851 register pairs, and PTImode is used where we need to deal with quad word
1852 memory operations.  Don't allow quad words in the argument or frame
1853 pointer registers, just registers 0..31.  */
1854 if (mode == PTImode)
1855 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1856 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1857 && ((regno & 1) == 0));
1858
1859 /* The VSX registers that overlap the FPRs are wider than on non-VSX
1860 implementations.  Don't allow an item to be split between an FP register
1861 and an Altivec register.  Allow TImode in all VSX registers if the user
1862 asked for it.  */
1863 if (TARGET_VSX && VSX_REGNO_P (regno)
1864 && (VECTOR_MEM_VSX_P (mode)
1865 || VECTOR_ALIGNMENT_P (mode)
1866 || reg_addr[mode].scalar_in_vmx_p
1867 || mode == TImode
1868 || (TARGET_VADDUQM && mode == V1TImode)))
1869 {
1870 if (FP_REGNO_P (regno))
1871 return FP_REGNO_P (last_regno);
1872
1873 if (ALTIVEC_REGNO_P (regno))
1874 {
1875 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1876 return 0;
1877
1878 return ALTIVEC_REGNO_P (last_regno);
1879 }
1880 }
1881
1882 /* The GPRs can hold any mode, but values bigger than one register
1883 cannot go past R31. */
1884 if (INT_REGNO_P (regno))
1885 return INT_REGNO_P (last_regno);
1886
1887 /* The float registers (except for VSX vector modes) can only hold floating
1888 modes and DImode. */
1889 if (FP_REGNO_P (regno))
1890 {
1891 if (VECTOR_ALIGNMENT_P (mode))
1892 return false;
1893
1894 if (SCALAR_FLOAT_MODE_P (mode)
1895 && (mode != TDmode || (regno % 2) == 0)
1896 && FP_REGNO_P (last_regno))
1897 return 1;
1898
1899 if (GET_MODE_CLASS (mode) == MODE_INT)
1900 {
1901 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1902 return 1;
1903
1904 if (TARGET_P8_VECTOR && (mode == SImode))
1905 return 1;
1906
1907 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1908 return 1;
1909 }
1910
1911 return 0;
1912 }
1913
1914 /* The CR register can only hold CC modes. */
1915 if (CR_REGNO_P (regno))
1916 return GET_MODE_CLASS (mode) == MODE_CC;
1917
1918 if (CA_REGNO_P (regno))
1919 return mode == Pmode || mode == SImode;
1920
1921 /* AltiVec modes can go only in AltiVec registers.  */
1922 if (ALTIVEC_REGNO_P (regno))
1923 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1924 || mode == V1TImode);
1925
1926 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1927 registers, and the value must fit within the register set.  */
1928
1929 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1930 }
1931
1932 /* Implement TARGET_HARD_REGNO_NREGS. */
1933
1934 static unsigned int
1935 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1936 {
1937 return rs6000_hard_regno_nregs[mode][regno];
1938 }
1939
1940 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1941
1942 static bool
1943 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1944 {
1945 return rs6000_hard_regno_mode_ok_p[mode][regno];
1946 }
1947
1948 /* Implement TARGET_MODES_TIEABLE_P.
1949
1950 PTImode cannot tie with other modes because PTImode is restricted to even
1951 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1952 57744).
1953
1954 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1955 registers) or XOmode (vector quad, restricted to FPR registers divisible
1956 by 4) to tie with other modes.
1957
1958 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1959 128-bit floating point on VSX systems ties with other vectors. */
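/* For example, under these rules DImode does not tie with DFmode (integer
   vs. scalar float), while V4SImode ties with V2DFmode (both are
   Altivec/VSX vector modes).  */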
1960
1961 static bool
1962 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1963 {
1964 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1965 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1966 return mode1 == mode2;
1967
1968 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1969 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1970 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1971 return false;
1972
1973 if (SCALAR_FLOAT_MODE_P (mode1))
1974 return SCALAR_FLOAT_MODE_P (mode2);
1975 if (SCALAR_FLOAT_MODE_P (mode2))
1976 return false;
1977
1978 if (GET_MODE_CLASS (mode1) == MODE_CC)
1979 return GET_MODE_CLASS (mode2) == MODE_CC;
1980 if (GET_MODE_CLASS (mode2) == MODE_CC)
1981 return false;
1982
1983 return true;
1984 }
1985
1986 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1987
1988 static bool
1989 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1990 machine_mode mode)
1991 {
1992 if (TARGET_32BIT
1993 && TARGET_POWERPC64
1994 && GET_MODE_SIZE (mode) > 4
1995 && INT_REGNO_P (regno))
1996 return true;
1997
1998 if (TARGET_VSX
1999 && FP_REGNO_P (regno)
2000 && GET_MODE_SIZE (mode) > 8
2001 && !FLOAT128_2REG_P (mode))
2002 return true;
2003
2004 return false;
2005 }
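/* For example, with -m32 -mpowerpc64, a DImode value in a GPR is only partly
   preserved across calls, since the 32-bit ABI saves just the low 32 bits of
   each 64-bit register; the first test in the function above catches that
   case.  */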
2006
2007 /* Print interesting facts about registers. */
2008 static void
2009 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2010 {
2011 int r, m;
2012
2013 for (r = first_regno; r <= last_regno; ++r)
2014 {
2015 const char *comma = "";
2016 int len;
2017
2018 if (first_regno == last_regno)
2019 fprintf (stderr, "%s:\t", reg_name);
2020 else
2021 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2022
2023 len = 8;
2024 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2025 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2026 {
2027 if (len > 70)
2028 {
2029 fprintf (stderr, ",\n\t");
2030 len = 8;
2031 comma = "";
2032 }
2033
2034 if (rs6000_hard_regno_nregs[m][r] > 1)
2035 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2036 rs6000_hard_regno_nregs[m][r]);
2037 else
2038 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2039
2040 comma = ", ";
2041 }
2042
2043 if (call_used_or_fixed_reg_p (r))
2044 {
2045 if (len > 70)
2046 {
2047 fprintf (stderr, ",\n\t");
2048 len = 8;
2049 comma = "";
2050 }
2051
2052 len += fprintf (stderr, "%s%s", comma, "call-used");
2053 comma = ", ";
2054 }
2055
2056 if (fixed_regs[r])
2057 {
2058 if (len > 70)
2059 {
2060 fprintf (stderr, ",\n\t");
2061 len = 8;
2062 comma = "";
2063 }
2064
2065 len += fprintf (stderr, "%s%s", comma, "fixed");
2066 comma = ", ";
2067 }
2068
2069 if (len > 70)
2070 {
2071 fprintf (stderr, ",\n\t");
2072 comma = "";
2073 }
2074
2075 len += fprintf (stderr, "%sreg-class = %s", comma,
2076 reg_class_names[(int)rs6000_regno_regclass[r]]);
2077 comma = ", ";
2078
2079 if (len > 70)
2080 {
2081 fprintf (stderr, ",\n\t");
2082 comma = "";
2083 }
2084
2085 fprintf (stderr, "%sregno = %d\n", comma, r);
2086 }
2087 }
2088
2089 static const char *
2090 rs6000_debug_vector_unit (enum rs6000_vector v)
2091 {
2092 const char *ret;
2093
2094 switch (v)
2095 {
2096 case VECTOR_NONE: ret = "none"; break;
2097 case VECTOR_ALTIVEC: ret = "altivec"; break;
2098 case VECTOR_VSX: ret = "vsx"; break;
2099 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2100 default: ret = "unknown"; break;
2101 }
2102
2103 return ret;
2104 }
2105
2106 /* Inner function printing just the address mask for a particular reload
2107 register class. */
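/* With KEEP_SPACES the result is a fixed-width string, one column per flag
   in the order v m i o + + &; e.g. a mask with just RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and RELOAD_REG_OFFSET prints as "v io   ".  */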
2108 DEBUG_FUNCTION char *
2109 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2110 {
2111 static char ret[8];
2112 char *p = ret;
2113
2114 if ((mask & RELOAD_REG_VALID) != 0)
2115 *p++ = 'v';
2116 else if (keep_spaces)
2117 *p++ = ' ';
2118
2119 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2120 *p++ = 'm';
2121 else if (keep_spaces)
2122 *p++ = ' ';
2123
2124 if ((mask & RELOAD_REG_INDEXED) != 0)
2125 *p++ = 'i';
2126 else if (keep_spaces)
2127 *p++ = ' ';
2128
2129 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2130 *p++ = 'O';
2131 else if ((mask & RELOAD_REG_OFFSET) != 0)
2132 *p++ = 'o';
2133 else if (keep_spaces)
2134 *p++ = ' ';
2135
2136 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2137 *p++ = '+';
2138 else if (keep_spaces)
2139 *p++ = ' ';
2140
2141 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2142 *p++ = '+';
2143 else if (keep_spaces)
2144 *p++ = ' ';
2145
2146 if ((mask & RELOAD_REG_AND_M16) != 0)
2147 *p++ = '&';
2148 else if (keep_spaces)
2149 *p++ = ' ';
2150
2151 *p = '\0';
2152
2153 return ret;
2154 }
2155
2156 /* Print the address masks in a human readable fashion.  */
2157 DEBUG_FUNCTION void
2158 rs6000_debug_print_mode (ssize_t m)
2159 {
2160 ssize_t rc;
2161 int spaces = 0;
2162
2163 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2164 for (rc = 0; rc < N_RELOAD_REG; rc++)
2165 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2166 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2167
2168 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2169 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2170 {
2171 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2172 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2173 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2174 spaces = 0;
2175 }
2176 else
2177 spaces += strlen (" Reload=sl");
2178
2179 if (reg_addr[m].scalar_in_vmx_p)
2180 {
2181 fprintf (stderr, "%*s Upper=y", spaces, "");
2182 spaces = 0;
2183 }
2184 else
2185 spaces += strlen (" Upper=y");
2186
2187 if (rs6000_vector_unit[m] != VECTOR_NONE
2188 || rs6000_vector_mem[m] != VECTOR_NONE)
2189 {
2190 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2191 spaces, "",
2192 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2193 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2194 }
2195
2196 fputs ("\n", stderr);
2197 }
2198
2199 #define DEBUG_FMT_ID "%-32s= "
2200 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2201 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2202 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
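/* Adjacent string literals concatenate, so DEBUG_FMT_D expands to
   "%-32s= %d\n", left-justifying the field name in a 32-column field.  */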
2203
2204 /* Print various interesting information with -mdebug=reg. */
2205 static void
2206 rs6000_debug_reg_global (void)
2207 {
2208 static const char *const tf[2] = { "false", "true" };
2209 const char *nl = (const char *)0;
2210 int m;
2211 size_t m1, m2, v;
2212 char costly_num[20];
2213 char nop_num[20];
2214 char flags_buffer[40];
2215 const char *costly_str;
2216 const char *nop_str;
2217 const char *trace_str;
2218 const char *abi_str;
2219 const char *cmodel_str;
2220 struct cl_target_option cl_opts;
2221
2222 /* Modes we want tieable information on. */
2223 static const machine_mode print_tieable_modes[] = {
2224 QImode,
2225 HImode,
2226 SImode,
2227 DImode,
2228 TImode,
2229 PTImode,
2230 SFmode,
2231 DFmode,
2232 TFmode,
2233 IFmode,
2234 KFmode,
2235 SDmode,
2236 DDmode,
2237 TDmode,
2238 V2SImode,
2239 V2SFmode,
2240 V16QImode,
2241 V8HImode,
2242 V4SImode,
2243 V2DImode,
2244 V1TImode,
2245 V32QImode,
2246 V16HImode,
2247 V8SImode,
2248 V4DImode,
2249 V2TImode,
2250 V4SFmode,
2251 V2DFmode,
2252 V8SFmode,
2253 V4DFmode,
2254 OOmode,
2255 XOmode,
2256 CCmode,
2257 CCUNSmode,
2258 CCEQmode,
2259 CCFPmode,
2260 };
2261
2262 /* Virtual regs we are interested in. */
2263 static const struct {
2264 int regno; /* register number. */
2265 const char *name; /* register name. */
2266 } virtual_regs[] = {
2267 { STACK_POINTER_REGNUM, "stack pointer:" },
2268 { TOC_REGNUM, "toc: " },
2269 { STATIC_CHAIN_REGNUM, "static chain: " },
2270 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2271 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2272 { ARG_POINTER_REGNUM, "arg pointer: " },
2273 { FRAME_POINTER_REGNUM, "frame pointer:" },
2274 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2275 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2276 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2277 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2278 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2279 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2280 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2281 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2282 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2283 };
2284
2285 fputs ("\nHard register information:\n", stderr);
2286 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2287 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2288 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2289 LAST_ALTIVEC_REGNO,
2290 "vs");
2291 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2292 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2293 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2294 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2295 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2296 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2297
2298 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2299 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2300 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2301
2302 fprintf (stderr,
2303 "\n"
2304 "d reg_class = %s\n"
2305 "v reg_class = %s\n"
2306 "wa reg_class = %s\n"
2307 "we reg_class = %s\n"
2308 "wr reg_class = %s\n"
2309 "wx reg_class = %s\n"
2310 "wA reg_class = %s\n"
2311 "\n",
2312 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2313 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2314 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2315 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2316 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2317 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2318 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2319
2320 nl = "\n";
2321 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2322 rs6000_debug_print_mode (m);
2323
2324 fputs ("\n", stderr);
2325
2326 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2327 {
2328 machine_mode mode1 = print_tieable_modes[m1];
2329 bool first_time = true;
2330
2331 nl = (const char *)0;
2332 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2333 {
2334 machine_mode mode2 = print_tieable_modes[m2];
2335 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2336 {
2337 if (first_time)
2338 {
2339 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2340 nl = "\n";
2341 first_time = false;
2342 }
2343
2344 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2345 }
2346 }
2347
2348 if (!first_time)
2349 fputs ("\n", stderr);
2350 }
2351
2352 if (nl)
2353 fputs (nl, stderr);
2354
2355 if (rs6000_recip_control)
2356 {
2357 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2358
2359 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2360 if (rs6000_recip_bits[m])
2361 {
2362 fprintf (stderr,
2363 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2364 GET_MODE_NAME (m),
2365 (RS6000_RECIP_AUTO_RE_P (m)
2366 ? "auto"
2367 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2368 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2369 ? "auto"
2370 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2371 }
2372
2373 fputs ("\n", stderr);
2374 }
2375
2376 if (rs6000_cpu_index >= 0)
2377 {
2378 const char *name = processor_target_table[rs6000_cpu_index].name;
2379 HOST_WIDE_INT flags
2380 = processor_target_table[rs6000_cpu_index].target_enable;
2381
2382 sprintf (flags_buffer, "-mcpu=%s flags", name);
2383 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2384 }
2385 else
2386 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2387
2388 if (rs6000_tune_index >= 0)
2389 {
2390 const char *name = processor_target_table[rs6000_tune_index].name;
2391 HOST_WIDE_INT flags
2392 = processor_target_table[rs6000_tune_index].target_enable;
2393
2394 sprintf (flags_buffer, "-mtune=%s flags", name);
2395 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2396 }
2397 else
2398 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2399
2400 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2401 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2402 rs6000_isa_flags);
2403
2404 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2405 rs6000_isa_flags_explicit);
2406
2407 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2408 rs6000_builtin_mask);
2409
2410 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2411
2412 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2413 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2414
2415 switch (rs6000_sched_costly_dep)
2416 {
2417 case max_dep_latency:
2418 costly_str = "max_dep_latency";
2419 break;
2420
2421 case no_dep_costly:
2422 costly_str = "no_dep_costly";
2423 break;
2424
2425 case all_deps_costly:
2426 costly_str = "all_deps_costly";
2427 break;
2428
2429 case true_store_to_load_dep_costly:
2430 costly_str = "true_store_to_load_dep_costly";
2431 break;
2432
2433 case store_to_load_dep_costly:
2434 costly_str = "store_to_load_dep_costly";
2435 break;
2436
2437 default:
2438 costly_str = costly_num;
2439 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2440 break;
2441 }
2442
2443 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2444
2445 switch (rs6000_sched_insert_nops)
2446 {
2447 case sched_finish_regroup_exact:
2448 nop_str = "sched_finish_regroup_exact";
2449 break;
2450
2451 case sched_finish_pad_groups:
2452 nop_str = "sched_finish_pad_groups";
2453 break;
2454
2455 case sched_finish_none:
2456 nop_str = "sched_finish_none";
2457 break;
2458
2459 default:
2460 nop_str = nop_num;
2461 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2462 break;
2463 }
2464
2465 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2466
2467 switch (rs6000_sdata)
2468 {
2469 default:
2470 case SDATA_NONE:
2471 break;
2472
2473 case SDATA_DATA:
2474 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2475 break;
2476
2477 case SDATA_SYSV:
2478 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2479 break;
2480
2481 case SDATA_EABI:
2482 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2483 break;
2484
2485 }
2486
2487 switch (rs6000_traceback)
2488 {
2489 case traceback_default: trace_str = "default"; break;
2490 case traceback_none: trace_str = "none"; break;
2491 case traceback_part: trace_str = "part"; break;
2492 case traceback_full: trace_str = "full"; break;
2493 default: trace_str = "unknown"; break;
2494 }
2495
2496 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2497
2498 switch (rs6000_current_cmodel)
2499 {
2500 case CMODEL_SMALL: cmodel_str = "small"; break;
2501 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2502 case CMODEL_LARGE: cmodel_str = "large"; break;
2503 default: cmodel_str = "unknown"; break;
2504 }
2505
2506 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2507
2508 switch (rs6000_current_abi)
2509 {
2510 case ABI_NONE: abi_str = "none"; break;
2511 case ABI_AIX: abi_str = "aix"; break;
2512 case ABI_ELFv2: abi_str = "ELFv2"; break;
2513 case ABI_V4: abi_str = "V4"; break;
2514 case ABI_DARWIN: abi_str = "darwin"; break;
2515 default: abi_str = "unknown"; break;
2516 }
2517
2518 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2519
2520 if (rs6000_altivec_abi)
2521 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2522
2523 if (rs6000_aix_extabi)
2524 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2525
2526 if (rs6000_darwin64_abi)
2527 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2528
2529 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2530 (TARGET_SOFT_FLOAT ? "true" : "false"));
2531
2532 if (TARGET_LINK_STACK)
2533 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2534
2535 if (TARGET_P8_FUSION)
2536 {
2537 char options[80];
2538
2539 strcpy (options, "power8");
2540 if (TARGET_P8_FUSION_SIGN)
2541 strcat (options, ", sign");
2542
2543 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2544 }
2545
2546 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2547 TARGET_SECURE_PLT ? "secure" : "bss");
2548 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2549 aix_struct_return ? "aix" : "sysv");
2550 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2551 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2552 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2553 tf[!!rs6000_align_branch_targets]);
2554 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2555 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2556 rs6000_long_double_type_size);
2557 if (rs6000_long_double_type_size > 64)
2558 {
2559 fprintf (stderr, DEBUG_FMT_S, "long double type",
2560 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2561 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2562 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2563 }
2564 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2565 (int)rs6000_sched_restricted_insns_priority);
2566 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2567 (int)END_BUILTINS);
2568
2569 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2570 (int)TARGET_FLOAT128_ENABLE_TYPE);
2571
2572 if (TARGET_VSX)
2573 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2574 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2575
2576 if (TARGET_DIRECT_MOVE_128)
2577 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2578 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2579 }
2580
2581 \f
2582 /* Update the addr mask bits in reg_addr to help secondary reload and the
2583 legitimate address support figure out the appropriate addressing to
2584 use.  */
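/* As a sketch of the result on a 64-bit power9 target (an assumption, not
   verified here): DImode in a GPR ends up with the VALID, INDEXED, OFFSET,
   PRE_INCDEC and PRE_MODIFY bits set, which rs6000_debug_addr_mask prints
   as "v io++ ".  */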
2585
2586 static void
2587 rs6000_setup_reg_addr_masks (void)
2588 {
2589 ssize_t rc, reg, m, nregs;
2590 addr_mask_type any_addr_mask, addr_mask;
2591
2592 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2593 {
2594 machine_mode m2 = (machine_mode) m;
2595 bool complex_p = false;
2596 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2597 size_t msize;
2598
2599 if (COMPLEX_MODE_P (m2))
2600 {
2601 complex_p = true;
2602 m2 = GET_MODE_INNER (m2);
2603 }
2604
2605 msize = GET_MODE_SIZE (m2);
2606
2607 /* SDmode is special in that we want to access it only via REG+REG
2608 addressing on power7 and above, since we want to use the LFIWZX and
2609 STFIWZX instructions to load it. */
2610 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2611
2612 any_addr_mask = 0;
2613 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2614 {
2615 addr_mask = 0;
2616 reg = reload_reg_map[rc].reg;
2617
2618 /* Can mode values go in the GPR/FPR/Altivec registers? */
2619 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2620 {
2621 bool small_int_vsx_p = (small_int_p
2622 && (rc == RELOAD_REG_FPR
2623 || rc == RELOAD_REG_VMX));
2624
2625 nregs = rs6000_hard_regno_nregs[m][reg];
2626 addr_mask |= RELOAD_REG_VALID;
2627
2628 /* Indicate if the mode takes more than 1 physical register. If
2629 it takes a single register, indicate it can do REG+REG
2630 addressing. Small integers in VSX registers can only do
2631 REG+REG addressing. */
2632 if (small_int_vsx_p)
2633 addr_mask |= RELOAD_REG_INDEXED;
2634 else if (nregs > 1 || m == BLKmode || complex_p)
2635 addr_mask |= RELOAD_REG_MULTIPLE;
2636 else
2637 addr_mask |= RELOAD_REG_INDEXED;
2638
2639 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2640 addressing. If we allow scalars into Altivec registers,
2641 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2642
2643 For VSX systems, we don't allow update addressing for
2644 DFmode/SFmode if those registers can go in both the
2645 traditional floating point registers and Altivec registers.
2646 The load/store instructions for the Altivec registers do not
2647 have update forms. If we allowed update addressing, it seems
2648 to break IV-OPT code using floating point if the index type is
2649 int instead of long (PR target/81550 and target/84042). */
2650
2651 if (TARGET_UPDATE
2652 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2653 && msize <= 8
2654 && !VECTOR_MODE_P (m2)
2655 && !VECTOR_ALIGNMENT_P (m2)
2656 && !complex_p
2657 && (m != E_DFmode || !TARGET_VSX)
2658 && (m != E_SFmode || !TARGET_P8_VECTOR)
2659 && !small_int_vsx_p)
2660 {
2661 addr_mask |= RELOAD_REG_PRE_INCDEC;
2662
2663 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2664 we don't allow PRE_MODIFY for some multi-register
2665 operations. */
2666 switch (m)
2667 {
2668 default:
2669 addr_mask |= RELOAD_REG_PRE_MODIFY;
2670 break;
2671
2672 case E_DImode:
2673 if (TARGET_POWERPC64)
2674 addr_mask |= RELOAD_REG_PRE_MODIFY;
2675 break;
2676
2677 case E_DFmode:
2678 case E_DDmode:
2679 if (TARGET_HARD_FLOAT)
2680 addr_mask |= RELOAD_REG_PRE_MODIFY;
2681 break;
2682 }
2683 }
2684 }
2685
2686 /* GPR and FPR registers can do REG+OFFSET addressing, except
2687 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2688 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2689 if ((addr_mask != 0) && !indexed_only_p
2690 && msize <= 8
2691 && (rc == RELOAD_REG_GPR
2692 || ((msize == 8 || m2 == SFmode)
2693 && (rc == RELOAD_REG_FPR
2694 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2695 addr_mask |= RELOAD_REG_OFFSET;
2696
2697 /* IBM 128-bit can do REG+OFFSET addressing. */
2698 else if ((addr_mask != 0) && !indexed_only_p
2699 && FLOAT128_IBM_P (m))
2700 addr_mask |= RELOAD_REG_OFFSET;
2701
2702 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2703 instructions are enabled.  The offset for 128-bit VSX registers is
2704 only 12 bits.  While GPRs can handle the full offset range, VSX
2705 registers can only handle the restricted range.  */
2706 else if ((addr_mask != 0) && !indexed_only_p
2707 && msize == 16 && TARGET_P9_VECTOR
2708 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2709 || (m2 == TImode && TARGET_VSX)))
2710 {
2711 addr_mask |= RELOAD_REG_OFFSET;
2712 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2713 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2714 }
2715
2716 /* Vector pairs can do both indexed and offset loads if the
2717 instructions are enabled; otherwise they can only do offset loads,
2718 since the access will be split into two vector moves.  Vector
2719 quads can only do offset loads.  */
2720 else if ((addr_mask != 0) && TARGET_MMA
2721 && (m2 == OOmode || m2 == XOmode))
2722 {
2723 addr_mask |= RELOAD_REG_OFFSET;
2724 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2725 {
2726 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2727 if (m2 == OOmode)
2728 addr_mask |= RELOAD_REG_INDEXED;
2729 }
2730 }
2731
2732 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2733 addressing on 128-bit types. */
2734 if (rc == RELOAD_REG_VMX && msize == 16
2735 && (addr_mask & RELOAD_REG_VALID) != 0)
2736 addr_mask |= RELOAD_REG_AND_M16;
2737
2738 reg_addr[m].addr_mask[rc] = addr_mask;
2739 any_addr_mask |= addr_mask;
2740 }
2741
2742 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2743 }
2744 }
2745
2746 \f
2747 /* Initialize the various global tables that are based on register size. */
2748 static void
2749 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2750 {
2751 ssize_t r, m, c;
2752 int align64;
2753 int align32;
2754
2755 /* Precalculate REGNO_REG_CLASS. */
2756 rs6000_regno_regclass[0] = GENERAL_REGS;
2757 for (r = 1; r < 32; ++r)
2758 rs6000_regno_regclass[r] = BASE_REGS;
2759
2760 for (r = 32; r < 64; ++r)
2761 rs6000_regno_regclass[r] = FLOAT_REGS;
2762
2763 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2764 rs6000_regno_regclass[r] = NO_REGS;
2765
2766 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2767 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2768
2769 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2770 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2771 rs6000_regno_regclass[r] = CR_REGS;
2772
2773 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2774 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2775 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2776 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2777 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2778 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2779 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2780
2781 /* Precalculate register class to simpler reload register class. We don't
2782 need all of the register classes that are combinations of different
2783 classes, just the simple ones that have constraint letters. */
2784 for (c = 0; c < N_REG_CLASSES; c++)
2785 reg_class_to_reg_type[c] = NO_REG_TYPE;
2786
2787 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2788 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2789 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2790 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2791 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2792 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2793 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2794 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2795 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2796 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2797
2798 if (TARGET_VSX)
2799 {
2800 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2801 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2802 }
2803 else
2804 {
2805 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2806 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2807 }
2808
2809 /* Precalculate the valid memory formats as well as the vector information;
2810 this must be set up before the rs6000_hard_regno_nregs_internal calls
2811 below.  */
2812 gcc_assert ((int)VECTOR_NONE == 0);
2813 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2814 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2815
2816 gcc_assert ((int)CODE_FOR_nothing == 0);
2817 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2818
2819 gcc_assert ((int)NO_REGS == 0);
2820 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2821
2822 /* The VSX hardware allows native alignment for vectors; control whether the
2823 compiler believes it can use native alignment or must use 128-bit alignment.  */
2824 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2825 {
2826 align64 = 64;
2827 align32 = 32;
2828 }
2829 else
2830 {
2831 align64 = 128;
2832 align32 = 128;
2833 }
2834
2835 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2836 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2837 if (TARGET_FLOAT128_TYPE)
2838 {
2839 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2840 rs6000_vector_align[KFmode] = 128;
2841
2842 if (FLOAT128_IEEE_P (TFmode))
2843 {
2844 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2845 rs6000_vector_align[TFmode] = 128;
2846 }
2847 }
2848
2849 /* V2DF mode, VSX only. */
2850 if (TARGET_VSX)
2851 {
2852 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2853 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2854 rs6000_vector_align[V2DFmode] = align64;
2855 }
2856
2857 /* V4SF mode, either VSX or Altivec. */
2858 if (TARGET_VSX)
2859 {
2860 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2861 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2862 rs6000_vector_align[V4SFmode] = align32;
2863 }
2864 else if (TARGET_ALTIVEC)
2865 {
2866 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2867 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2868 rs6000_vector_align[V4SFmode] = align32;
2869 }
2870
2871 /* V16QImode, V8HImode and V4SImode have Altivec-only arithmetic, but can
2872 possibly use VSX loads and stores.  */
2873 if (TARGET_ALTIVEC)
2874 {
2875 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2876 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2877 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2878 rs6000_vector_align[V4SImode] = align32;
2879 rs6000_vector_align[V8HImode] = align32;
2880 rs6000_vector_align[V16QImode] = align32;
2881
2882 if (TARGET_VSX)
2883 {
2884 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2885 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2886 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2887 }
2888 else
2889 {
2890 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2891 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2892 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2893 }
2894 }
2895
2896 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector instructions.
2897 Allow it under VSX for insert/splat/extract.  Altivec lacks 64-bit integer support.  */
2898 if (TARGET_VSX)
2899 {
2900 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2901 rs6000_vector_unit[V2DImode]
2902 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2903 rs6000_vector_align[V2DImode] = align64;
2904
2905 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2906 rs6000_vector_unit[V1TImode]
2907 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2908 rs6000_vector_align[V1TImode] = 128;
2909 }
2910
2911 /* DFmode, see if we want to use the VSX unit. Memory is handled
2912 differently, so don't set rs6000_vector_mem. */
2913 if (TARGET_VSX)
2914 {
2915 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2916 rs6000_vector_align[DFmode] = 64;
2917 }
2918
2919 /* SFmode, see if we want to use the VSX unit. */
2920 if (TARGET_P8_VECTOR)
2921 {
2922 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2923 rs6000_vector_align[SFmode] = 32;
2924 }
2925
2926 /* Allow TImode in VSX registers and set the VSX memory macros.  */
2927 if (TARGET_VSX)
2928 {
2929 rs6000_vector_mem[TImode] = VECTOR_VSX;
2930 rs6000_vector_align[TImode] = align64;
2931 }
2932
2933 /* Add support for vector pairs and vector quad registers. */
2934 if (TARGET_MMA)
2935 {
2936 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2937 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2938 rs6000_vector_align[OOmode] = 256;
2939
2940 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2941 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2942 rs6000_vector_align[XOmode] = 512;
2943 }
2944
2945 /* Register class constraints for the constraints that depend on compile
2946 switches. When the VSX code was added, different constraints were added
2947 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2948 of the VSX registers are used. The register classes for scalar floating
2949 point types are set, based on whether we allow that type into the upper
2950 (Altivec) registers. GCC has register classes to target the Altivec
2951 registers for load/store operations, to select using a VSX memory
2952 operation instead of the traditional floating point operation. The
2953 constraints are:
2954
2955 d - Register class to use with traditional DFmode instructions.
2956 v - Altivec register.
2957 wa - Any VSX register.
2958 wc - Reserved to represent individual CR bits (used in LLVM).
2959 wn - always NO_REGS.
2960 wr - GPR if 64-bit mode is permitted.
2961 wx - Float register if we can do 32-bit int stores. */
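/* These letters appear as register constraints in the machine description
   and in user inline asm; an illustrative (not project-supplied) example:

     double x;
     __asm__ ("xsabsdp %x0,%x1" : "=wa" (x) : "wa" (x));

   where "wa" requests any VSX register and the %x modifier prints the
   full VSX register number.  */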
2962
2963 if (TARGET_HARD_FLOAT)
2964 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2965 if (TARGET_ALTIVEC)
2966 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2967 if (TARGET_VSX)
2968 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2969
2970 if (TARGET_POWERPC64)
2971 {
2972 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2973 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2974 }
2975
2976 if (TARGET_STFIWX)
2977 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2978
2979 /* Support for new direct moves (ISA 3.0 + 64-bit).  */
2980 if (TARGET_DIRECT_MOVE_128)
2981 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2982
2983 /* Set up the reload helper and direct move functions. */
2984 if (TARGET_VSX || TARGET_ALTIVEC)
2985 {
2986 if (TARGET_64BIT)
2987 {
2988 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2989 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2990 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2991 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2992 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2993 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2994 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2995 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2996 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2997 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2998 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2999 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3000 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3001 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3002 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3003 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3004 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3005 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3006 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3007 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3008
3009 if (FLOAT128_VECTOR_P (KFmode))
3010 {
3011 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3012 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3013 }
3014
3015 if (FLOAT128_VECTOR_P (TFmode))
3016 {
3017 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3018 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3019 }
3020
3021 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3022 available. */
3023 if (TARGET_NO_SDMODE_STACK)
3024 {
3025 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3026 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3027 }
3028
3029 if (TARGET_VSX)
3030 {
3031 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3032 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3033 }
3034
3035 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3036 {
3037 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3038 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3039 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3040 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3041 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3042 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3043 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3044 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3045 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3046
3047 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3048 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3049 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3050 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3051 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3052 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3053 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3054 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3055 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3056
3057 if (FLOAT128_VECTOR_P (KFmode))
3058 {
3059 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3060 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3061 }
3062
3063 if (FLOAT128_VECTOR_P (TFmode))
3064 {
3065 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3066 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3067 }
3068
3069 if (TARGET_MMA)
3070 {
3071 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3072 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3073 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3074 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3075 }
3076 }
3077 }
3078 else
3079 {
3080 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3081 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3082 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3083 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3084 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3085 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3086 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3087 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3088 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3089 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3090 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3091 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3092 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3093 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3094 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3095 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3096 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3097 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3098 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3099 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3100
3101 if (FLOAT128_VECTOR_P (KFmode))
3102 {
3103 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3104 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3105 }
3106
3107 if (FLOAT128_IEEE_P (TFmode))
3108 {
3109 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3110 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3111 }
3112
3113 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3114 available. */
3115 if (TARGET_NO_SDMODE_STACK)
3116 {
3117 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3118 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3119 }
3120
3121 if (TARGET_VSX)
3122 {
3123 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3124 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3125 }
3126
3127 if (TARGET_DIRECT_MOVE)
3128 {
3129 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3130 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3131 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3132 }
3133 }
3134
3135 reg_addr[DFmode].scalar_in_vmx_p = true;
3136 reg_addr[DImode].scalar_in_vmx_p = true;
3137
3138 if (TARGET_P8_VECTOR)
3139 {
3140 reg_addr[SFmode].scalar_in_vmx_p = true;
3141 reg_addr[SImode].scalar_in_vmx_p = true;
3142
3143 if (TARGET_P9_VECTOR)
3144 {
3145 reg_addr[HImode].scalar_in_vmx_p = true;
3146 reg_addr[QImode].scalar_in_vmx_p = true;
3147 }
3148 }
3149 }
3150
3151 /* Precalculate HARD_REGNO_NREGS. */
3152 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3153 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3154 rs6000_hard_regno_nregs[m][r]
3155 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3156
3157 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3158 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3159 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3160 rs6000_hard_regno_mode_ok_p[m][r]
3161 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3162
3163 /* Precalculate CLASS_MAX_NREGS sizes. */
3164 for (c = 0; c < LIM_REG_CLASSES; ++c)
3165 {
3166 int reg_size;
3167
3168 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3169 reg_size = UNITS_PER_VSX_WORD;
3170
3171 else if (c == ALTIVEC_REGS)
3172 reg_size = UNITS_PER_ALTIVEC_WORD;
3173
3174 else if (c == FLOAT_REGS)
3175 reg_size = UNITS_PER_FP_WORD;
3176
3177 else
3178 reg_size = UNITS_PER_WORD;
3179
3180 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3181 {
3182 machine_mode m2 = (machine_mode)m;
3183 int reg_size2 = reg_size;
3184
3185 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3186 in VSX. */
3187 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3188 reg_size2 = UNITS_PER_FP_WORD;
3189
3190 rs6000_class_max_nregs[m][c]
3191 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3192 }
3193 }
3194
3195 /* Calculate the modes for which we should automatically generate code using
3196 the reciprocal divide and square root estimate instructions.  In the
3197 future, possibly generate these instructions even if the user did not
3198 specify -mrecip; the double-precision reciprocal square root estimate on
3199 older machines is not accurate enough.  */
3200 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3201 if (TARGET_FRES)
3202 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3203 if (TARGET_FRE)
3204 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3205 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3206 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3207 if (VECTOR_UNIT_VSX_P (V2DFmode))
3208 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3209
3210 if (TARGET_FRSQRTES)
3211 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3212 if (TARGET_FRSQRTE)
3213 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3214 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3215 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3216 if (VECTOR_UNIT_VSX_P (V2DFmode))
3217 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3218
3219 if (rs6000_recip_control)
3220 {
3221 if (!flag_finite_math_only)
3222 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3223 "-ffast-math");
3224 if (flag_trapping_math)
3225 warning (0, "%qs requires %qs or %qs", "-mrecip",
3226 "-fno-trapping-math", "-ffast-math");
3227 if (!flag_reciprocal_math)
3228 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3229 "-ffast-math");
3230 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3231 {
3232 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3233 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3234 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3235
3236 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3237 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3238 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3239
3240 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3241 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3242 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3243
3244 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3245 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3246 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3247
3248 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3249 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3250 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3251
3252 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3253 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3254 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3255
3256 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3257 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3258 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3259
3260 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3261 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3262 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3263 }
3264 }
3265
3266 /* Update the addr mask bits in reg_addr to help secondary reload and the
3267 legitimate address support figure out the appropriate addressing to
3268 use.  */
3269 rs6000_setup_reg_addr_masks ();
3270
3271 if (global_init_p || TARGET_DEBUG_TARGET)
3272 {
3273 if (TARGET_DEBUG_REG)
3274 rs6000_debug_reg_global ();
3275
3276 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3277 fprintf (stderr,
3278 "SImode variable mult cost = %d\n"
3279 "SImode constant mult cost = %d\n"
3280 "SImode short constant mult cost = %d\n"
3281 "DImode multipliciation cost = %d\n"
3282 "SImode division cost = %d\n"
3283 "DImode division cost = %d\n"
3284 "Simple fp operation cost = %d\n"
3285 "DFmode multiplication cost = %d\n"
3286 "SFmode division cost = %d\n"
3287 "DFmode division cost = %d\n"
3288 "cache line size = %d\n"
3289 "l1 cache size = %d\n"
3290 "l2 cache size = %d\n"
3291 "simultaneous prefetches = %d\n"
3292 "\n",
3293 rs6000_cost->mulsi,
3294 rs6000_cost->mulsi_const,
3295 rs6000_cost->mulsi_const9,
3296 rs6000_cost->muldi,
3297 rs6000_cost->divsi,
3298 rs6000_cost->divdi,
3299 rs6000_cost->fp,
3300 rs6000_cost->dmul,
3301 rs6000_cost->sdiv,
3302 rs6000_cost->ddiv,
3303 rs6000_cost->cache_line_size,
3304 rs6000_cost->l1_cache_size,
3305 rs6000_cost->l2_cache_size,
3306 rs6000_cost->simultaneous_prefetches);
3307 }
3308 }
3309
3310 #if TARGET_MACHO
3311 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3312
3313 static void
3314 darwin_rs6000_override_options (void)
3315 {
3316 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3317 off. */
3318 rs6000_altivec_abi = 1;
3319 TARGET_ALTIVEC_VRSAVE = 1;
3320 rs6000_current_abi = ABI_DARWIN;
3321
3322 if (DEFAULT_ABI == ABI_DARWIN
3323 && TARGET_64BIT)
3324 darwin_one_byte_bool = 1;
3325
3326 if (TARGET_64BIT && ! TARGET_POWERPC64)
3327 {
3328 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3329 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3330 }
3331
3332 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3333 optimisation, and will not work with the most generic case (where the
3334 symbol is undefined external, but there is no symbol stub). */
3335 if (TARGET_64BIT)
3336 rs6000_default_long_calls = 0;
3337
3338 /* ld_classic is (so far) still used for kernel (static) code, and supports
3339 the JBSR longcall / branch islands. */
3340 if (flag_mkernel)
3341 {
3342 rs6000_default_long_calls = 1;
3343
3344 /* Allow a kext author to do -mkernel -mhard-float. */
3345 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3346 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3347 }
3348
3349 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3350 AltiVec. */
3351 if (!flag_mkernel && !flag_apple_kext
3352 && TARGET_64BIT
3353 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3354 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3355
3356 /* Unless the user (not the configurer) has explicitly overridden
3357 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to the
3358 G4 unless targeting the kernel. */
3359 if (!flag_mkernel
3360 && !flag_apple_kext
3361 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3362 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3363 && ! OPTION_SET_P (rs6000_cpu_index))
3364 {
3365 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3366 }
3367 }
3368 #endif
3369
3370 /* If not otherwise specified by a target, make 'long double' equivalent to
3371 'double'. */
3372
3373 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3374 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3375 #endif
3376
3377 /* Return the builtin mask of the various options used that could affect which
3378 builtins were used. In the past we used target_flags, but we've run out of
3379 bits, and some options are no longer in target_flags. */
3380
3381 HOST_WIDE_INT
3382 rs6000_builtin_mask_calculate (void)
3383 {
3384 return (((TARGET_ALTIVEC) ? OPTION_MASK_ALTIVEC : 0)
3385 | ((TARGET_CMPB) ? OPTION_MASK_CMPB : 0)
3386 | ((TARGET_VSX) ? OPTION_MASK_VSX : 0)
3387 | ((TARGET_FRE) ? OPTION_MASK_POPCNTB : 0)
3388 | ((TARGET_FRES) ? OPTION_MASK_PPC_GFXOPT : 0)
3389 | ((TARGET_FRSQRTE) ? OPTION_MASK_PPC_GFXOPT : 0)
3390 | ((TARGET_FRSQRTES) ? OPTION_MASK_POPCNTB : 0)
3391 | ((TARGET_POPCNTD) ? OPTION_MASK_POPCNTD : 0)
3392 | ((rs6000_cpu == PROCESSOR_CELL) ? OPTION_MASK_FPRND : 0)
3393 | ((TARGET_P8_VECTOR) ? OPTION_MASK_P8_VECTOR : 0)
3394 | ((TARGET_P9_VECTOR) ? OPTION_MASK_P9_VECTOR : 0)
3395 | ((TARGET_P9_MISC) ? OPTION_MASK_P9_MISC : 0)
3396 | ((TARGET_MODULO) ? OPTION_MASK_MODULO : 0)
3397 | ((TARGET_64BIT) ? MASK_64BIT : 0)
3398 | ((TARGET_POWERPC64) ? MASK_POWERPC64 : 0)
3399 | ((TARGET_CRYPTO) ? OPTION_MASK_CRYPTO : 0)
3400 | ((TARGET_HTM) ? OPTION_MASK_HTM : 0)
3401 | ((TARGET_DFP) ? OPTION_MASK_DFP : 0)
3402 | ((TARGET_HARD_FLOAT) ? OPTION_MASK_SOFT_FLOAT : 0)
3403 | ((TARGET_LONG_DOUBLE_128
3404 && TARGET_HARD_FLOAT
3405 && !TARGET_IEEEQUAD) ? OPTION_MASK_MULTIPLE : 0)
3406 | ((TARGET_FLOAT128_TYPE) ? OPTION_MASK_FLOAT128_KEYWORD : 0)
3407 | ((TARGET_FLOAT128_HW) ? OPTION_MASK_FLOAT128_HW : 0)
3408 | ((TARGET_MMA) ? OPTION_MASK_MMA : 0)
3409 | ((TARGET_POWER10) ? OPTION_MASK_POWER10 : 0));
3410 }
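/* A sketch of how the mask computed above is typically consumed: a builtin
   that needs both VSX and 64-bit support is only considered enabled when
   both of its bits are present.  The names bif_mask and enabled below are
   illustrative, not definitions from this file.

     HOST_WIDE_INT bif_mask = OPTION_MASK_VSX | MASK_64BIT;
     bool enabled = (rs6000_builtin_mask & bif_mask) == bif_mask;
*/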
3411
3412 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3413 to clobber the XER[CA] bit because clobbering that bit without telling
3414 the compiler worked just fine with versions of GCC before GCC 5, and
3415 breaking a lot of older code in ways that are hard to track down is
3416 not such a great idea. */
3417
3418 static rtx_insn *
3419 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3420 vec<machine_mode> & /*input_modes*/,
3421 vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
3422 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3423 {
3424 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3425 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3426 return NULL;
3427 }
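/* A sketch of the situation the hook above covers (hypothetical user code,
   not part of this file): "addic" sets XER[CA] without listing it in the
   clobbers, and the implicit clobber pushed above keeps such asm safe.

     unsigned long
     bump_with_carry (unsigned long x)
     {
       unsigned long r;
       __asm__ ("addic %0,%1,1" : "=r" (r) : "r" (x));  /* sets XER[CA] */
       return r;
     }
*/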
3428
3429 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3430 but is called when the optimize level is changed via an attribute or
3431 pragma or when it is reset at the end of the code affected by the
3432 attribute or pragma. It is not called at the beginning of compilation
3433 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3434 actions then, you should have TARGET_OPTION_OVERRIDE call
3435 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3436
3437 static void
3438 rs6000_override_options_after_change (void)
3439 {
3440 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3441 turns -frename-registers on. */
3442 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3443 || (OPTION_SET_P (flag_unroll_all_loops)
3444 && flag_unroll_all_loops))
3445 {
3446 if (!OPTION_SET_P (unroll_only_small_loops))
3447 unroll_only_small_loops = 0;
3448 if (!OPTION_SET_P (flag_rename_registers))
3449 flag_rename_registers = 1;
3450 if (!OPTION_SET_P (flag_cunroll_grow_size))
3451 flag_cunroll_grow_size = 1;
3452 }
3453 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3454 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3455
3456 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3457 if (rs6000_rop_protect)
3458 flag_shrink_wrap = 0;
3459 }
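/* Net effect of the logic above on a hypothetical command line:

       gcc -O2 -funroll-loops foo.c

   clears unroll_only_small_loops and sets flag_rename_registers and
   flag_cunroll_grow_size, unless any of them was specified explicitly.  */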
3460
3461 #ifdef TARGET_USES_LINUX64_OPT
3462 static void
3463 rs6000_linux64_override_options ()
3464 {
3465 if (!OPTION_SET_P (rs6000_alignment_flags))
3466 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3467 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3468 {
3469 if (DEFAULT_ABI != ABI_AIX)
3470 {
3471 rs6000_current_abi = ABI_AIX;
3472 error (INVALID_64BIT, "call");
3473 }
3474 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3475 if (ELFv2_ABI_CHECK)
3476 {
3477 rs6000_current_abi = ABI_ELFv2;
3478 if (dot_symbols)
3479 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3480 }
3481 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3482 {
3483 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3484 error (INVALID_64BIT, "relocatable");
3485 }
3486 if (rs6000_isa_flags & OPTION_MASK_EABI)
3487 {
3488 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3489 error (INVALID_64BIT, "eabi");
3490 }
3491 if (TARGET_PROTOTYPE)
3492 {
3493 target_prototype = 0;
3494 error (INVALID_64BIT, "prototype");
3495 }
3496 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3497 {
3498 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3499 error ("%<-m64%> requires a PowerPC64 cpu");
3500 }
3501 if (!OPTION_SET_P (rs6000_current_cmodel))
3502 SET_CMODEL (CMODEL_MEDIUM);
3503 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3504 {
3505 if (OPTION_SET_P (rs6000_current_cmodel)
3506 && rs6000_current_cmodel != CMODEL_SMALL)
3507 error ("%<-mcmodel%> incompatible with other toc options");
3508 if (TARGET_MINIMAL_TOC)
3509 SET_CMODEL (CMODEL_SMALL);
3510 else if (TARGET_PCREL
3511 || (PCREL_SUPPORTED_BY_OS
3512 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3513 /* Ignore -mno-minimal-toc. */
3514 ;
3515 else
3516 SET_CMODEL (CMODEL_SMALL);
3517 }
3518 if (rs6000_current_cmodel != CMODEL_SMALL)
3519 {
3520 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3521 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3522 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3523 TARGET_NO_SUM_IN_TOC = 0;
3524 }
3525 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3526 {
3527 if (OPTION_SET_P (rs6000_pltseq))
3528 warning (0, "%qs unsupported for this ABI",
3529 "-mpltseq");
3530 rs6000_pltseq = false;
3531 }
3532 }
3533 else if (TARGET_64BIT)
3534 error (INVALID_32BIT, "32");
3535 else
3536 {
3537 if (TARGET_PROFILE_KERNEL)
3538 {
3539 profile_kernel = 0;
3540 error (INVALID_32BIT, "profile-kernel");
3541 }
3542 if (OPTION_SET_P (rs6000_current_cmodel))
3543 {
3544 SET_CMODEL (CMODEL_SMALL);
3545 error (INVALID_32BIT, "cmodel");
3546 }
3547 }
3548 }
3549 #endif
3550
3551 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3552 This support is only in little endian GLIBC 2.32 or newer. */
3553 static bool
3554 glibc_supports_ieee_128bit (void)
3555 {
3556 #ifdef OPTION_GLIBC
3557 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3558 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3559 return true;
3560 #endif /* OPTION_GLIBC. */
3561
3562 return false;
3563 }
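/* Worked example of the version check above: glibc 2.32 encodes as
   2 * 1000 + 32 = 2032 and passes, while glibc 2.31 encodes as 2031 and
   fails, so 2.32 is the first release accepted.  */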
3564
3565 /* Override command line options.
3566
3567 Combine build-specific configuration information with options
3568 specified on the command line to set various state variables which
3569 influence code generation, optimization, and expansion of built-in
3570 functions. Assure that command-line configuration preferences are
3571 compatible with each other and with the build configuration; issue
3572 warnings while adjusting configuration or error messages while
3573 rejecting configuration.
3574
3575 Upon entry to this function:
3576
3577 This function is called once at the beginning of
3578 compilation, and then again at the start and end of compiling
3579 each section of code that has a different configuration, as
3580 indicated, for example, by adding the
3581
3582 __attribute__((__target__("cpu=power9")))
3583
3584 qualifier to a function definition or, for example, by bracketing
3585 code between
3586
3587 #pragma GCC target("altivec")
3588
3589 and
3590
3591 #pragma GCC reset_options
3592
3593 directives. Parameter global_init_p is true for the initial
3594 invocation, which initializes global variables, and false for all
3595 subsequent invocations.
3596
3597
3598 Various global state information is assumed to be valid. This
3599 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3600 default CPU specified at build configure time, TARGET_DEFAULT,
3601 representing the default set of option flags for the default
3602 target, and OPTION_SET_P (rs6000_isa_flags), representing
3603 which options were requested on the command line.
3604
3605 Upon return from this function:
3606
3607 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3608 was set by name on the command line. Additionally, if certain
3609 attributes are automatically enabled or disabled by this function
3610 in order to assure compatibility between options and
3611 configuration, the flags associated with those attributes are
3612 also set. By setting these "explicit bits", we avoid the risk
3613 that other code might accidentally overwrite these particular
3614 attributes with "default values".
3615
3616 The various bits of rs6000_isa_flags are set to indicate the
3617 target options that have been selected for the most current
3618 compilation efforts. This has the effect of also turning on the
3619 associated TARGET_XXX values since these are macros which are
3620 generally defined to test the corresponding bit of the
3621 rs6000_isa_flags variable.
3622
3623 The variable rs6000_builtin_mask is set to represent the target
3624 options for the most current compilation efforts, consistent with
3625 the current contents of rs6000_isa_flags. This variable controls
3626 expansion of built-in functions.
3627
3628 Various other global variables and fields of global structures
3629 (over 50 in all) are initialized to reflect the desired options
3630 for the most current compilation efforts. */
3631
3632 static bool
3633 rs6000_option_override_internal (bool global_init_p)
3634 {
3635 bool ret = true;
3636
3637 HOST_WIDE_INT set_masks;
3638 HOST_WIDE_INT ignore_masks;
3639 int cpu_index = -1;
3640 int tune_index;
3641 struct cl_target_option *main_target_opt
3642 = ((global_init_p || target_option_default_node == NULL)
3643 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3644
3645 /* Print defaults. */
3646 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3647 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3648
3649 /* Remember the explicit arguments. */
3650 if (global_init_p)
3651 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3652
3653 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3654 library functions, so warn about it. The flag may be useful for
3655 performance studies from time to time though, so don't disable it
3656 entirely. */
3657 if (OPTION_SET_P (rs6000_alignment_flags)
3658 && rs6000_alignment_flags == MASK_ALIGN_POWER
3659 && DEFAULT_ABI == ABI_DARWIN
3660 && TARGET_64BIT)
3661 warning (0, "%qs is not supported for 64-bit Darwin;"
3662 " it is incompatible with the installed C and C++ libraries",
3663 "-malign-power");
3664
3665 /* Numerous experiments show that IRA-based loop pressure
3666 calculation works better for RTL loop invariant motion on targets
3667 with enough (>= 32) registers. It is an expensive optimization,
3668 so it is enabled only for peak performance (-O3 and above). */
3669 if (optimize >= 3 && global_init_p
3670 && !OPTION_SET_P (flag_ira_loop_pressure))
3671 flag_ira_loop_pressure = 1;
3672
3673 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3674 for tracebacks to be complete, but not if -fasynchronous-unwind-tables was
3675 already specified explicitly. */
3676 if (flag_sanitize & SANITIZE_USER_ADDRESS
3677 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3678 flag_asynchronous_unwind_tables = 1;
3679
3680 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3681 loop unroller is active. It is only checked during unrolling, so
3682 we can just set it on by default. */
3683 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3684 flag_variable_expansion_in_unroller = 1;
3685
3686 /* Set the pointer size. */
3687 if (TARGET_64BIT)
3688 {
3689 rs6000_pmode = DImode;
3690 rs6000_pointer_size = 64;
3691 }
3692 else
3693 {
3694 rs6000_pmode = SImode;
3695 rs6000_pointer_size = 32;
3696 }
3697
3698 /* Some OSs don't support saving the high part of 64-bit registers on context
3699 switch. Other OSs don't support saving Altivec registers. On those OSs,
3700 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3701 if the user wants either, the user must explicitly specify them and we
3702 won't interfere with the user's specification. */
3703
3704 set_masks = POWERPC_MASKS;
3705 #ifdef OS_MISSING_POWERPC64
3706 if (OS_MISSING_POWERPC64)
3707 set_masks &= ~OPTION_MASK_POWERPC64;
3708 #endif
3709 #ifdef OS_MISSING_ALTIVEC
3710 if (OS_MISSING_ALTIVEC)
3711 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3712 | OTHER_VSX_VECTOR_MASKS);
3713 #endif
3714
3715 /* Don't let the processor default override flags given explicitly. */
3716 set_masks &= ~rs6000_isa_flags_explicit;
3717
3718 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3719 the cpu in a target attribute or pragma, but did not specify a tuning
3720 option, use the cpu for the tuning option rather than the option specified
3721 with -mtune on the command line. Process a '--with-cpu' configuration
3722 request as an implicit -mcpu. */
3723 if (rs6000_cpu_index >= 0)
3724 cpu_index = rs6000_cpu_index;
3725 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3726 cpu_index = main_target_opt->x_rs6000_cpu_index;
3727 else if (OPTION_TARGET_CPU_DEFAULT)
3728 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3729
3730 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3731 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3732 with those from the cpu, except for options that were explicitly set. If
3733 we don't have a cpu, do not override the target bits set in
3734 TARGET_DEFAULT. */
3735 if (cpu_index >= 0)
3736 {
3737 rs6000_cpu_index = cpu_index;
3738 rs6000_isa_flags &= ~set_masks;
3739 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3740 & set_masks);
3741 }
3742 else
3743 {
3744 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3745 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3746 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3747 to using rs6000_isa_flags, we need to do the initialization here.
3748
3749 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3750 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3751 HOST_WIDE_INT flags;
3752 if (TARGET_DEFAULT)
3753 flags = TARGET_DEFAULT;
3754 else
3755 {
3756 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3757 const char *default_cpu = (!TARGET_POWERPC64
3758 ? "powerpc"
3759 : (BYTES_BIG_ENDIAN
3760 ? "powerpc64"
3761 : "powerpc64le"));
3762 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3763 flags = processor_target_table[default_cpu_index].target_enable;
3764 }
3765 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3766 }
3767
3768 if (rs6000_tune_index >= 0)
3769 tune_index = rs6000_tune_index;
3770 else if (cpu_index >= 0)
3771 rs6000_tune_index = tune_index = cpu_index;
3772 else
3773 {
3774 size_t i;
3775 enum processor_type tune_proc
3776 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3777
3778 tune_index = -1;
3779 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3780 if (processor_target_table[i].processor == tune_proc)
3781 {
3782 tune_index = i;
3783 break;
3784 }
3785 }
3786
3787 if (cpu_index >= 0)
3788 rs6000_cpu = processor_target_table[cpu_index].processor;
3789 else
3790 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3791
3792 gcc_assert (tune_index >= 0);
3793 rs6000_tune = processor_target_table[tune_index].processor;
3794
3795 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3796 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3797 || rs6000_cpu == PROCESSOR_PPCE5500)
3798 {
3799 if (TARGET_ALTIVEC)
3800 error ("AltiVec not supported in this target");
3801 }
3802
3803 /* If we are optimizing big endian systems for space, use the load/store
3804 multiple instructions. */
3805 if (BYTES_BIG_ENDIAN && optimize_size)
3806 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3807
3808 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3809 because the hardware doesn't support the instructions used in little
3810 endian mode, and they cause an alignment trap. The 750 does not cause an
3811 alignment trap (except when the target is unaligned). */
3812
3813 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3814 {
3815 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3816 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3817 warning (0, "%qs is not supported on little endian systems",
3818 "-mmultiple");
3819 }
3820
3821 /* If little-endian, default to -mstrict-align on older processors.
3822 Testing for direct_move matches power8 and later. */
3823 if (!BYTES_BIG_ENDIAN
3824 && !(processor_target_table[tune_index].target_enable
3825 & OPTION_MASK_DIRECT_MOVE))
3826 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3827
3828 /* Add some warnings for VSX. */
3829 if (TARGET_VSX)
3830 {
3831 const char *msg = NULL;
3832 if (!TARGET_HARD_FLOAT)
3833 {
3834 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3835 msg = N_("%<-mvsx%> requires hardware floating point");
3836 else
3837 {
3838 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3839 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3840 }
3841 }
3842 else if (TARGET_AVOID_XFORM > 0)
3843 msg = N_("%<-mvsx%> needs indexed addressing");
3844 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3845 & OPTION_MASK_ALTIVEC))
3846 {
3847 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3848 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3849 else
3850 msg = N_("%<-mno-altivec%> disables vsx");
3851 }
3852
3853 if (msg)
3854 {
3855 warning (0, msg);
3856 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3857 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3858 }
3859 }
3860
3861 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3862 the -mcpu setting to enable options that conflict. */
3863 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3864 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3865 | OPTION_MASK_ALTIVEC
3866 | OPTION_MASK_VSX)) != 0)
3867 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3868 | OPTION_MASK_DIRECT_MOVE)
3869 & ~rs6000_isa_flags_explicit);
3870
3871 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3872 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3873
3874 #ifdef XCOFF_DEBUGGING_INFO
3875 /* For AIX default to 64-bit DWARF. */
3876 if (!OPTION_SET_P (dwarf_offset_size))
3877 dwarf_offset_size = POINTER_SIZE_UNITS;
3878 #endif
3879
3880 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3881 off all of the options that depend on those flags. */
3882 ignore_masks = rs6000_disable_incompatible_switches ();
3883
3884 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3885 unless the user explicitly used the -mno-<option> to disable the code. */
3886 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3887 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3888 else if (TARGET_P9_MINMAX)
3889 {
3890 if (cpu_index >= 0)
3891 {
3892 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3893 {
3894 /* legacy behavior: allow -mcpu=power9 with certain
3895 capabilities explicitly disabled. */
3896 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3897 }
3898 else
3899 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3900 "for <xxx> less than power9", "-mcpu");
3901 }
3902 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3903 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3904 & rs6000_isa_flags_explicit))
3905 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3906 were explicitly cleared. */
3907 error ("%qs incompatible with explicitly disabled options",
3908 "-mpower9-minmax");
3909 else
3910 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3911 }
3912 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3913 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3914 else if (TARGET_VSX)
3915 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3916 else if (TARGET_POPCNTD)
3917 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3918 else if (TARGET_DFP)
3919 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3920 else if (TARGET_CMPB)
3921 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3922 else if (TARGET_FPRND)
3923 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3924 else if (TARGET_POPCNTB)
3925 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3926 else if (TARGET_ALTIVEC)
3927 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3928
3929 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3930 target attribute or pragma which automatically enables both options,
3931 unless the altivec ABI was set. This is set by default for 64-bit, but
3932 not for 32-bit. Don't move this before the above code using ignore_masks,
3933 since it can reset the cleared VSX/ALTIVEC flag again. */
3934 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
3935 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3936 & ~rs6000_isa_flags_explicit);
3937
3938 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3939 {
3940 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3941 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3942 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3943 }
3944
3945 if (!TARGET_FPRND && TARGET_VSX)
3946 {
3947 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3948 /* TARGET_VSX = 1 implies Power7 and newer. */
3949 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3950 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3951 }
3952
3953 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3954 {
3955 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3956 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3957 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3958 }
3959
3960 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3961 {
3962 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3963 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3964 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3965 }
3966
3967 if (TARGET_P8_VECTOR && !TARGET_VSX)
3968 {
3969 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3970 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3971 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3972 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3973 {
3974 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3975 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3976 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3977 }
3978 else
3979 {
3980 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3981 not explicit. */
3982 rs6000_isa_flags |= OPTION_MASK_VSX;
3983 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3984 }
3985 }
3986
3987 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3988 {
3989 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3990 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3991 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3992 }
3993
3994 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3995 silently turn off quad memory mode. */
3996 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3997 {
3998 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3999 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
4000
4001 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4002 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
4003
4004 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4005 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4006 }
4007
4008 /* Non-atomic quad memory load/store are disabled for little endian, since
4009 the words are reversed, but atomic operations can still be done by
4010 swapping the words. */
4011 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4012 {
4013 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4014 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4015 "mode"));
4016
4017 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4018 }
4019
4020 /* Assume that if the user asked for normal quad memory instructions, they
4021 want the atomic versions as well, unless they explicitly told us not to
4022 use quad word atomic instructions. */
4023 if (TARGET_QUAD_MEMORY
4024 && !TARGET_QUAD_MEMORY_ATOMIC
4025 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4026 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
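/* Summarizing the three quad-memory blocks above with hypothetical command
   lines: "-m32 -mquad-memory" is dropped (with a warning if explicit); on
   little endian only the non-atomic form is dropped; and "-m64
   -mquad-memory" on big endian also turns on -mquad-memory-atomic unless
   that was explicitly disabled.  */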
4027
4028 /* If we can shrink-wrap the TOC register save separately, then use
4029 -msave-toc-indirect unless explicitly disabled. */
4030 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4031 && flag_shrink_wrap_separate
4032 && optimize_function_for_speed_p (cfun))
4033 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4034
4035 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4036 generating power8 instructions. Power9 does not optimize power8 fusion
4037 cases. */
4038 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4039 {
4040 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4041 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4042 else
4043 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4044 }
4045
4046 /* Setting additional fusion flags turns on base fusion. */
4047 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4048 {
4049 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4050 {
4051 if (TARGET_P8_FUSION_SIGN)
4052 error ("%qs requires %qs", "-mpower8-fusion-sign",
4053 "-mpower8-fusion");
4054
4055 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4056 }
4057 else
4058 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4059 }
4060
4061 /* Power8 does not fuse sign extended loads with the addis. If we are
4062 optimizing at high levels for speed, convert a sign extended load into a
4063 zero extending load, and an explicit sign extension. */
4064 if (TARGET_P8_FUSION
4065 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4066 && optimize_function_for_speed_p (cfun)
4067 && optimize >= 3)
4068 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4069
4070 /* ISA 3.0 vector instructions include ISA 2.07. */
4071 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4072 {
4073 /* We prefer to not mention undocumented options in
4074 error messages. However, if users have managed to select
4075 power9-vector without selecting power8-vector, they
4076 already know about undocumented flags. */
4077 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4078 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4079 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4080 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4081 {
4082 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4083 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4084 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4085 }
4086 else
4087 {
4088 /* OPTION_MASK_P9_VECTOR is explicit and
4089 OPTION_MASK_P8_VECTOR is not explicit. */
4090 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4091 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4092 }
4093 }
4094
4095 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4096 support. If we only have ISA 2.06 support, and the user did not specify
4097 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4098 but we don't enable the full vectorization support. */
4099 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4100 TARGET_ALLOW_MOVMISALIGN = 1;
4101
4102 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4103 {
4104 if (TARGET_ALLOW_MOVMISALIGN > 0
4105 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4106 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4107
4108 TARGET_ALLOW_MOVMISALIGN = 0;
4109 }
4110
4111 /* Determine when unaligned vector accesses are permitted, and when
4112 they are preferred over masked Altivec loads. Note that if
4113 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4114 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4115 not true. */
4116 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4117 {
4118 if (!TARGET_VSX)
4119 {
4120 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4121 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4122
4123 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4124 }
4125
4126 else if (!TARGET_ALLOW_MOVMISALIGN)
4127 {
4128 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4129 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4130 "-mallow-movmisalign");
4131
4132 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4133 }
4134 }
4135
4136 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4137 {
4138 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4139 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4140 else
4141 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4142 }
4143
4144 /* Use the long double size to select the appropriate long double type. We
4145 use TYPE_PRECISION to differentiate the 3 different long double types. We
4146 map 128 into the precision used for TFmode. */
4147 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4148 ? 64
4149 : FLOAT_PRECISION_TFmode);
4150
4151 /* Set long double size before the IEEE 128-bit tests. */
4152 if (!OPTION_SET_P (rs6000_long_double_type_size))
4153 {
4154 if (main_target_opt != NULL
4155 && (main_target_opt->x_rs6000_long_double_type_size
4156 != default_long_double_size))
4157 error ("target attribute or pragma changes %<long double%> size");
4158 else
4159 rs6000_long_double_type_size = default_long_double_size;
4160 }
4161 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4162 ; /* The option value can be seen when cl_target_option_restore is called. */
4163 else if (rs6000_long_double_type_size == 128)
4164 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4165
4166 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4167 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4168 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4169 those systems will not pick up this default. Warn if the user changes the
4170 default unless -Wno-psabi. */
4171 if (!OPTION_SET_P (rs6000_ieeequad))
4172 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4173
4174 else if (TARGET_LONG_DOUBLE_128)
4175 {
4176 if (global_options.x_rs6000_ieeequad
4177 && (!TARGET_POPCNTD || !TARGET_VSX))
4178 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4179
4180 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4181 {
4182 /* Determine if the user can change the default long double type at
4183 compilation time. You need GLIBC 2.32 or newer to be able to
4184 change the long double type. Only issue one warning. */
4185 static bool warned_change_long_double;
4186
4187 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4188 {
4189 warned_change_long_double = true;
4190 if (TARGET_IEEEQUAD)
4191 warning (OPT_Wpsabi, "Using IEEE extended precision "
4192 "%<long double%>");
4193 else
4194 warning (OPT_Wpsabi, "Using IBM extended precision "
4195 "%<long double%>");
4196 }
4197 }
4198 }
4199
4200 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4201 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4202 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4203 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4204 the keyword as well as the type. */
4205 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4206
4207 /* IEEE 128-bit floating point requires VSX support. */
4208 if (TARGET_FLOAT128_KEYWORD)
4209 {
4210 if (!TARGET_VSX)
4211 {
4212 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4213 error ("%qs requires VSX support", "-mfloat128");
4214
4215 TARGET_FLOAT128_TYPE = 0;
4216 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4217 | OPTION_MASK_FLOAT128_HW);
4218 }
4219 else if (!TARGET_FLOAT128_TYPE)
4220 {
4221 TARGET_FLOAT128_TYPE = 1;
4222 warning (0, "The %<-mfloat128%> option may not be fully supported");
4223 }
4224 }
4225
4226 /* Enable the __float128 keyword under Linux by default. */
4227 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4228 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4229 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4230
4231 /* If we support the float128 type and have full ISA 3.0 support, then
4232 enable -mfloat128-hardware by default. However, don't enable the
4233 hardware support if it was explicitly turned off. 64-bit mode is needed
4234 because sometimes the compiler wants to put things in an integer
4235 container, and if we don't have __int128 support, it is impossible. */
4236 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4237 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4238 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4239 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4240
4241 if (TARGET_FLOAT128_HW
4242 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4243 {
4244 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4245 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4246
4247 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4248 }
4249
4250 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4251 {
4252 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4253 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4254
4255 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4256 }
4257
4258 /* Enable -mprefixed by default on power10 systems. */
4259 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4260 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4261
4262 /* -mprefixed requires -mcpu=power10 (or later). */
4263 else if (TARGET_PREFIXED && !TARGET_POWER10)
4264 {
4265 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4266 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4267
4268 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4269 }
4270
4271 /* -mpcrel requires prefixed load/store addressing. */
4272 if (TARGET_PCREL && !TARGET_PREFIXED)
4273 {
4274 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4275 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4276
4277 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4278 }
4279
4280 /* Print the options after updating the defaults. */
4281 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4282 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4283
4284 /* E500mc does "better" if we inline more aggressively. Respect the
4285 user's opinion, though. */
4286 if (rs6000_block_move_inline_limit == 0
4287 && (rs6000_tune == PROCESSOR_PPCE500MC
4288 || rs6000_tune == PROCESSOR_PPCE500MC64
4289 || rs6000_tune == PROCESSOR_PPCE5500
4290 || rs6000_tune == PROCESSOR_PPCE6500))
4291 rs6000_block_move_inline_limit = 128;
4292
4293 /* store_one_arg depends on expand_block_move to handle at least the
4294 size of reg_parm_stack_space. */
4295 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4296 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4297
4298 if (global_init_p)
4299 {
4300 /* If the appropriate debug option is enabled, replace the target hooks
4301 with debug versions that call the real version and then prints
4302 debugging information. */
4303 if (TARGET_DEBUG_COST)
4304 {
4305 targetm.rtx_costs = rs6000_debug_rtx_costs;
4306 targetm.address_cost = rs6000_debug_address_cost;
4307 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4308 }
4309
4310 if (TARGET_DEBUG_ADDR)
4311 {
4312 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4313 targetm.legitimize_address = rs6000_debug_legitimize_address;
4314 rs6000_secondary_reload_class_ptr
4315 = rs6000_debug_secondary_reload_class;
4316 targetm.secondary_memory_needed
4317 = rs6000_debug_secondary_memory_needed;
4318 targetm.can_change_mode_class
4319 = rs6000_debug_can_change_mode_class;
4320 rs6000_preferred_reload_class_ptr
4321 = rs6000_debug_preferred_reload_class;
4322 rs6000_mode_dependent_address_ptr
4323 = rs6000_debug_mode_dependent_address;
4324 }
4325
4326 if (rs6000_veclibabi_name)
4327 {
4328 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4329 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4330 else
4331 {
4332 error ("unknown vectorization library ABI type in "
4333 "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4334 ret = false;
4335 }
4336 }
4337 }
4338
4339 /* Enable Altivec ABI for AIX -maltivec. */
4340 if (TARGET_XCOFF
4341 && (TARGET_ALTIVEC || TARGET_VSX)
4342 && !OPTION_SET_P (rs6000_altivec_abi))
4343 {
4344 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4345 error ("target attribute or pragma changes AltiVec ABI");
4346 else
4347 rs6000_altivec_abi = 1;
4348 }
4349
4350 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4351 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4352 be explicitly overridden in either case. */
4353 if (TARGET_ELF)
4354 {
4355 if (!OPTION_SET_P (rs6000_altivec_abi)
4356 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4357 {
4358 if (main_target_opt != NULL
4359 && !main_target_opt->x_rs6000_altivec_abi)
4360 error ("target attribute or pragma changes AltiVec ABI");
4361 else
4362 rs6000_altivec_abi = 1;
4363 }
4364 }
4365
4366 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4367 So far, the only darwin64 targets are also MACH-O. */
4368 if (TARGET_MACHO
4369 && DEFAULT_ABI == ABI_DARWIN
4370 && TARGET_64BIT)
4371 {
4372 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4373 error ("target attribute or pragma changes darwin64 ABI");
4374 else
4375 {
4376 rs6000_darwin64_abi = 1;
4377 /* Default to natural alignment, for better performance. */
4378 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4379 }
4380 }
4381
4382 /* Place FP constants in the constant pool instead of TOC
4383 if section anchors enabled. */
4384 if (flag_section_anchors
4385 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4386 TARGET_NO_FP_IN_TOC = 1;
4387
4388 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4389 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4390
4391 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4392 SUBTARGET_OVERRIDE_OPTIONS;
4393 #endif
4394 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4395 SUBSUBTARGET_OVERRIDE_OPTIONS;
4396 #endif
4397 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4398 SUB3TARGET_OVERRIDE_OPTIONS;
4399 #endif
4400
4401 /* If the ABI has support for PC-relative relocations, enable it by default.
4402 This test depends on the sub-target tests above setting the code model to
4403 medium for ELF v2 systems. */
4404 if (PCREL_SUPPORTED_BY_OS
4405 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4406 rs6000_isa_flags |= OPTION_MASK_PCREL;
4407
4408 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4409 after the subtarget override options are done. */
4410 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4411 {
4412 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4413 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4414
4415 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4416 }
4417
4418 /* Enable -mmma by default on power10 systems. */
4419 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4420 rs6000_isa_flags |= OPTION_MASK_MMA;
4421
4422 if (TARGET_POWER10
4423 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
4424 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4425
4426 /* Turn off vector pair/mma options on non-power10 systems. */
4427 else if (!TARGET_POWER10 && TARGET_MMA)
4428 {
4429 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4430 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4431
4432 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4433 }
4434
4435 /* ISA 3.1 requires SIMD support for MMA, and our implementation (for
4436 example the "*movoo" pattern) uses vector pair accesses, which use VSX
4437 registers. So make MMA require VSX support here. */
4438 if (TARGET_MMA && !TARGET_VSX)
4439 {
4440 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4441 error ("%qs requires %qs", "-mmma", "-mvsx");
4442 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4443 }
4444
4445 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4446 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4447
4448 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4449 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4450
4451 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4452 && rs6000_tune != PROCESSOR_POWER5
4453 && rs6000_tune != PROCESSOR_POWER6
4454 && rs6000_tune != PROCESSOR_POWER7
4455 && rs6000_tune != PROCESSOR_POWER8
4456 && rs6000_tune != PROCESSOR_POWER9
4457 && rs6000_tune != PROCESSOR_POWER10
4458 && rs6000_tune != PROCESSOR_PPCA2
4459 && rs6000_tune != PROCESSOR_CELL
4460 && rs6000_tune != PROCESSOR_PPC476);
4461 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4462 || rs6000_tune == PROCESSOR_POWER5
4463 || rs6000_tune == PROCESSOR_POWER7
4464 || rs6000_tune == PROCESSOR_POWER8);
4465 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4466 || rs6000_tune == PROCESSOR_POWER5
4467 || rs6000_tune == PROCESSOR_POWER6
4468 || rs6000_tune == PROCESSOR_POWER7
4469 || rs6000_tune == PROCESSOR_POWER8
4470 || rs6000_tune == PROCESSOR_POWER9
4471 || rs6000_tune == PROCESSOR_POWER10
4472 || rs6000_tune == PROCESSOR_PPCE500MC
4473 || rs6000_tune == PROCESSOR_PPCE500MC64
4474 || rs6000_tune == PROCESSOR_PPCE5500
4475 || rs6000_tune == PROCESSOR_PPCE6500);
4476
4477 /* Allow debug switches to override the above settings. These are set to -1
4478 in rs6000.opt to indicate the user hasn't directly set the switch. */
4479 if (TARGET_ALWAYS_HINT >= 0)
4480 rs6000_always_hint = TARGET_ALWAYS_HINT;
4481
4482 if (TARGET_SCHED_GROUPS >= 0)
4483 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4484
4485 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4486 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4487
4488 rs6000_sched_restricted_insns_priority
4489 = (rs6000_sched_groups ? 1 : 0);
4490
4491 /* Handle -msched-costly-dep option. */
4492 rs6000_sched_costly_dep
4493 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4494
4495 if (rs6000_sched_costly_dep_str)
4496 {
4497 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4498 rs6000_sched_costly_dep = no_dep_costly;
4499 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4500 rs6000_sched_costly_dep = all_deps_costly;
4501 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4502 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4503 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4504 rs6000_sched_costly_dep = store_to_load_dep_costly;
4505 else
4506 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4507 atoi (rs6000_sched_costly_dep_str));
4508 }
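/* Example forms accepted by the parsing above (hypothetical command lines):
   -msched-costly-dep=true_store_to_load selects a named heuristic, while
   anything else, e.g. -msched-costly-dep=3, is interpreted as a number via
   atoi.  */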
4509
4510 /* Handle -minsert-sched-nops option. */
4511 rs6000_sched_insert_nops
4512 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4513
4514 if (rs6000_sched_insert_nops_str)
4515 {
4516 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4517 rs6000_sched_insert_nops = sched_finish_none;
4518 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4519 rs6000_sched_insert_nops = sched_finish_pad_groups;
4520 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4521 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4522 else
4523 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4524 atoi (rs6000_sched_insert_nops_str));
4525 }
4526
4527 /* Handle stack protector */
4528 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4529 #ifdef TARGET_THREAD_SSP_OFFSET
4530 rs6000_stack_protector_guard = SSP_TLS;
4531 #else
4532 rs6000_stack_protector_guard = SSP_GLOBAL;
4533 #endif
4534
4535 #ifdef TARGET_THREAD_SSP_OFFSET
4536 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4537 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4538 #endif
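/* On targets defining TARGET_THREAD_SSP_OFFSET the guard is read at a fixed
   offset from the thread pointer: r13 for -m64 and r2 for -m32, per the
   powerpc TLS conventions, giving loads of roughly the form
   "ld rN,OFFSET(r13)" in 64-bit code.  */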
4539
4540 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4541 {
4542 char *endp;
4543 const char *str = rs6000_stack_protector_guard_offset_str;
4544
4545 errno = 0;
4546 long offset = strtol (str, &endp, 0);
4547 if (!*str || *endp || errno)
4548 error ("%qs is not a valid number in %qs", str,
4549 "-mstack-protector-guard-offset=");
4550
4551 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4552 || (TARGET_64BIT && (offset & 3)))
4553 error ("%qs is not a valid offset in %qs", str,
4554 "-mstack-protector-guard-offset=");
4555
4556 rs6000_stack_protector_guard_offset = offset;
4557 }
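/* Example of an accepted value (hypothetical command line): strtol with
   base 0 accepts decimal, octal, or hex, so

       -mstack-protector-guard-offset=0x7000

   parses, provided it lies in [-0x8000, 0x7fff] and, for -m64, is a
   multiple of 4.  */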
4558
4559 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4560 {
4561 const char *str = rs6000_stack_protector_guard_reg_str;
4562 int reg = decode_reg_name (str);
4563
4564 if (!IN_RANGE (reg, 1, 31))
4565 error ("%qs is not a valid base register in %qs", str,
4566 "-mstack-protector-guard-reg=");
4567
4568 rs6000_stack_protector_guard_reg = reg;
4569 }
4570
4571 if (rs6000_stack_protector_guard == SSP_TLS
4572 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4573 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4574
4575 if (global_init_p)
4576 {
4577 #ifdef TARGET_REGNAMES
4578 /* If the user desires alternate register names, copy in the
4579 alternate names now. */
4580 if (TARGET_REGNAMES)
4581 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4582 #endif
4583
4584 /* Set aix_struct_return last, after the ABI is determined.
4585 If -maix-struct-return or -msvr4-struct-return was explicitly
4586 used, don't override with the ABI default. */
4587 if (!OPTION_SET_P (aix_struct_return))
4588 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4589
4590 #if 0
4591 /* IBM XL compiler defaults to unsigned bitfields. */
4592 if (TARGET_XL_COMPAT)
4593 flag_signed_bitfields = 0;
4594 #endif
4595
4596 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4597 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4598
4599 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4600
4601 /* We can only guarantee the availability of DI pseudo-ops when
4602 assembling for 64-bit targets. */
4603 if (!TARGET_64BIT)
4604 {
4605 targetm.asm_out.aligned_op.di = NULL;
4606 targetm.asm_out.unaligned_op.di = NULL;
4607 }
4608
4609
4610 /* Set branch target alignment, if not optimizing for size. */
4611 if (!optimize_size)
4612 {
4613 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4614 8-byte aligned to avoid misprediction by the branch predictor. */
4615 if (rs6000_tune == PROCESSOR_TITAN
4616 || rs6000_tune == PROCESSOR_CELL)
4617 {
4618 if (flag_align_functions && !str_align_functions)
4619 str_align_functions = "8";
4620 if (flag_align_jumps && !str_align_jumps)
4621 str_align_jumps = "8";
4622 if (flag_align_loops && !str_align_loops)
4623 str_align_loops = "8";
4624 }
4625 if (rs6000_align_branch_targets)
4626 {
4627 if (flag_align_functions && !str_align_functions)
4628 str_align_functions = "16";
4629 if (flag_align_jumps && !str_align_jumps)
4630 str_align_jumps = "16";
4631 if (flag_align_loops && !str_align_loops)
4632 {
4633 can_override_loop_align = 1;
4634 str_align_loops = "16";
4635 }
4636 }
4637 }
4638
4639 /* Arrange to save and restore machine status around nested functions. */
4640 init_machine_status = rs6000_init_machine_status;
4641
4642 /* We should always be splitting complex arguments, but we can't break
4643 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4644 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4645 targetm.calls.split_complex_arg = NULL;
4646
4647 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4648 if (DEFAULT_ABI == ABI_AIX)
4649 targetm.calls.custom_function_descriptors = 0;
4650 }
4651
4652 /* Initialize rs6000_cost with the appropriate target costs. */
4653 if (optimize_size)
4654 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4655 else
4656 switch (rs6000_tune)
4657 {
4658 case PROCESSOR_RS64A:
4659 rs6000_cost = &rs64a_cost;
4660 break;
4661
4662 case PROCESSOR_MPCCORE:
4663 rs6000_cost = &mpccore_cost;
4664 break;
4665
4666 case PROCESSOR_PPC403:
4667 rs6000_cost = &ppc403_cost;
4668 break;
4669
4670 case PROCESSOR_PPC405:
4671 rs6000_cost = &ppc405_cost;
4672 break;
4673
4674 case PROCESSOR_PPC440:
4675 rs6000_cost = &ppc440_cost;
4676 break;
4677
4678 case PROCESSOR_PPC476:
4679 rs6000_cost = &ppc476_cost;
4680 break;
4681
4682 case PROCESSOR_PPC601:
4683 rs6000_cost = &ppc601_cost;
4684 break;
4685
4686 case PROCESSOR_PPC603:
4687 rs6000_cost = &ppc603_cost;
4688 break;
4689
4690 case PROCESSOR_PPC604:
4691 rs6000_cost = &ppc604_cost;
4692 break;
4693
4694 case PROCESSOR_PPC604e:
4695 rs6000_cost = &ppc604e_cost;
4696 break;
4697
4698 case PROCESSOR_PPC620:
4699 rs6000_cost = &ppc620_cost;
4700 break;
4701
4702 case PROCESSOR_PPC630:
4703 rs6000_cost = &ppc630_cost;
4704 break;
4705
4706 case PROCESSOR_CELL:
4707 rs6000_cost = &ppccell_cost;
4708 break;
4709
4710 case PROCESSOR_PPC750:
4711 case PROCESSOR_PPC7400:
4712 rs6000_cost = &ppc750_cost;
4713 break;
4714
4715 case PROCESSOR_PPC7450:
4716 rs6000_cost = &ppc7450_cost;
4717 break;
4718
4719 case PROCESSOR_PPC8540:
4720 case PROCESSOR_PPC8548:
4721 rs6000_cost = &ppc8540_cost;
4722 break;
4723
4724 case PROCESSOR_PPCE300C2:
4725 case PROCESSOR_PPCE300C3:
4726 rs6000_cost = &ppce300c2c3_cost;
4727 break;
4728
4729 case PROCESSOR_PPCE500MC:
4730 rs6000_cost = &ppce500mc_cost;
4731 break;
4732
4733 case PROCESSOR_PPCE500MC64:
4734 rs6000_cost = &ppce500mc64_cost;
4735 break;
4736
4737 case PROCESSOR_PPCE5500:
4738 rs6000_cost = &ppce5500_cost;
4739 break;
4740
4741 case PROCESSOR_PPCE6500:
4742 rs6000_cost = &ppce6500_cost;
4743 break;
4744
4745 case PROCESSOR_TITAN:
4746 rs6000_cost = &titan_cost;
4747 break;
4748
4749 case PROCESSOR_POWER4:
4750 case PROCESSOR_POWER5:
4751 rs6000_cost = &power4_cost;
4752 break;
4753
4754 case PROCESSOR_POWER6:
4755 rs6000_cost = &power6_cost;
4756 break;
4757
4758 case PROCESSOR_POWER7:
4759 rs6000_cost = &power7_cost;
4760 break;
4761
4762 case PROCESSOR_POWER8:
4763 rs6000_cost = &power8_cost;
4764 break;
4765
4766 case PROCESSOR_POWER9:
4767 rs6000_cost = &power9_cost;
4768 break;
4769
4770 case PROCESSOR_POWER10:
4771 rs6000_cost = &power10_cost;
4772 break;
4773
4774 case PROCESSOR_PPCA2:
4775 rs6000_cost = &ppca2_cost;
4776 break;
4777
4778 default:
4779 gcc_unreachable ();
4780 }
4781
4782 if (global_init_p)
4783 {
4784 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4785 param_simultaneous_prefetches,
4786 rs6000_cost->simultaneous_prefetches);
4787 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4788 param_l1_cache_size,
4789 rs6000_cost->l1_cache_size);
4790 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4791 param_l1_cache_line_size,
4792 rs6000_cost->cache_line_size);
4793 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4794 param_l2_cache_size,
4795 rs6000_cost->l2_cache_size);
4796
4797 /* Increase loop peeling limits based on performance analysis. */
4798 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4799 param_max_peeled_insns, 400);
4800 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4801 param_max_completely_peeled_insns, 400);
4802
4803 /* The lxvl/stxvl instructions don't perform well before Power10. */
4804 if (TARGET_POWER10)
4805 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4806 param_vect_partial_vector_usage, 1);
4807 else
4808 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4809 param_vect_partial_vector_usage, 0);
4810
4811 /* Use the 'model' -fsched-pressure algorithm by default. */
4812 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4813 param_sched_pressure_algorithm,
4814 SCHED_PRESSURE_MODEL);
4815
4816 /* If using typedef char *va_list, signal that
4817 __builtin_va_start (&ap, 0) can be optimized to
4818 ap = __builtin_next_arg (0). */
4819 if (DEFAULT_ABI != ABI_V4)
4820 targetm.expand_builtin_va_start = NULL;
4821 }
4822
4823 rs6000_override_options_after_change ();
4824
4825 /* If not explicitly specified via option, decide whether to generate indexed
4826 load/store instructions. A value of -1 indicates that the
4827 initial value of this variable has not been overwritten. During
4828 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4829 if (TARGET_AVOID_XFORM == -1)
4830 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4831 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4832 need indexed accesses and the type used is the scalar type of the element
4833 being loaded or stored. */
4834 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4835 && !TARGET_ALTIVEC);
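/* "Indexed" here means X-form addressing such as "lwzx r3,r4,r5"
   (base + index register), as opposed to D-form "lwz r3,8(r4)"
   (base + displacement); the Power6 DERAT penalty applies to the former,
   while the AltiVec element loads/stores exist only in X-form.  */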
4836
4837 /* Set the -mrecip options. */
4838 if (rs6000_recip_name)
4839 {
4840 char *p = ASTRDUP (rs6000_recip_name);
4841 char *q;
4842 unsigned int mask, i;
4843 bool invert;
4844
4845 while ((q = strtok (p, ",")) != NULL)
4846 {
4847 p = NULL;
4848 if (*q == '!')
4849 {
4850 invert = true;
4851 q++;
4852 }
4853 else
4854 invert = false;
4855
4856 if (!strcmp (q, "default"))
4857 mask = ((TARGET_RECIP_PRECISION)
4858 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4859 else
4860 {
4861 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4862 if (!strcmp (q, recip_options[i].string))
4863 {
4864 mask = recip_options[i].mask;
4865 break;
4866 }
4867
4868 if (i == ARRAY_SIZE (recip_options))
4869 {
4870 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4871 invert = false;
4872 mask = 0;
4873 ret = false;
4874 }
4875 }
4876
4877 if (invert)
4878 rs6000_recip_control &= ~mask;
4879 else
4880 rs6000_recip_control |= mask;
4881 }
4882 }
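
/* For illustration (the option names are taken from the -mrecip
   documentation, not verified against the recip_options table defined
   earlier in this file): an option such as

       -mrecip=rsqrtf,!divd

   is tokenized into "rsqrtf" and "!divd"; the first token sets the mask
   bits for the single-precision reciprocal square root estimate in
   rs6000_recip_control, while the leading '!' on the second token clears
   the mask bits for the double-precision divide estimate instead.  */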
4883
4884 /* Set the builtin mask from the various options that could affect which
4885 builtins are enabled. In the past we used target_flags, but we've run
4886 out of bits, and some options are no longer in target_flags. */
4887 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4888 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4889 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4890 rs6000_builtin_mask);
4891
4892 /* Initialize all of the registers. */
4893 rs6000_init_hard_regno_mode_ok (global_init_p);
4894
4895 /* Save the initial options in case the user uses function-specific options. */
4896 if (global_init_p)
4897 target_option_default_node = target_option_current_node
4898 = build_target_option_node (&global_options, &global_options_set);
4899
4900 /* If not explicitly specified via option, decide whether to generate the
4901 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4902 if (TARGET_LINK_STACK == -1)
4903 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4904
4905 /* Deprecate use of -mno-speculate-indirect-jumps. */
4906 if (!rs6000_speculate_indirect_jumps)
4907 warning (0, "%qs is deprecated and not recommended in any circumstances",
4908 "-mno-speculate-indirect-jumps");
4909
4910 return ret;
4911 }
4912
4913 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4914 define the target cpu type. */
4915
4916 static void
4917 rs6000_option_override (void)
4918 {
4919 (void) rs6000_option_override_internal (true);
4920 }
4921
4922 \f
4923 /* Implement LOOP_ALIGN. */
4924 align_flags
4925 rs6000_loop_align (rtx label)
4926 {
4927 basic_block bb;
4928 int ninsns;
4929
4930 /* Don't override loop alignment if -falign-loops was specified. */
4931 if (!can_override_loop_align)
4932 return align_loops;
4933
4934 bb = BLOCK_FOR_INSN (label);
4935 ninsns = num_loop_insns (bb->loop_father);
4936
4937 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4938 if (ninsns > 4 && ninsns <= 8
4939 && (rs6000_tune == PROCESSOR_POWER4
4940 || rs6000_tune == PROCESSOR_POWER5
4941 || rs6000_tune == PROCESSOR_POWER6
4942 || rs6000_tune == PROCESSOR_POWER7
4943 || rs6000_tune == PROCESSOR_POWER8))
4944 return align_flags (5);
4945 else
4946 return align_loops;
4947 }
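
/* A worked example of the rule above: when tuning for power8, a loop whose
   body contains 6 insns gets align_flags (5), i.e. its start is aligned to
   1 << 5 = 32 bytes so the whole body fits in one 32-byte icache sector;
   a 12-insn loop instead keeps whatever alignment align_loops selected.  */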
4948
4949 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4950 after applying some number of iterations. This routine does not determine
4951 how many iterations are required to reach the desired alignment. */
4952
4953 static bool
4954 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4955 {
4956 if (is_packed)
4957 return false;
4958
4959 if (TARGET_32BIT)
4960 {
4961 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4962 return true;
4963
4964 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4965 return true;
4966
4967 return false;
4968 }
4969 else
4970 {
4971 if (TARGET_MACHO)
4972 return false;
4973
4974 /* Assuming that all other types are naturally aligned. CHECKME! */
4975 return true;
4976 }
4977 }
4978
4979 /* Return true if the vector misalignment factor is supported by the
4980 target. */
4981 static bool
4982 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4983 const_tree type,
4984 int misalignment,
4985 bool is_packed)
4986 {
4987 if (TARGET_VSX)
4988 {
4989 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4990 return true;
4991
4992 /* Return if movmisalign pattern is not supported for this mode. */
4993 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4994 return false;
4995
4996 if (misalignment == -1)
4997 {
4998 /* Misalignment factor is unknown at compile time but we know
4999 it's word aligned. */
5000 if (rs6000_vector_alignment_reachable (type, is_packed))
5001 {
5002 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5003
5004 if (element_size == 64 || element_size == 32)
5005 return true;
5006 }
5007
5008 return false;
5009 }
5010
5011 /* VSX supports word-aligned vectors. */
5012 if (misalignment % 4 == 0)
5013 return true;
5014 }
5015 return false;
5016 }
5017
5018 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5019 static int
5020 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5021 tree vectype, int misalign)
5022 {
5023 unsigned elements;
5024 tree elem_type;
5025
5026 switch (type_of_cost)
5027 {
5028 case scalar_stmt:
5029 case scalar_store:
5030 case vector_stmt:
5031 case vector_store:
5032 case vec_to_scalar:
5033 case scalar_to_vec:
5034 case cond_branch_not_taken:
5035 return 1;
5036 case scalar_load:
5037 case vector_load:
5038 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5039 return 2;
5040
5041 case vec_perm:
5042 /* Power7 has only one permute unit, make it a bit expensive. */
5043 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5044 return 3;
5045 else
5046 return 1;
5047
5048 case vec_promote_demote:
5049 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5050 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5051 return 4;
5052 else
5053 return 1;
5054
5055 case cond_branch_taken:
5056 return 3;
5057
5058 case unaligned_load:
5059 case vector_gather_load:
5060 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5061 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5062 return 2;
5063
5064 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5065 {
5066 elements = TYPE_VECTOR_SUBPARTS (vectype);
5067 /* See PR102767, consider V1TI to keep consistency. */
5068 if (elements == 2 || elements == 1)
5069 /* Double word aligned. */
5070 return 4;
5071
5072 if (elements == 4)
5073 {
5074 switch (misalign)
5075 {
5076 case 8:
5077 /* Double word aligned. */
5078 return 4;
5079
5080 case -1:
5081 /* Unknown misalignment. */
5082 case 4:
5083 case 12:
5084 /* Word aligned. */
5085 return 33;
5086
5087 default:
5088 gcc_unreachable ();
5089 }
5090 }
5091 }
5092
5093 if (TARGET_ALTIVEC)
5094 /* Misaligned loads are not supported. */
5095 gcc_unreachable ();
5096
5097 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5098 return 4;
5099
5100 case unaligned_store:
5101 case vector_scatter_store:
5102 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5103 return 1;
5104
5105 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5106 {
5107 elements = TYPE_VECTOR_SUBPARTS (vectype);
5108 /* See PR102767, consider V1TI to keep consistency. */
5109 if (elements == 2 || elements == 1)
5110 /* Double word aligned. */
5111 return 2;
5112
5113 if (elements == 4)
5114 {
5115 switch (misalign)
5116 {
5117 case 8:
5118 /* Double word aligned. */
5119 return 2;
5120
5121 case -1:
5122 /* Unknown misalignment. */
5123 case 4:
5124 case 12:
5125 /* Word aligned. */
5126 return 23;
5127
5128 default:
5129 gcc_unreachable ();
5130 }
5131 }
5132 }
5133
5134 if (TARGET_ALTIVEC)
5135 /* Misaligned stores are not supported. */
5136 gcc_unreachable ();
5137
5138 return 2;
5139
5140 case vec_construct:
5141 /* This is a rough approximation assuming non-constant elements
5142 constructed into a vector via element insertion. FIXME:
5143 vec_construct is not granular enough for uniformly good
5144 decisions. If the initialization is a splat, this is
5145 cheaper than we estimate. Improve this someday. */
5146 elem_type = TREE_TYPE (vectype);
5147 /* 32-bit vectors loaded into registers are stored as double
5148 precision, so we need 2 permutes, 2 converts, and 1 merge
5149 to construct a vector of short floats from them. */
5150 if (SCALAR_FLOAT_TYPE_P (elem_type)
5151 && TYPE_PRECISION (elem_type) == 32)
5152 return 5;
5153 /* On POWER9, integer vector types are built up in GPRs and then
5154 use a direct move (2 cycles). For POWER8 this is even worse,
5155 as we need two direct moves and a merge, and the direct moves
5156 are five cycles. */
5157 else if (INTEGRAL_TYPE_P (elem_type))
5158 {
5159 if (TARGET_P9_VECTOR)
5160 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5161 else
5162 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5163 }
5164 else
5165 /* V2DFmode doesn't need a direct move. */
5166 return 2;
5167
5168 default:
5169 gcc_unreachable ();
5170 }
5171 }
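
/* To make the cost table above concrete, consider an unaligned_load of a
   V4SF vector on a VSX target without TARGET_EFFICIENT_UNALIGNED_VSX but
   with -mallow-movmisalign: misalign 8 (doubleword aligned) costs 4, while
   misalign 4 or 12 (only word aligned) costs 33, reflecting the expensive
   realignment sequence; a plain aligned vector_load costs just 2.  */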
5172
5173 /* Implement targetm.vectorize.preferred_simd_mode. */
5174
5175 static machine_mode
5176 rs6000_preferred_simd_mode (scalar_mode mode)
5177 {
5178 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5179
5180 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5181 return vmode.require ();
5182
5183 return word_mode;
5184 }
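
/* For example, SFmode maps to V4SFmode (16 bytes / 4-byte elements) and
   DImode to V2DImode when the corresponding vector unit is enabled; if no
   vector unit handles the 16-byte mode, word_mode is returned and the
   vectorizer effectively falls back to scalar code.  */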
5185
5186 class rs6000_cost_data : public vector_costs
5187 {
5188 public:
5189 using vector_costs::vector_costs;
5190
5191 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5192 stmt_vec_info stmt_info, slp_tree, tree vectype,
5193 int misalign,
5194 vect_cost_model_location where) override;
5195 void finish_cost (const vector_costs *) override;
5196
5197 protected:
5198 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5199 vect_cost_model_location, unsigned int);
5200 void density_test (loop_vec_info);
5201 void adjust_vect_cost_per_loop (loop_vec_info);
5202
5203 /* Total number of vectorized stmts (loop only). */
5204 unsigned m_nstmts = 0;
5205 /* Total number of loads (loop only). */
5206 unsigned m_nloads = 0;
5207 /* Possible extra penalized cost on vector construction (loop only). */
5208 unsigned m_extra_ctor_cost = 0;
5209 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5210 instruction is needed by the vectorization. */
5211 bool m_vect_nonmem = false;
5212 };
5213
5214 /* Test for likely overcommitment of vector hardware resources. If a
5215 loop iteration is relatively large, and too large a percentage of
5216 instructions in the loop are vectorized, the cost model may not
5217 adequately reflect delays from unavailable vector resources.
5218 Penalize the loop body cost for this case. */
5219
5220 void
5221 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5222 {
5223 /* This density test only cares about the cost of the vector version of
5224 the loop, so return immediately if we are costing the scalar version
5225 (namely computing the single scalar iteration cost). */
5226 if (m_costing_for_scalar)
5227 return;
5228
5229 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5230 basic_block *bbs = get_loop_body (loop);
5231 int nbbs = loop->num_nodes;
5232 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5233
5234 for (int i = 0; i < nbbs; i++)
5235 {
5236 basic_block bb = bbs[i];
5237 gimple_stmt_iterator gsi;
5238
5239 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5240 {
5241 gimple *stmt = gsi_stmt (gsi);
5242 if (is_gimple_debug (stmt))
5243 continue;
5244
5245 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5246
5247 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5248 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5249 not_vec_cost++;
5250 }
5251 }
5252
5253 free (bbs);
5254 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5255
5256 if (density_pct > rs6000_density_pct_threshold
5257 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5258 {
5259 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5260 if (dump_enabled_p ())
5261 dump_printf_loc (MSG_NOTE, vect_location,
5262 "density %d%%, cost %d exceeds threshold, penalizing "
5263 "loop body cost by %u%%\n", density_pct,
5264 vec_cost + not_vec_cost, rs6000_density_penalty);
5265 }
5266
5267 /* Check whether we need to penalize the body cost to account
5268 for excess strided or elementwise loads. */
5269 if (m_extra_ctor_cost > 0)
5270 {
5271 gcc_assert (m_nloads <= m_nstmts);
5272 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5273
5274 /* The loop is likely to be bounded by the latency and execution
5275 resources of the many scalar loads feeding strided or elementwise
5276 vector constructions if both conditions below hold:
5277 1. there are many loads, so it is easy to end up waiting a long
5278 time on the load units;
5279 2. loads form a large proportion of all vectorized statements, so
5280 it is hard to schedule other statements to spread among the
5281 loads.
5282 One typical case is the innermost loop of the hotspot of SPEC2017
5283 503.bwaves_r without loop interchange. */
5284 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5285 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5286 {
5287 m_costs[vect_body] += m_extra_ctor_cost;
5288 if (dump_enabled_p ())
5289 dump_printf_loc (MSG_NOTE, vect_location,
5290 "Found %u loads and "
5291 "load pct. %u%% exceed "
5292 "the threshold, "
5293 "penalizing loop body "
5294 "cost by extra cost %u "
5295 "for ctor.\n",
5296 m_nloads, load_pct,
5297 m_extra_ctor_cost);
5298 }
5299 }
5300 }
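
/* A hypothetical run of the density test, assuming the default parameter
   values are a density threshold of 85%, a size threshold of 70, and a 2%
   penalty (these are tunable params, not fixed here): a loop body with
   vec_cost = 95 and not_vec_cost = 5 has density 95% and size 100, so both
   thresholds are exceeded and the body cost is scaled to
   95 * 102 / 100 = 96 (integer division).  */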
5301
5302 /* Implement targetm.vectorize.create_costs. */
5303
5304 static vector_costs *
5305 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5306 {
5307 return new rs6000_cost_data (vinfo, costing_for_scalar);
5308 }
5309
5310 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5311 For some statements, we would like to further fine-tune the cost on top
5312 of the rs6000_builtin_vectorization_cost handling, which doesn't have any
5313 information on statement operation codes etc. One typical case here is
5314 COND_EXPR: it costs the same as a simple FXU instruction when evaluated
5315 for scalar cost, but it should be priced higher because it is transformed
5316 into either compare + branch or compare + isel instructions. */
5317
5318 static unsigned
5319 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5320 struct _stmt_vec_info *stmt_info)
5321 {
5322 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5323 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5324 {
5325 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5326 if (subcode == COND_EXPR)
5327 return 2;
5328 }
5329
5330 return 0;
5331 }
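
/* For example, for a scalar assignment like "x = a < b ? c : d;" (a
   GIMPLE_ASSIGN whose rhs code is COND_EXPR), the 2 returned here is added
   on top of the base scalar_stmt cost of 1 in add_stmt_cost, so the
   statement is costed at 3 to reflect the compare + branch or
   compare + isel sequence it becomes.  */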
5332
5333 /* Helper function for add_stmt_cost. Check each statement cost
5334 entry, gather information and update the target_cost fields
5335 accordingly. */
5336 void
5337 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5338 stmt_vec_info stmt_info,
5339 vect_cost_model_location where,
5340 unsigned int orig_count)
5341 {
5342
5343 /* Check whether we're doing something other than just a copy loop.
5344 Not all such loops may be profitably vectorized; see
5345 rs6000_finish_cost. */
5346 if (kind == vec_to_scalar
5347 || kind == vec_perm
5348 || kind == vec_promote_demote
5349 || kind == vec_construct
5350 || kind == scalar_to_vec
5351 || (where == vect_body && kind == vector_stmt))
5352 m_vect_nonmem = true;
5353
5354 /* Gather some information when we are costing the vectorized instruction
5355 for the statements located in a loop body. */
5356 if (!m_costing_for_scalar
5357 && is_a<loop_vec_info> (m_vinfo)
5358 && where == vect_body)
5359 {
5360 m_nstmts += orig_count;
5361
5362 if (kind == scalar_load || kind == vector_load
5363 || kind == unaligned_load || kind == vector_gather_load)
5364 m_nloads += orig_count;
5365
5366 /* Power processors do not currently have instructions for strided
5367 and elementwise loads, and instead we must generate multiple
5368 scalar loads. This leads to undercounting of the cost. We
5369 account for this by scaling the construction cost by the number
5370 of elements involved, and saving this as extra cost that we may
5371 or may not need to apply. When finalizing the cost of the loop,
5372 the extra penalty is applied when the load density heuristics
5373 are satisfied. */
5374 if (kind == vec_construct && stmt_info
5375 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5376 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5377 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5378 {
5379 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5380 unsigned int nunits = vect_nunits_for_cost (vectype);
5381 /* As PR103702 shows, it's possible that the vectorizer wants to cost
5382 only one unit here; there is no need to penalize that case, so
5383 simply return early. */
5384 if (nunits == 1)
5385 return;
5386 /* The i386 port uses nunits * stmt_cost as the penalized cost
5387 for this kind of case. We used to follow it but found it
5388 could result in an unreliable body cost, especially for
5389 V16QI/V8HI modes. To improve on that, we use this heuristic
5390 instead: for each scalar load, use 2 as the penalized cost
5391 for the case with 2 nunits and use 1 for the other cases.
5392 This has little supporting theory and is mainly concluded
5393 from broad performance evaluations on Power8, Power9 and
5394 Power10. One possibly related point: constructing a vector
5395 from more units takes more insns, which gives more chances
5396 to schedule them well (even run them in parallel when enough
5397 units are available), so it seems reasonable not to
5398 penalize those cases as much. */
5399 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5400 unsigned int extra_cost = nunits * adjusted_cost;
5401 m_extra_ctor_cost += extra_cost;
5402 }
5403 }
5404 }
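
/* Working through the penalty above: a strided V2DF construction has
   nunits = 2, so each one adds 2 * 2 = 4 to m_extra_ctor_cost, while a
   strided V16QI construction adds 16 * 1 = 16; the accumulated
   m_extra_ctor_cost is only applied later, in density_test, if the load
   density heuristics fire.  */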
5405
5406 unsigned
5407 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5408 stmt_vec_info stmt_info, slp_tree,
5409 tree vectype, int misalign,
5410 vect_cost_model_location where)
5411 {
5412 unsigned retval = 0;
5413
5414 if (flag_vect_cost_model)
5415 {
5416 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5417 misalign);
5418 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5419 /* Statements in an inner loop relative to the loop being
5420 vectorized are weighted more heavily. The value here is
5421 arbitrary and could potentially be improved with analysis. */
5422 unsigned int orig_count = count;
5423 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5424 m_costs[where] += retval;
5425
5426 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5427 }
5428
5429 return retval;
5430 }
5431
5432 /* For target-specific vectorization costs that can't be handled per stmt,
5433 check the requisite conditions and adjust the vectorization cost
5434 accordingly if they are satisfied. One typical example is modeling the
5435 shift cost for vectors with length by counting the number of required
5436 lengths when LOOP_VINFO_FULLY_WITH_LENGTH_P holds. */
5437
5438 void
5439 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5440 {
5441 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5442 {
5443 rgroup_controls *rgc;
5444 unsigned int num_vectors_m1;
5445 unsigned int shift_cnt = 0;
5446 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5447 if (rgc->type)
5448 /* Each length needs one shift to fill into bits 0-7. */
5449 shift_cnt += num_vectors_m1 + 1;
5450
5451 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5452 NULL_TREE, 0, vect_body);
5453 }
5454 }
5455
5456 void
5457 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5458 {
5459 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5460 {
5461 adjust_vect_cost_per_loop (loop_vinfo);
5462 density_test (loop_vinfo);
5463
5464 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5465 that require versioning for any reason. The vectorization is at
5466 best a wash inside the loop, and the versioning checks make
5467 profitability highly unlikely and potentially quite harmful. */
5468 if (!m_vect_nonmem
5469 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5470 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5471 m_costs[vect_body] += 10000;
5472 }
5473
5474 vector_costs::finish_cost (scalar_costs);
5475 }
5476
5477 /* Implement targetm.loop_unroll_adjust. */
5478
5479 static unsigned
5480 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5481 {
5482 if (unroll_only_small_loops)
5483 {
5484 /* TODO: These are hardcoded values right now. We probably should use
5485 a PARAM here. */
5486 if (loop->ninsns <= 6)
5487 return MIN (4, nunroll);
5488 if (loop->ninsns <= 10)
5489 return MIN (2, nunroll);
5490
5491 return 0;
5492 }
5493
5494 return nunroll;
5495 }
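
/* For example, with unroll_only_small_loops set (-munroll-only-small-loops),
   a 6-insn loop is unrolled at most 4 times, a 10-insn loop at most twice,
   and anything larger is not unrolled at all; without that option the
   unroller's own factor NUNROLL is used unchanged.  */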
5496
5497 /* Returns a function decl for a vectorized version of the builtin function
5498 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5499 if it is not available.
5500
5501 Implement targetm.vectorize.builtin_vectorized_function. */
5502
5503 static tree
5504 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5505 tree type_in)
5506 {
5507 machine_mode in_mode, out_mode;
5508 int in_n, out_n;
5509
5510 if (TARGET_DEBUG_BUILTIN)
5511 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5512 combined_fn_name (combined_fn (fn)),
5513 GET_MODE_NAME (TYPE_MODE (type_out)),
5514 GET_MODE_NAME (TYPE_MODE (type_in)));
5515
5516 /* TODO: Should this be gcc_assert? */
5517 if (TREE_CODE (type_out) != VECTOR_TYPE
5518 || TREE_CODE (type_in) != VECTOR_TYPE)
5519 return NULL_TREE;
5520
5521 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5522 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5523 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5524 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5525
5526 switch (fn)
5527 {
5528 CASE_CFN_COPYSIGN:
5529 if (VECTOR_UNIT_VSX_P (V2DFmode)
5530 && out_mode == DFmode && out_n == 2
5531 && in_mode == DFmode && in_n == 2)
5532 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5533 if (VECTOR_UNIT_VSX_P (V4SFmode)
5534 && out_mode == SFmode && out_n == 4
5535 && in_mode == SFmode && in_n == 4)
5536 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5537 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5538 && out_mode == SFmode && out_n == 4
5539 && in_mode == SFmode && in_n == 4)
5540 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5541 break;
5542 CASE_CFN_CEIL:
5543 if (VECTOR_UNIT_VSX_P (V2DFmode)
5544 && out_mode == DFmode && out_n == 2
5545 && in_mode == DFmode && in_n == 2)
5546 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5547 if (VECTOR_UNIT_VSX_P (V4SFmode)
5548 && out_mode == SFmode && out_n == 4
5549 && in_mode == SFmode && in_n == 4)
5550 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5551 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5552 && out_mode == SFmode && out_n == 4
5553 && in_mode == SFmode && in_n == 4)
5554 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5555 break;
5556 CASE_CFN_FLOOR:
5557 if (VECTOR_UNIT_VSX_P (V2DFmode)
5558 && out_mode == DFmode && out_n == 2
5559 && in_mode == DFmode && in_n == 2)
5560 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5561 if (VECTOR_UNIT_VSX_P (V4SFmode)
5562 && out_mode == SFmode && out_n == 4
5563 && in_mode == SFmode && in_n == 4)
5564 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5565 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5566 && out_mode == SFmode && out_n == 4
5567 && in_mode == SFmode && in_n == 4)
5568 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5569 break;
5570 CASE_CFN_FMA:
5571 if (VECTOR_UNIT_VSX_P (V2DFmode)
5572 && out_mode == DFmode && out_n == 2
5573 && in_mode == DFmode && in_n == 2)
5574 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5575 if (VECTOR_UNIT_VSX_P (V4SFmode)
5576 && out_mode == SFmode && out_n == 4
5577 && in_mode == SFmode && in_n == 4)
5578 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5579 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5580 && out_mode == SFmode && out_n == 4
5581 && in_mode == SFmode && in_n == 4)
5582 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5583 break;
5584 CASE_CFN_TRUNC:
5585 if (VECTOR_UNIT_VSX_P (V2DFmode)
5586 && out_mode == DFmode && out_n == 2
5587 && in_mode == DFmode && in_n == 2)
5588 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5589 if (VECTOR_UNIT_VSX_P (V4SFmode)
5590 && out_mode == SFmode && out_n == 4
5591 && in_mode == SFmode && in_n == 4)
5592 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5593 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5594 && out_mode == SFmode && out_n == 4
5595 && in_mode == SFmode && in_n == 4)
5596 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5597 break;
5598 CASE_CFN_NEARBYINT:
5599 if (VECTOR_UNIT_VSX_P (V2DFmode)
5600 && flag_unsafe_math_optimizations
5601 && out_mode == DFmode && out_n == 2
5602 && in_mode == DFmode && in_n == 2)
5603 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5604 if (VECTOR_UNIT_VSX_P (V4SFmode)
5605 && flag_unsafe_math_optimizations
5606 && out_mode == SFmode && out_n == 4
5607 && in_mode == SFmode && in_n == 4)
5608 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5609 break;
5610 CASE_CFN_RINT:
5611 if (VECTOR_UNIT_VSX_P (V2DFmode)
5612 && !flag_trapping_math
5613 && out_mode == DFmode && out_n == 2
5614 && in_mode == DFmode && in_n == 2)
5615 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5616 if (VECTOR_UNIT_VSX_P (V4SFmode)
5617 && !flag_trapping_math
5618 && out_mode == SFmode && out_n == 4
5619 && in_mode == SFmode && in_n == 4)
5620 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5621 break;
5622 default:
5623 break;
5624 }
5625
5626 /* Generate calls to libmass if appropriate. */
5627 if (rs6000_veclib_handler)
5628 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5629
5630 return NULL_TREE;
5631 }
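
/* For example, a loop applying ceil () to an array of doubles is
   vectorized through RS6000_BIF_XVRDPIP (the VSX xvrdpip instruction)
   when V2DFmode is handled by VSX; if no entry in the switch above
   matches, rs6000_veclib_handler may still provide a vectorized MASS
   routine.  */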
5632
5633 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5634 library with vectorized intrinsics. */
5635
5636 static tree
5637 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5638 tree type_in)
5639 {
5640 char name[32];
5641 const char *suffix = NULL;
5642 tree fntype, new_fndecl, bdecl = NULL_TREE;
5643 int n_args = 1;
5644 const char *bname;
5645 machine_mode el_mode, in_mode;
5646 int n, in_n;
5647
5648 /* Libmass is suitable for unsafe math only as it does not correctly support
5649 parts of IEEE with the required precision such as denormals. Only support
5650 it if we have VSX to use the simd d2 or f4 functions.
5651 XXX: Add variable length support. */
5652 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5653 return NULL_TREE;
5654
5655 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5656 n = TYPE_VECTOR_SUBPARTS (type_out);
5657 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5658 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5659 if (el_mode != in_mode
5660 || n != in_n)
5661 return NULL_TREE;
5662
5663 switch (fn)
5664 {
5665 CASE_CFN_ATAN2:
5666 CASE_CFN_HYPOT:
5667 CASE_CFN_POW:
5668 n_args = 2;
5669 gcc_fallthrough ();
5670
5671 CASE_CFN_ACOS:
5672 CASE_CFN_ACOSH:
5673 CASE_CFN_ASIN:
5674 CASE_CFN_ASINH:
5675 CASE_CFN_ATAN:
5676 CASE_CFN_ATANH:
5677 CASE_CFN_CBRT:
5678 CASE_CFN_COS:
5679 CASE_CFN_COSH:
5680 CASE_CFN_ERF:
5681 CASE_CFN_ERFC:
5682 CASE_CFN_EXP2:
5683 CASE_CFN_EXP:
5684 CASE_CFN_EXPM1:
5685 CASE_CFN_LGAMMA:
5686 CASE_CFN_LOG10:
5687 CASE_CFN_LOG1P:
5688 CASE_CFN_LOG2:
5689 CASE_CFN_LOG:
5690 CASE_CFN_SIN:
5691 CASE_CFN_SINH:
5692 CASE_CFN_SQRT:
5693 CASE_CFN_TAN:
5694 CASE_CFN_TANH:
5695 if (el_mode == DFmode && n == 2)
5696 {
5697 bdecl = mathfn_built_in (double_type_node, fn);
5698 suffix = "d2"; /* pow -> powd2 */
5699 }
5700 else if (el_mode == SFmode && n == 4)
5701 {
5702 bdecl = mathfn_built_in (float_type_node, fn);
5703 suffix = "4"; /* powf -> powf4 */
5704 }
5705 else
5706 return NULL_TREE;
5707 if (!bdecl)
5708 return NULL_TREE;
5709 break;
5710
5711 default:
5712 return NULL_TREE;
5713 }
5714
5715 gcc_assert (suffix != NULL);
5716 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5717 if (!bname)
5718 return NULL_TREE;
5719
5720 strcpy (name, bname + strlen ("__builtin_"));
5721 strcat (name, suffix);
5722
5723 if (n_args == 1)
5724 fntype = build_function_type_list (type_out, type_in, NULL);
5725 else if (n_args == 2)
5726 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5727 else
5728 gcc_unreachable ();
5729
5730 /* Build a function declaration for the vectorized function. */
5731 new_fndecl = build_decl (BUILTINS_LOCATION,
5732 FUNCTION_DECL, get_identifier (name), fntype);
5733 TREE_PUBLIC (new_fndecl) = 1;
5734 DECL_EXTERNAL (new_fndecl) = 1;
5735 DECL_IS_NOVOPS (new_fndecl) = 1;
5736 TREE_READONLY (new_fndecl) = 1;
5737
5738 return new_fndecl;
5739 }
5740
5741 \f
5742 /* Default CPU string for rs6000*_file_start functions. */
5743 static const char *rs6000_default_cpu;
5744
5745 #ifdef USING_ELFOS_H
5746 const char *rs6000_machine;
5747
5748 const char *
5749 rs6000_machine_from_flags (void)
5750 {
5751 /* e300 and e500 */
5752 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5753 return "e300";
5754 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5755 return "e500";
5756 if (rs6000_cpu == PROCESSOR_PPCE500MC)
5757 return "e500mc";
5758 if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5759 return "e500mc64";
5760 if (rs6000_cpu == PROCESSOR_PPCE5500)
5761 return "e5500";
5762 if (rs6000_cpu == PROCESSOR_PPCE6500)
5763 return "e6500";
5764
5765 /* 400 series */
5766 if (rs6000_cpu == PROCESSOR_PPC403)
5767 return "\"403\"";
5768 if (rs6000_cpu == PROCESSOR_PPC405)
5769 return "\"405\"";
5770 if (rs6000_cpu == PROCESSOR_PPC440)
5771 return "\"440\"";
5772 if (rs6000_cpu == PROCESSOR_PPC476)
5773 return "\"476\"";
5774
5775 /* A2 */
5776 if (rs6000_cpu == PROCESSOR_PPCA2)
5777 return "a2";
5778
5779 /* Cell BE */
5780 if (rs6000_cpu == PROCESSOR_CELL)
5781 return "cell";
5782
5783 /* Titan */
5784 if (rs6000_cpu == PROCESSOR_TITAN)
5785 return "titan";
5786
5787 /* 500 series and 800 series */
5788 if (rs6000_cpu == PROCESSOR_MPCCORE)
5789 return "\"821\"";
5790
5791 #if 0
5792 /* This (and ppc64 below) are disabled here (for now at least) because
5793 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5794 are #define'd as some of these. Untangling that is a job for later. */
5795
5796 /* 600 series and 700 series, "classic" */
5797 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5798 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5799 || rs6000_cpu == PROCESSOR_PPC750)
5800 return "ppc";
5801 #endif
5802
5803 /* Classic with AltiVec, "G4" */
5804 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5805 return "\"7450\"";
5806
5807 #if 0
5808 /* The older 64-bit CPUs */
5809 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5810 || rs6000_cpu == PROCESSOR_RS64A)
5811 return "ppc64";
5812 #endif
5813
5814 HOST_WIDE_INT flags = rs6000_isa_flags;
5815
5816 /* Disable the flags that should never influence the .machine selection. */
5817 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5818
5819 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5820 return "power10";
5821 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5822 return "power9";
5823 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5824 return "power8";
5825 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5826 return "power7";
5827 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5828 return "power6";
5829 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5830 return "power5";
5831 if ((flags & ISA_2_1_MASKS) != 0)
5832 return "power4";
5833 if ((flags & OPTION_MASK_POWERPC64) != 0)
5834 return "ppc64";
5835 return "ppc";
5836 }
5837
5838 void
5839 emit_asm_machine (void)
5840 {
5841 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5842 }
5843 #endif
5844
5845 /* Do anything needed at the start of the asm file. */
5846
5847 static void
5848 rs6000_file_start (void)
5849 {
5850 char buffer[80];
5851 const char *start = buffer;
5852 FILE *file = asm_out_file;
5853
5854 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5855
5856 default_file_start ();
5857
5858 if (flag_verbose_asm)
5859 {
5860 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5861
5862 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5863 {
5864 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5865 start = "";
5866 }
5867
5868 if (OPTION_SET_P (rs6000_cpu_index))
5869 {
5870 fprintf (file, "%s -mcpu=%s", start,
5871 processor_target_table[rs6000_cpu_index].name);
5872 start = "";
5873 }
5874
5875 if (OPTION_SET_P (rs6000_tune_index))
5876 {
5877 fprintf (file, "%s -mtune=%s", start,
5878 processor_target_table[rs6000_tune_index].name);
5879 start = "";
5880 }
5881
5882 if (PPC405_ERRATUM77)
5883 {
5884 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5885 start = "";
5886 }
5887
5888 #ifdef USING_ELFOS_H
5889 switch (rs6000_sdata)
5890 {
5891 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5892 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5893 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5894 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5895 }
5896
5897 if (rs6000_sdata && g_switch_value)
5898 {
5899 fprintf (file, "%s -G %d", start,
5900 g_switch_value);
5901 start = "";
5902 }
5903 #endif
5904
5905 if (*start == '\0')
5906 putc ('\n', file);
5907 }
5908
5909 #ifdef USING_ELFOS_H
5910 rs6000_machine = rs6000_machine_from_flags ();
5911 emit_asm_machine ();
5912 #endif
5913
5914 if (DEFAULT_ABI == ABI_ELFv2)
5915 fprintf (file, "\t.abiversion 2\n");
5916 }
5917
5918 \f
5919 /* Return nonzero if this function is known to have a null epilogue. */
5920
5921 int
5922 direct_return (void)
5923 {
5924 if (reload_completed)
5925 {
5926 rs6000_stack_t *info = rs6000_stack_info ();
5927
5928 if (info->first_gp_reg_save == 32
5929 && info->first_fp_reg_save == 64
5930 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5931 && ! info->lr_save_p
5932 && ! info->cr_save_p
5933 && info->vrsave_size == 0
5934 && ! info->push_p)
5935 return 1;
5936 }
5937
5938 return 0;
5939 }
5940
5941 /* Helper for num_insns_constant. Calculate number of instructions to
5942 load VALUE to a single gpr using combinations of addi, addis, ori,
5943 oris, sldi and rldimi instructions. */
5944
5945 static int
5946 num_insns_constant_gpr (HOST_WIDE_INT value)
5947 {
5948 /* signed constant loadable with addi */
5949 if (SIGNED_INTEGER_16BIT_P (value))
5950 return 1;
5951
5952 /* constant loadable with addis */
5953 else if ((value & 0xffff) == 0
5954 && (value >> 31 == -1 || value >> 31 == 0))
5955 return 1;
5956
5957 /* PADDI can support up to 34 bit signed integers. */
5958 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5959 return 1;
5960
5961 else if (TARGET_POWERPC64)
5962 {
5963 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5964 HOST_WIDE_INT high = value >> 31;
5965
5966 if (high == 0 || high == -1)
5967 return 2;
5968
5969 high >>= 1;
5970
5971 if (low == 0 || low == high)
5972 return num_insns_constant_gpr (high) + 1;
5973 else if (high == 0)
5974 return num_insns_constant_gpr (low) + 1;
5975 else
5976 return (num_insns_constant_gpr (high)
5977 + num_insns_constant_gpr (low) + 1);
5978 }
5979
5980 else
5981 return 2;
5982 }
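
/* A worked example (assuming 64-bit; the asm below is one possible
   sequence, not the only one): 0x123456789abcdef0 splits into
   high = 0x12345678 (2 insns: lis; ori) and low = 0xffffffff9abcdef0
   (2 insns), plus one rldimi to combine them, for a total of 5 --
   the same count as the classic

       lis 9,0x1234; ori 9,9,0x5678; sldi 9,9,32;
       oris 9,9,0x9abc; ori 9,9,0xdef0

   sequence.  */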
5983
5984 /* Helper for num_insns_constant. Allow constants formed by the
5985 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5986 and handle modes that require multiple gprs. */
5987
5988 static int
5989 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5990 {
5991 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5992 int total = 0;
5993 while (nregs-- > 0)
5994 {
5995 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5996 int insns = num_insns_constant_gpr (low);
5997 if (insns > 2
5998 /* We won't get more than 2 from num_insns_constant_gpr
5999 except when TARGET_POWERPC64 and mode is DImode or
6000 wider, so the register mode must be DImode. */
6001 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6002 insns = 2;
6003 total += insns;
6004 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6005 it all at once would be UB. */
6006 value >>= (BITS_PER_WORD - 1);
6007 value >>= 1;
6008 }
6009 return total;
6010 }
6011
6012 /* Return the number of instructions it takes to form a constant in as
6013 many gprs as are needed for MODE. */
6014
6015 int
6016 num_insns_constant (rtx op, machine_mode mode)
6017 {
6018 HOST_WIDE_INT val;
6019
6020 switch (GET_CODE (op))
6021 {
6022 case CONST_INT:
6023 val = INTVAL (op);
6024 break;
6025
6026 case CONST_WIDE_INT:
6027 {
6028 int insns = 0;
6029 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6030 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6031 DImode);
6032 return insns;
6033 }
6034
6035 case CONST_DOUBLE:
6036 {
6037 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6038
6039 if (mode == SFmode || mode == SDmode)
6040 {
6041 long l;
6042
6043 if (mode == SDmode)
6044 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6045 else
6046 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6047 /* See the first define_split in rs6000.md handling a
6048 const_double_operand. */
6049 val = l;
6050 mode = SImode;
6051 }
6052 else if (mode == DFmode || mode == DDmode)
6053 {
6054 long l[2];
6055
6056 if (mode == DDmode)
6057 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6058 else
6059 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6060
6061 /* See the second (32-bit) and third (64-bit) define_split
6062 in rs6000.md handling a const_double_operand. */
6063 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6064 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6065 mode = DImode;
6066 }
6067 else if (mode == TFmode || mode == TDmode
6068 || mode == KFmode || mode == IFmode)
6069 {
6070 long l[4];
6071 int insns;
6072
6073 if (mode == TDmode)
6074 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6075 else
6076 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6077
6078 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6079 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6080 insns = num_insns_constant_multi (val, DImode);
6081 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6082 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6083 insns += num_insns_constant_multi (val, DImode);
6084 return insns;
6085 }
6086 else
6087 gcc_unreachable ();
6088 }
6089 break;
6090
6091 default:
6092 gcc_unreachable ();
6093 }
6094
6095 return num_insns_constant_multi (val, mode);
6096 }
6097
6098 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6099 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6100 corresponding element of the vector, but for V4SFmode, the
6101 corresponding "float" is interpreted as an SImode integer. */
6102
6103 HOST_WIDE_INT
6104 const_vector_elt_as_int (rtx op, unsigned int elt)
6105 {
6106 rtx tmp;
6107
6108 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6109 gcc_assert (GET_MODE (op) != V2DImode
6110 && GET_MODE (op) != V2DFmode);
6111
6112 tmp = CONST_VECTOR_ELT (op, elt);
6113 if (GET_MODE (op) == V4SFmode)
6114 tmp = gen_lowpart (SImode, tmp);
6115 return INTVAL (tmp);
6116 }
6117
6118 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6119 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6120 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6121 all elements are set to the same value, each containing COPIES replicas of
6122 the vsplt's operand; if STEP > 1, every STEP'th element is set to the
6123 vsplt's operand and the others are set to the value of the operand's msb. */
6124
6125 static bool
6126 vspltis_constant (rtx op, unsigned step, unsigned copies)
6127 {
6128 machine_mode mode = GET_MODE (op);
6129 machine_mode inner = GET_MODE_INNER (mode);
6130
6131 unsigned i;
6132 unsigned nunits;
6133 unsigned bitsize;
6134 unsigned mask;
6135
6136 HOST_WIDE_INT val;
6137 HOST_WIDE_INT splat_val;
6138 HOST_WIDE_INT msb_val;
6139
6140 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6141 return false;
6142
6143 nunits = GET_MODE_NUNITS (mode);
6144 bitsize = GET_MODE_BITSIZE (inner);
6145 mask = GET_MODE_MASK (inner);
6146
6147 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6148 splat_val = val;
6149 msb_val = val >= 0 ? 0 : -1;
6150
6151 if (val == 0 && step > 1)
6152 {
6153 /* Special case for loading the most significant bit with step > 1.
6154 In that case, match 0 in all but every STEP'th element, each of
6155 which must match EASY_VECTOR_MSB. */
6156 for (i = 1; i < nunits; ++i)
6157 {
6158 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6159 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6160 if ((i & (step - 1)) == step - 1)
6161 {
6162 if (!EASY_VECTOR_MSB (elt_val, inner))
6163 break;
6164 }
6165 else if (elt_val)
6166 break;
6167 }
6168 if (i == nunits)
6169 return true;
6170 }
6171
6172 /* Construct the value to be splatted, if possible. If not, return false. */
6173 for (i = 2; i <= copies; i *= 2)
6174 {
6175 HOST_WIDE_INT small_val;
6176 bitsize /= 2;
6177 small_val = splat_val >> bitsize;
6178 mask >>= bitsize;
6179 if (splat_val != ((HOST_WIDE_INT)
6180 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6181 | (small_val & mask)))
6182 return false;
6183 splat_val = small_val;
6184 inner = smallest_int_mode_for_size (bitsize);
6185 }
6186
6187 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6188 if (EASY_VECTOR_15 (splat_val))
6189 ;
6190
6191 /* Also check if we can splat, and then add the result to itself. Do so if
6192 the value is positive, or if the splat instruction is using OP's mode;
6193 for splat_val < 0, the splat and the add should use the same mode. */
6194 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6195 && (splat_val >= 0 || (step == 1 && copies == 1)))
6196 ;
6197
6198 /* Also check if we are loading up the most significant bit, which can be
6199 done by loading up -1 and shifting the value left by -1. Only do this
6200 for step 1 here; for larger steps it is handled earlier. */
6201 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6202 ;
6203
6204 else
6205 return false;
6206
6207 /* Check if VAL is present in every STEP-th element, and the
6208 other elements are filled with its most significant bit. */
6209 for (i = 1; i < nunits; ++i)
6210 {
6211 HOST_WIDE_INT desired_val;
6212 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6213 if ((i & (step - 1)) == 0)
6214 desired_val = val;
6215 else
6216 desired_val = msb_val;
6217
6218 if (desired_val != const_vector_elt_as_int (op, elt))
6219 return false;
6220 }
6221
6222 return true;
6223 }
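
/* Examples of constants this accepts: V4SI {5,5,5,5} is a direct
   vspltisw 5 (step 1, copies 1); V8HI with every halfword 0x0101 works as
   vspltisb 1 (copies 2, since each halfword is two copies of the byte 1);
   and, for step 2, V8HI {0,5,0,5,0,5,0,5} on a big-endian target matches
   vspltisw 5 viewed as halfwords.  */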
6224
6225 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6226 instruction, filling in the bottom elements with 0 or -1.
6227
6228 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6229 for the number of zeroes to shift in, or negative for the number of 0xff
6230 bytes to shift in.
6231
6232 OP is a CONST_VECTOR. */
6233
6234 int
6235 vspltis_shifted (rtx op)
6236 {
6237 machine_mode mode = GET_MODE (op);
6238 machine_mode inner = GET_MODE_INNER (mode);
6239
6240 unsigned i, j;
6241 unsigned nunits;
6242 unsigned mask;
6243
6244 HOST_WIDE_INT val;
6245
6246 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6247 return false;
6248
6249 /* We need to create pseudo registers to do the shift, so don't recognize
6250 shift vector constants after reload. Don't match it even before RA
6251 once split1 is done, because there won't be another splitting pass
6252 before RA to do the splitting. */
6253 if (!can_create_pseudo_p ()
6254 || (cfun->curr_properties & PROP_rtl_split_insns))
6255 return false;
6256
6257 nunits = GET_MODE_NUNITS (mode);
6258 mask = GET_MODE_MASK (inner);
6259
6260 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6261
6262 /* Check if the value can really be the operand of a vspltis[bhw]. */
6263 if (EASY_VECTOR_15 (val))
6264 ;
6265
6266 /* Also check if we are loading up the most significant bit which can be done
6267 by loading up -1 and shifting the value left by -1. */
6268 else if (EASY_VECTOR_MSB (val, inner))
6269 ;
6270
6271 else
6272 return 0;
6273
6274 /* Check that VAL is present in every element until we find elements
6275 that are 0 or all 1 bits. */
6276 for (i = 1; i < nunits; ++i)
6277 {
6278 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6279 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6280
6281 /* If the value isn't the splat value, check for the remaining elements
6282 being 0/-1. */
6283 if (val != elt_val)
6284 {
6285 if (elt_val == 0)
6286 {
6287 for (j = i+1; j < nunits; ++j)
6288 {
6289 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6290 if (const_vector_elt_as_int (op, elt2) != 0)
6291 return 0;
6292 }
6293
6294 return (nunits - i) * GET_MODE_SIZE (inner);
6295 }
6296
6297 else if ((elt_val & mask) == mask)
6298 {
6299 for (j = i+1; j < nunits; ++j)
6300 {
6301 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6302 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6303 return 0;
6304 }
6305
6306 return -((nunits - i) * GET_MODE_SIZE (inner));
6307 }
6308
6309 else
6310 return 0;
6311 }
6312 }
6313
6314 /* If all elements are equal, we don't need to do VSLDOI. */
6315 return 0;
6316 }
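
/* For example, on a big-endian target the V4SI constant {5, 5, 5, 0} can
   be built as vspltisw 5 followed by a vsldoi that shifts in one zero
   word, so this returns 4 (bytes of zeros); {-1, 0, 0, 0} would likewise
   return 12.  */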
6317
6318
6319 /* Return non-zero (element mode byte size) if OP is of the given MODE
6320 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6321
6322 int
6323 easy_altivec_constant (rtx op, machine_mode mode)
6324 {
6325 unsigned step, copies;
6326
6327 if (mode == VOIDmode)
6328 mode = GET_MODE (op);
6329 else if (mode != GET_MODE (op))
6330 return 0;
6331
6332 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6333 constants. */
6334 if (mode == V2DFmode)
6335 return zero_constant (op, mode) ? 8 : 0;
6336
6337 else if (mode == V2DImode)
6338 {
6339 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6340 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6341 return 0;
6342
6343 if (zero_constant (op, mode))
6344 return 8;
6345
6346 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6347 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6348 return 8;
6349
6350 return 0;
6351 }
6352
6353 /* V1TImode is a special container for TImode. Ignore for now. */
6354 else if (mode == V1TImode)
6355 return 0;
6356
6357 /* Start with a vspltisw. */
6358 step = GET_MODE_NUNITS (mode) / 4;
6359 copies = 1;
6360
6361 if (vspltis_constant (op, step, copies))
6362 return 4;
6363
6364 /* Then try with a vspltish. */
6365 if (step == 1)
6366 copies <<= 1;
6367 else
6368 step >>= 1;
6369
6370 if (vspltis_constant (op, step, copies))
6371 return 2;
6372
6373 /* And finally a vspltisb. */
6374 if (step == 1)
6375 copies <<= 1;
6376 else
6377 step >>= 1;
6378
6379 if (vspltis_constant (op, step, copies))
6380 return 1;
6381
6382 if (vspltis_shifted (op) != 0)
6383 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6384
6385 return 0;
6386 }
6387
6388 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6389 result is OP. Abort if it is not possible. */
6390
6391 rtx
6392 gen_easy_altivec_constant (rtx op)
6393 {
6394 machine_mode mode = GET_MODE (op);
6395 int nunits = GET_MODE_NUNITS (mode);
6396 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6397 unsigned step = nunits / 4;
6398 unsigned copies = 1;
6399
6400 /* Start with a vspltisw. */
6401 if (vspltis_constant (op, step, copies))
6402 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6403
6404 /* Then try with a vspltish. */
6405 if (step == 1)
6406 copies <<= 1;
6407 else
6408 step >>= 1;
6409
6410 if (vspltis_constant (op, step, copies))
6411 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6412
6413 /* And finally a vspltisb. */
6414 if (step == 1)
6415 copies <<= 1;
6416 else
6417 step >>= 1;
6418
6419 if (vspltis_constant (op, step, copies))
6420 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6421
6422 gcc_unreachable ();
6423 }
6424
6425 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6426 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6427
6428 Return the number of instructions needed (1 or 2) via the address
6429 pointed to by NUM_INSNS_PTR.
6430
6431 Return the constant that is being splatted via CONSTANT_PTR. */
6432
6433 bool
6434 xxspltib_constant_p (rtx op,
6435 machine_mode mode,
6436 int *num_insns_ptr,
6437 int *constant_ptr)
6438 {
6439 size_t nunits = GET_MODE_NUNITS (mode);
6440 size_t i;
6441 HOST_WIDE_INT value;
6442 rtx element;
6443
6444 /* Set the returned values to out of bound values. */
6445 *num_insns_ptr = -1;
6446 *constant_ptr = 256;
6447
6448 if (!TARGET_P9_VECTOR)
6449 return false;
6450
6451 if (mode == VOIDmode)
6452 mode = GET_MODE (op);
6453
6454 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6455 return false;
6456
6457 /* Handle (vec_duplicate <constant>). */
6458 if (GET_CODE (op) == VEC_DUPLICATE)
6459 {
6460 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6461 && mode != V2DImode)
6462 return false;
6463
6464 element = XEXP (op, 0);
6465 if (!CONST_INT_P (element))
6466 return false;
6467
6468 value = INTVAL (element);
6469 if (!IN_RANGE (value, -128, 127))
6470 return false;
6471 }
6472
6473 /* Handle (const_vector [...]). */
6474 else if (GET_CODE (op) == CONST_VECTOR)
6475 {
6476 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6477 && mode != V2DImode)
6478 return false;
6479
6480 element = CONST_VECTOR_ELT (op, 0);
6481 if (!CONST_INT_P (element))
6482 return false;
6483
6484 value = INTVAL (element);
6485 if (!IN_RANGE (value, -128, 127))
6486 return false;
6487
6488 for (i = 1; i < nunits; i++)
6489 {
6490 element = CONST_VECTOR_ELT (op, i);
6491 if (!CONST_INT_P (element))
6492 return false;
6493
6494 if (value != INTVAL (element))
6495 return false;
6496 }
6497 }
6498
6499 /* Handle integer constants being loaded into the upper part of the VSX
6500 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6501 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6502 else if (CONST_INT_P (op))
6503 {
6504 if (!SCALAR_INT_MODE_P (mode))
6505 return false;
6506
6507 value = INTVAL (op);
6508 if (!IN_RANGE (value, -128, 127))
6509 return false;
6510
6511 if (!IN_RANGE (value, -1, 0))
6512 {
6513 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6514 return false;
6515
6516 if (EASY_VECTOR_15 (value))
6517 return false;
6518 }
6519 }
6520
6521 else
6522 return false;
6523
6524 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6525 sign extend. Special case 0/-1 to allow getting any VSX register instead
6526 of an Altivec register. */
6527 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6528 && EASY_VECTOR_15 (value))
6529 return false;
6530
6531 /* Return # of instructions and the constant byte for XXSPLTIB. */
6532 if (mode == V16QImode)
6533 *num_insns_ptr = 1;
6534
6535 else if (IN_RANGE (value, -1, 0))
6536 *num_insns_ptr = 1;
6537
6538 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6539 single XXSPLTIW or XXSPLTIDP instruction. */
6540 else if (vsx_prefixed_constant (op, mode))
6541 return false;
6542
6543 /* Return XXSPLTIB followed by a sign extend operation to convert the
6544 constant to V8HImode or V4SImode. */
6545 else
6546 *num_insns_ptr = 2;
6547
6548 *constant_ptr = (int) value;
6549 return true;
6550 }
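
/* For example, a V16QI splat of 100 needs just "xxspltib vN,100"
   (*NUM_INSNS_PTR = 1), while on a power9 (ISA 3.0) target a V8HI splat
   of 33 is costed as two insns, xxspltib plus a sign extension of each
   byte to a halfword, since 33 is outside the -16..15 range that a single
   vspltish could handle.  */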
6551
6552 const char *
6553 output_vec_const_move (rtx *operands)
6554 {
6555 int shift;
6556 machine_mode mode;
6557 rtx dest, vec;
6558
6559 dest = operands[0];
6560 vec = operands[1];
6561 mode = GET_MODE (dest);
6562
6563 if (TARGET_VSX)
6564 {
6565 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6566 int xxspltib_value = 256;
6567 int num_insns = -1;
6568
6569 if (zero_constant (vec, mode))
6570 {
6571 if (TARGET_P9_VECTOR)
6572 return "xxspltib %x0,0";
6573
6574 else if (dest_vmx_p)
6575 return "vspltisw %0,0";
6576
6577 else
6578 return "xxlxor %x0,%x0,%x0";
6579 }
6580
6581 if (all_ones_constant (vec, mode))
6582 {
6583 if (TARGET_P9_VECTOR)
6584 return "xxspltib %x0,255";
6585
6586 else if (dest_vmx_p)
6587 return "vspltisw %0,-1";
6588
6589 else if (TARGET_P8_VECTOR)
6590 return "xxlorc %x0,%x0,%x0";
6591
6592 else
6593 gcc_unreachable ();
6594 }
6595
6596 vec_const_128bit_type vsx_const;
6597 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6598 {
6599 unsigned imm = constant_generates_lxvkq (&vsx_const);
6600 if (imm)
6601 {
6602 operands[2] = GEN_INT (imm);
6603 return "lxvkq %x0,%2";
6604 }
6605
6606 imm = constant_generates_xxspltiw (&vsx_const);
6607 if (imm)
6608 {
6609 operands[2] = GEN_INT (imm);
6610 return "xxspltiw %x0,%2";
6611 }
6612
6613 imm = constant_generates_xxspltidp (&vsx_const);
6614 if (imm)
6615 {
6616 operands[2] = GEN_INT (imm);
6617 return "xxspltidp %x0,%2";
6618 }
6619 }
6620
6621 if (TARGET_P9_VECTOR
6622 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6623 {
6624 if (num_insns == 1)
6625 {
6626 operands[2] = GEN_INT (xxspltib_value & 0xff);
6627 return "xxspltib %x0,%2";
6628 }
6629
6630 return "#";
6631 }
6632 }
6633
6634 if (TARGET_ALTIVEC)
6635 {
6636 rtx splat_vec;
6637
6638 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6639 if (zero_constant (vec, mode))
6640 return "vspltisw %0,0";
6641
6642 if (all_ones_constant (vec, mode))
6643 return "vspltisw %0,-1";
6644
6645 /* Do we need to construct a value using VSLDOI? */
6646 shift = vspltis_shifted (vec);
6647 if (shift != 0)
6648 return "#";
6649
6650 splat_vec = gen_easy_altivec_constant (vec);
6651 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6652 operands[1] = XEXP (splat_vec, 0);
6653 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6654 return "#";
6655
6656 switch (GET_MODE (splat_vec))
6657 {
6658 case E_V4SImode:
6659 return "vspltisw %0,%1";
6660
6661 case E_V8HImode:
6662 return "vspltish %0,%1";
6663
6664 case E_V16QImode:
6665 return "vspltisb %0,%1";
6666
6667 default:
6668 gcc_unreachable ();
6669 }
6670 }
6671
6672 gcc_unreachable ();
6673 }
6674
6675 /* Initialize vector TARGET to VALS. */
6676
6677 void
6678 rs6000_expand_vector_init (rtx target, rtx vals)
6679 {
6680 machine_mode mode = GET_MODE (target);
6681 machine_mode inner_mode = GET_MODE_INNER (mode);
6682 unsigned int n_elts = GET_MODE_NUNITS (mode);
6683 int n_var = 0, one_var = -1;
6684 bool all_same = true, all_const_zero = true;
6685 rtx x, mem;
6686 unsigned int i;
6687
6688 for (i = 0; i < n_elts; ++i)
6689 {
6690 x = XVECEXP (vals, 0, i);
6691 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6692 ++n_var, one_var = i;
6693 else if (x != CONST0_RTX (inner_mode))
6694 all_const_zero = false;
6695
6696 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6697 all_same = false;
6698 }
6699
6700 if (n_var == 0)
6701 {
6702 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6703 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6704 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6705 {
6706 /* Zero register. */
6707 emit_move_insn (target, CONST0_RTX (mode));
6708 return;
6709 }
6710 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6711 {
6712 /* Splat immediate. */
6713 emit_insn (gen_rtx_SET (target, const_vec));
6714 return;
6715 }
6716 else
6717 {
6718 /* Load from constant pool. */
6719 emit_move_insn (target, const_vec);
6720 return;
6721 }
6722 }
6723
6724 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6725 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6726 {
6727 rtx op[2];
6728 size_t i;
6729 size_t num_elements = all_same ? 1 : 2;
6730 for (i = 0; i < num_elements; i++)
6731 {
6732 op[i] = XVECEXP (vals, 0, i);
6733 /* Just in case there is a SUBREG with a smaller mode, do a
6734 conversion. */
6735 if (GET_MODE (op[i]) != inner_mode)
6736 {
6737 rtx tmp = gen_reg_rtx (inner_mode);
6738 convert_move (tmp, op[i], 0);
6739 op[i] = tmp;
6740 }
6741 /* Allow load with splat double word. */
6742 else if (MEM_P (op[i]))
6743 {
6744 if (!all_same)
6745 op[i] = force_reg (inner_mode, op[i]);
6746 }
6747 else if (!REG_P (op[i]))
6748 op[i] = force_reg (inner_mode, op[i]);
6749 }
6750
6751 if (all_same)
6752 {
6753 if (mode == V2DFmode)
6754 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6755 else
6756 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6757 }
6758 else
6759 {
6760 if (mode == V2DFmode)
6761 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6762 else
6763 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6764 }
6765 return;
6766 }
6767
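/* Illustrative sketch (an assumption, not part of the original source):
   for V2DFmode/V2DImode the two cases above typically map to single
   instructions.  A memory splat keeps the MEM operand so vsx_splat_*
   can emit lxvdsx, and a concat of two registers becomes one xxpermdi
   (or mtvsrdd from GPRs on ISA 3.0):

	typedef double v2df __attribute__ ((vector_size (16)));
	v2df dup (const double *p) { return (v2df) { *p, *p }; }

   is expected to assemble to a single "lxvdsx" load-and-splat.  */
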
6768 /* Special case initializing vector int if we are on 64-bit systems with
6769 direct move or we have the ISA 3.0 instructions. */
6770 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6771 && TARGET_DIRECT_MOVE_64BIT)
6772 {
6773 if (all_same)
6774 {
6775 rtx element0 = XVECEXP (vals, 0, 0);
6776 if (MEM_P (element0))
6777 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6778 else
6779 element0 = force_reg (SImode, element0);
6780
6781 if (TARGET_P9_VECTOR)
6782 emit_insn (gen_vsx_splat_v4si (target, element0));
6783 else
6784 {
6785 rtx tmp = gen_reg_rtx (DImode);
6786 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6787 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6788 }
6789 return;
6790 }
6791 else
6792 {
6793 rtx elements[4];
6794 size_t i;
6795
6796 for (i = 0; i < 4; i++)
6797 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6798
6799 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6800 elements[2], elements[3]));
6801 return;
6802 }
6803 }
6804
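/* A hedged example of the V4SImode paths above (not from the original
   source): with -mcpu=power9, an all-same initializer such as

	typedef int v4si __attribute__ ((vector_size (16)));
	v4si splat (int n) { return (v4si) { n, n, n, n }; }

   should become a single mtvsrws via vsx_splat_v4si; on power8 the
   value is instead zero-extended to DImode, moved with a direct move,
   and splatted in the vector unit.  Four distinct values go through
   vsx_init_v4si, which roughly merges pairs of 32-bit values into
   doublewords before concatenating them into the vector register.  */
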
6805 /* With single-precision floating point on VSX, we know that internally
6806 single precision is actually represented as a double, so we either make
6807 2 V2DF vectors and convert those vectors to single precision, or do one
6808 conversion and splat the result to the other elements. */
6809 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6810 {
6811 if (all_same)
6812 {
6813 rtx element0 = XVECEXP (vals, 0, 0);
6814
6815 if (TARGET_P9_VECTOR)
6816 {
6817 if (MEM_P (element0))
6818 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6819
6820 emit_insn (gen_vsx_splat_v4sf (target, element0));
6821 }
6822
6823 else
6824 {
6825 rtx freg = gen_reg_rtx (V4SFmode);
6826 rtx sreg = force_reg (SFmode, element0);
6827 rtx cvt = (TARGET_XSCVDPSPN
6828 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6829 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6830
6831 emit_insn (cvt);
6832 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6833 const0_rtx));
6834 }
6835 }
6836 else
6837 {
6838 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6839 {
6840 rtx tmp_sf[4];
6841 rtx tmp_si[4];
6842 rtx tmp_di[4];
6843 rtx mrg_di[4];
6844 for (i = 0; i < 4; i++)
6845 {
6846 tmp_si[i] = gen_reg_rtx (SImode);
6847 tmp_di[i] = gen_reg_rtx (DImode);
6848 mrg_di[i] = gen_reg_rtx (DImode);
6849 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6850 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6851 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6852 }
6853
6854 if (!BYTES_BIG_ENDIAN)
6855 {
6856 std::swap (tmp_di[0], tmp_di[1]);
6857 std::swap (tmp_di[2], tmp_di[3]);
6858 }
6859
6860 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6861 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6862 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6863 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6864
6865 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6866 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6867 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6868 }
6869 else
6870 {
6871 rtx dbl_even = gen_reg_rtx (V2DFmode);
6872 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6873 rtx flt_even = gen_reg_rtx (V4SFmode);
6874 rtx flt_odd = gen_reg_rtx (V4SFmode);
6875 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6876 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6877 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6878 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6879
6880 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6881 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6882 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6883 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6884 rs6000_expand_extract_even (target, flt_even, flt_odd);
6885 }
6886 }
6887 return;
6888 }
6889
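/* Worked sketch of the V4SFmode strategy above (illustrative, not from
   the original source).  Given

	typedef float v4sf __attribute__ ((vector_size (16)));
	v4sf make (float a, float b, float c, float d)
	{ return (v4sf) { a, b, c, d }; }

   the pre-power8 path builds two V2DF vectors { a, b } and { c, d },
   converts each pair with xvcvdpsp (each result lands in the even word
   of its doubleword), and interleaves the even elements to obtain
   { a, b, c, d }.  The all_same path instead converts once with
   xscvdpspn (or xscvdpsp) and splats word 0 with xxspltw.  */
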
6890 /* Special case initializing vector short/char splats if we are on 64-bit
6891 systems with direct move. */
6892 if (all_same && TARGET_DIRECT_MOVE_64BIT
6893 && (mode == V16QImode || mode == V8HImode))
6894 {
6895 rtx op0 = XVECEXP (vals, 0, 0);
6896 rtx di_tmp = gen_reg_rtx (DImode);
6897
6898 if (!REG_P (op0))
6899 op0 = force_reg (GET_MODE_INNER (mode), op0);
6900
6901 if (mode == V16QImode)
6902 {
6903 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6904 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6905 return;
6906 }
6907
6908 if (mode == V8HImode)
6909 {
6910 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6911 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6912 return;
6913 }
6914 }
6915
6916 /* Store value to stack temp. Load vector element. Splat. However, splat
6917 of 64-bit items is not supported on Altivec. */
6918 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6919 {
6920 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6921 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6922 XVECEXP (vals, 0, 0));
6923 x = gen_rtx_UNSPEC (VOIDmode,
6924 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6925 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6926 gen_rtvec (2,
6927 gen_rtx_SET (target, mem),
6928 x)));
6929 x = gen_rtx_VEC_SELECT (inner_mode, target,
6930 gen_rtx_PARALLEL (VOIDmode,
6931 gen_rtvec (1, const0_rtx)));
6932 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6933 return;
6934 }
6935
6936 /* One field is non-constant. Load constant then overwrite
6937 varying field. */
6938 if (n_var == 1)
6939 {
6940 rtx copy = copy_rtx (vals);
6941
6942 /* Load constant part of vector, substitute neighboring value for
6943 varying element. */
6944 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6945 rs6000_expand_vector_init (target, copy);
6946
6947 /* Insert variable. */
6948 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
6949 GEN_INT (one_var));
6950 return;
6951 }
6952
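/* Worked example of the n_var == 1 path (illustrative, not from the
   original source): for

	typedef int v4si __attribute__ ((vector_size (16)));
	v4si mostly_const (int x) { return (v4si) { 1, 2, x, 4 }; }

   one_var == 2, so the copy is initialized as the easy constant
   { 1, 2, 4, 4 } (element 3's value substituted for the varying slot)
   and the live value x is then inserted into element 2 with
   rs6000_expand_vector_set.  */
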
6953 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
6954 {
6955 rtx op[16];
6956 /* Force the values into word_mode registers. */
6957 for (i = 0; i < n_elts; i++)
6958 {
6959 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
6960 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
6961 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
6962 }
6963
6964 /* Take unsigned char on 64-bit big endian as an example for the
6965 construction below; the input values are: A, B, C, D, ..., O, P. */
6966
6967 if (TARGET_DIRECT_MOVE_128)
6968 {
6969 /* Move to VSX register with vec_concat, each has 2 values.
6970 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6971 vr1[1] = { xxxxxxxC, xxxxxxxD };
6972 ...
6973 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6974 rtx vr1[8];
6975 for (i = 0; i < n_elts / 2; i++)
6976 {
6977 vr1[i] = gen_reg_rtx (V2DImode);
6978 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
6979 op[i * 2 + 1]));
6980 }
6981
6982 /* Pack vectors with 2 values into vectors with 4 values.
6983 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6984 vr2[1] = { xxxExxxF, xxxGxxxH };
6985 vr2[2] = { xxxIxxxJ, xxxKxxxL };
6986 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6987 rtx vr2[4];
6988 for (i = 0; i < n_elts / 4; i++)
6989 {
6990 vr2[i] = gen_reg_rtx (V4SImode);
6991 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
6992 vr1[i * 2 + 1]));
6993 }
6994
6995 /* Pack vectors with 4 values into vectors with 8 values.
6996 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
6997 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
6998 rtx vr3[2];
6999 for (i = 0; i < n_elts / 8; i++)
7000 {
7001 vr3[i] = gen_reg_rtx (V8HImode);
7002 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7003 vr2[i * 2 + 1]));
7004 }
7005
7006 /* If it's V8HImode, we are done; set TARGET and return. */
7007 if (mode == V8HImode)
7008 {
7009 emit_insn (gen_rtx_SET (target, vr3[0]));
7010 return;
7011 }
7012
7013 /* Pack the two vectors with 8 values into one vector with 16 values. */
7014 rtx res = gen_reg_rtx (V16QImode);
7015 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7016 emit_insn (gen_rtx_SET (target, res));
7017 }
7018 else
7019 {
7020 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7021 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7022 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7023 rtx perm_idx;
7024
7025 /* Set up some common gen routines and values. */
7026 if (BYTES_BIG_ENDIAN)
7027 {
7028 if (mode == V16QImode)
7029 {
7030 merge_v16qi = gen_altivec_vmrghb;
7031 merge_v8hi = gen_altivec_vmrglh;
7032 }
7033 else
7034 merge_v8hi = gen_altivec_vmrghh;
7035
7036 merge_v4si = gen_altivec_vmrglw;
7037 perm_idx = GEN_INT (3);
7038 }
7039 else
7040 {
7041 if (mode == V16QImode)
7042 {
7043 merge_v16qi = gen_altivec_vmrglb;
7044 merge_v8hi = gen_altivec_vmrghh;
7045 }
7046 else
7047 merge_v8hi = gen_altivec_vmrglh;
7048
7049 merge_v4si = gen_altivec_vmrghw;
7050 perm_idx = GEN_INT (0);
7051 }
7052
7053 /* Move to VSX register with direct move.
7054 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7055 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7056 ...
7057 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7058 rtx vr_qi[16];
7059 for (i = 0; i < n_elts; i++)
7060 {
7061 vr_qi[i] = gen_reg_rtx (V16QImode);
7062 if (TARGET_POWERPC64)
7063 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7064 else
7065 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7066 }
7067
7068 /* Merge/move to vector short.
7069 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7070 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7071 ...
7072 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7073 rtx vr_hi[8];
7074 for (i = 0; i < 8; i++)
7075 {
7076 rtx tmp = vr_qi[i];
7077 if (mode == V16QImode)
7078 {
7079 tmp = gen_reg_rtx (V16QImode);
7080 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7081 }
7082 vr_hi[i] = gen_reg_rtx (V8HImode);
7083 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7084 }
7085
7086 /* Merge vector short to vector int.
7087 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7088 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7089 ...
7090 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7091 rtx vr_si[4];
7092 for (i = 0; i < 4; i++)
7093 {
7094 rtx tmp = gen_reg_rtx (V8HImode);
7095 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7096 vr_si[i] = gen_reg_rtx (V4SImode);
7097 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7098 }
7099
7100 /* Merge vector int to vector long.
7101 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7102 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7103 rtx vr_di[2];
7104 for (i = 0; i < 2; i++)
7105 {
7106 rtx tmp = gen_reg_rtx (V4SImode);
7107 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7108 vr_di[i] = gen_reg_rtx (V2DImode);
7109 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7110 }
7111
7112 rtx res = gen_reg_rtx (V2DImode);
7113 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7114 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7115 }
7116
7117 return;
7118 }
7119
7120 /* Construct the vector in memory one field at a time
7121 and load the whole vector. */
7122 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7123 for (i = 0; i < n_elts; i++)
7124 emit_move_insn (adjust_address_nv (mem, inner_mode,
7125 i * GET_MODE_SIZE (inner_mode)),
7126 XVECEXP (vals, 0, i));
7127 emit_move_insn (target, mem);
7128 }
7129
7130 /* Insert VAL into element IDX of TARGET; VAL's size equals the vector
7131 element size, and IDX is a variable counted in element units (p9+). */
7132
7133 static void
7134 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7135 {
7136 machine_mode mode = GET_MODE (target);
7137
7138 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7139
7140 machine_mode inner_mode = GET_MODE (val);
7141
7142 int width = GET_MODE_SIZE (inner_mode);
7143
7144 gcc_assert (width >= 1 && width <= 8);
7145
7146 int shift = exact_log2 (width);
7147
7148 machine_mode idx_mode = GET_MODE (idx);
7149
7150 machine_mode shift_mode;
7151 rtx (*gen_ashl)(rtx, rtx, rtx);
7152 rtx (*gen_lvsl)(rtx, rtx);
7153 rtx (*gen_lvsr)(rtx, rtx);
7154
7155 if (TARGET_POWERPC64)
7156 {
7157 shift_mode = DImode;
7158 gen_ashl = gen_ashldi3;
7159 gen_lvsl = gen_altivec_lvsl_reg_di;
7160 gen_lvsr = gen_altivec_lvsr_reg_di;
7161 }
7162 else
7163 {
7164 shift_mode = SImode;
7165 gen_ashl = gen_ashlsi3;
7166 gen_lvsl = gen_altivec_lvsl_reg_si;
7167 gen_lvsr = gen_altivec_lvsr_reg_si;
7168 }
7169 /* Generate the IDX for the permute shift; WIDTH is the vector element
7170 size. idx = idx * width. */
7171 rtx tmp = gen_reg_rtx (shift_mode);
7172 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7173
7174 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7175
7176 /* lvsr v1,0,idx. */
7177 rtx pcvr = gen_reg_rtx (V16QImode);
7178 emit_insn (gen_lvsr (pcvr, tmp));
7179
7180 /* lvsl v2,0,idx. */
7181 rtx pcvl = gen_reg_rtx (V16QImode);
7182 emit_insn (gen_lvsl (pcvl, tmp));
7183
7184 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7185
7186 rtx permr
7187 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr);
7188 emit_insn (permr);
7189
7190 rs6000_expand_vector_set (target, val, const0_rtx);
7191
7192 rtx perml
7193 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl);
7194 emit_insn (perml);
7195 }
7196
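/* Rough shape of the sequence emitted above (a sketch, not from the
   original source), for a variable IDX on a power9-class target:

	sldi  rT,rIDX,log2(width)	# byte offset of the element
	lvsr  vC,0,rT			# permute control from the offset
	vperm vD,vD,vD,vC		# rotate element IDX into slot 0
	(insert VAL into element 0 via rs6000_expand_vector_set)
	lvsl  vC2,0,rT			# inverse permute control
	vperm vD,vD,vD,vC2		# rotate the vector back

   i.e. rotate, do a constant-index insert, and rotate back.  */
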
7197 /* Insert VAL into element IDX of TARGET; VAL's size equals the vector
7198 element size, and IDX is a variable counted in element units (p7 & p8). */
7199
7200 static void
7201 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7202 {
7203 machine_mode mode = GET_MODE (target);
7204
7205 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7206
7207 machine_mode inner_mode = GET_MODE (val);
7208 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7209
7210 int width = GET_MODE_SIZE (inner_mode);
7211 gcc_assert (width >= 1 && width <= 4);
7212
7213 int shift = exact_log2 (width);
7214
7215 machine_mode idx_mode = GET_MODE (idx);
7216
7217 machine_mode shift_mode;
7218 rtx (*gen_ashl)(rtx, rtx, rtx);
7219 rtx (*gen_add)(rtx, rtx, rtx);
7220 rtx (*gen_sub)(rtx, rtx, rtx);
7221 rtx (*gen_lvsl)(rtx, rtx);
7222
7223 if (TARGET_POWERPC64)
7224 {
7225 shift_mode = DImode;
7226 gen_ashl = gen_ashldi3;
7227 gen_add = gen_adddi3;
7228 gen_sub = gen_subdi3;
7229 gen_lvsl = gen_altivec_lvsl_reg_di;
7230 }
7231 else
7232 {
7233 shift_mode = SImode;
7234 gen_ashl = gen_ashlsi3;
7235 gen_add = gen_addsi3;
7236 gen_sub = gen_subsi3;
7237 gen_lvsl = gen_altivec_lvsl_reg_si;
7238 }
7239
7240 /* idx = idx * width. */
7241 rtx tmp = gen_reg_rtx (shift_mode);
7242 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7243
7244 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7245
7246 /* For LE: idx = idx + 8; for BE: idx = (24 - width) - idx. */
7247 if (!BYTES_BIG_ENDIAN)
7248 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7249 else
7250 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7251
7252 /* lxv vs33, mask.
7253 DImode: 0xffffffffffffffff0000000000000000
7254 SImode: 0x00000000ffffffff0000000000000000
7255 HImode: 0x000000000000ffff0000000000000000.
7256 QImode: 0x00000000000000ff0000000000000000. */
7257 rtx mask = gen_reg_rtx (V16QImode);
7258 rtx mask_v2di = gen_reg_rtx (V2DImode);
7259 rtvec v = rtvec_alloc (2);
7260 if (!BYTES_BIG_ENDIAN)
7261 {
7262 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7263 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7264 }
7265 else
7266 {
7267 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7268 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7269 }
7270 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7271 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7272 emit_insn (gen_rtx_SET (mask, sub_mask));
7273
7274 /* mtvsrd[wz] f0,tmp_val. */
7275 rtx tmp_val = gen_reg_rtx (SImode);
7276 if (inner_mode == E_SFmode)
7277 if (TARGET_DIRECT_MOVE_64BIT)
7278 emit_insn (gen_movsi_from_sf (tmp_val, val));
7279 else
7280 {
7281 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7282 emit_insn (gen_movsf_hardfloat (stack, val));
7283 rtx stack2 = copy_rtx (stack);
7284 PUT_MODE (stack2, SImode);
7285 emit_move_insn (tmp_val, stack2);
7286 }
7287 else
7288 tmp_val = force_reg (SImode, val);
7289
7290 rtx val_v16qi = gen_reg_rtx (V16QImode);
7291 rtx val_v2di = gen_reg_rtx (V2DImode);
7292 rtvec vec_val = rtvec_alloc (2);
7293 if (!BYTES_BIG_ENDIAN)
7294 {
7295 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7296 RTVEC_ELT (vec_val, 1) = tmp_val;
7297 }
7298 else
7299 {
7300 RTVEC_ELT (vec_val, 0) = tmp_val;
7301 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7302 }
7303 emit_insn (
7304 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7305 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7306 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7307
7308 /* lvsl 13,0,idx. */
7309 rtx pcv = gen_reg_rtx (V16QImode);
7310 emit_insn (gen_lvsl (pcv, tmp));
7311
7312 /* vperm 1,1,1,13. */
7313 /* vperm 0,0,0,13. */
7314 rtx val_perm = gen_reg_rtx (V16QImode);
7315 rtx mask_perm = gen_reg_rtx (V16QImode);
7316 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7317 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7318
7319 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7320
7321 /* xxsel 34,34,32,33. */
7322 emit_insn (
7323 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7324 }
7325
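/* Rough shape of the p7/p8 sequence above (a sketch, not from the
   original source): a byte mask covering one element and the new value
   are both placed in vector registers, rotated into the target element's
   position with the lvsl-derived permute control, and then combined:

	vperm vVAL,vVAL,vVAL,vPC	# rotate value into position
	vperm vMSK,vMSK,vMSK,vPC	# rotate mask into position
	xxsel vDST,vDST,vVAL,vMSK	# take VAL bytes where mask is set

   so only the selected element of the destination is overwritten.  */
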
7326 /* Set field ELT_RTX of TARGET to VAL. */
7327
7328 void
7329 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7330 {
7331 machine_mode mode = GET_MODE (target);
7332 machine_mode inner_mode = GET_MODE_INNER (mode);
7333 rtx reg = gen_reg_rtx (mode);
7334 rtx mask, mem, x;
7335 int width = GET_MODE_SIZE (inner_mode);
7336 int i;
7337
7338 val = force_reg (GET_MODE (val), val);
7339
7340 if (VECTOR_MEM_VSX_P (mode))
7341 {
7342 if (!CONST_INT_P (elt_rtx))
7343 {
7344 /* For V2DI/V2DF, we can leverage the P9 version to generate xxpermdi
7345 when elt_rtx is variable. */
7346 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7347 {
7348 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7349 return;
7350 }
7351 else if (TARGET_VSX)
7352 {
7353 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7354 return;
7355 }
7356 else
7357 gcc_assert (CONST_INT_P (elt_rtx));
7358 }
7359
7360 rtx insn = NULL_RTX;
7361
7362 if (mode == V2DFmode)
7363 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7364
7365 else if (mode == V2DImode)
7366 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7367
7368 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7369 {
7370 if (mode == V4SImode)
7371 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7372 else if (mode == V8HImode)
7373 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7374 else if (mode == V16QImode)
7375 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7376 else if (mode == V4SFmode)
7377 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7378 }
7379
7380 if (insn)
7381 {
7382 emit_insn (insn);
7383 return;
7384 }
7385 }
7386
7387 /* Simplify setting single element vectors like V1TImode. */
7388 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7389 && INTVAL (elt_rtx) == 0)
7390 {
7391 emit_move_insn (target, gen_lowpart (mode, val));
7392 return;
7393 }
7394
7395 /* Load single variable value. */
7396 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7397 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7398 x = gen_rtx_UNSPEC (VOIDmode,
7399 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7400 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7401 gen_rtvec (2,
7402 gen_rtx_SET (reg, mem),
7403 x)));
7404
7405 /* Linear sequence. */
7406 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7407 for (i = 0; i < 16; ++i)
7408 XVECEXP (mask, 0, i) = GEN_INT (i);
7409
7410 /* Set permute mask to insert element into target. */
7411 for (i = 0; i < width; ++i)
7412 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7413 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7414
7415 if (BYTES_BIG_ENDIAN)
7416 x = gen_rtx_UNSPEC (mode,
7417 gen_rtvec (3, target, reg,
7418 force_reg (V16QImode, x)),
7419 UNSPEC_VPERM);
7420 else
7421 {
7422 if (TARGET_P9_VECTOR)
7423 x = gen_rtx_UNSPEC (mode,
7424 gen_rtvec (3, reg, target,
7425 force_reg (V16QImode, x)),
7426 UNSPEC_VPERMR);
7427 else
7428 {
7429 /* Invert selector. We prefer to generate VNAND on P8 so
7430 that future fusion opportunities can kick in, but must
7431 generate VNOR elsewhere. */
7432 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7433 rtx iorx = (TARGET_P8_VECTOR
7434 ? gen_rtx_IOR (V16QImode, notx, notx)
7435 : gen_rtx_AND (V16QImode, notx, notx));
7436 rtx tmp = gen_reg_rtx (V16QImode);
7437 emit_insn (gen_rtx_SET (tmp, iorx));
7438
7439 /* Permute with operands reversed and adjusted selector. */
7440 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7441 UNSPEC_VPERM);
7442 }
7443 }
7444
7445 emit_insn (gen_rtx_SET (target, x));
7446 }
7447
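/* Worked example of the permute mask built above (illustrative, not from
   the original source): inserting into element 1 of a V4SImode vector on
   big endian gives the selector

	{ 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 }

   where bytes 16-19 select the first word of REG (the new value loaded
   via the LVE pattern) and all other bytes keep the original TARGET
   contents.  On little endian the selector is inverted and the operands
   swapped, as the code above arranges.  */
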
7448 /* Extract field ELT from VEC into TARGET. */
7449
7450 void
7451 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7452 {
7453 machine_mode mode = GET_MODE (vec);
7454 machine_mode inner_mode = GET_MODE_INNER (mode);
7455 rtx mem;
7456
7457 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7458 {
7459 switch (mode)
7460 {
7461 default:
7462 break;
7463 case E_V1TImode:
7464 emit_move_insn (target, gen_lowpart (TImode, vec));
7465 return;
7466 case E_V2DFmode:
7467 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7468 return;
7469 case E_V2DImode:
7470 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7471 return;
7472 case E_V4SFmode:
7473 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7474 return;
7475 case E_V16QImode:
7476 if (TARGET_DIRECT_MOVE_64BIT)
7477 {
7478 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7479 return;
7480 }
7481 else
7482 break;
7483 case E_V8HImode:
7484 if (TARGET_DIRECT_MOVE_64BIT)
7485 {
7486 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7487 return;
7488 }
7489 else
7490 break;
7491 case E_V4SImode:
7492 if (TARGET_DIRECT_MOVE_64BIT)
7493 {
7494 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7495 return;
7496 }
7497 break;
7498 }
7499 }
7500 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7501 && TARGET_DIRECT_MOVE_64BIT)
7502 {
7503 if (GET_MODE (elt) != DImode)
7504 {
7505 rtx tmp = gen_reg_rtx (DImode);
7506 convert_move (tmp, elt, 0);
7507 elt = tmp;
7508 }
7509 else if (!REG_P (elt))
7510 elt = force_reg (DImode, elt);
7511
7512 switch (mode)
7513 {
7514 case E_V1TImode:
7515 emit_move_insn (target, gen_lowpart (TImode, vec));
7516 return;
7517
7518 case E_V2DFmode:
7519 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7520 return;
7521
7522 case E_V2DImode:
7523 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7524 return;
7525
7526 case E_V4SFmode:
7527 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7528 return;
7529
7530 case E_V4SImode:
7531 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7532 return;
7533
7534 case E_V8HImode:
7535 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7536 return;
7537
7538 case E_V16QImode:
7539 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7540 return;
7541
7542 default:
7543 gcc_unreachable ();
7544 }
7545 }
7546
7547 /* Allocate mode-sized buffer. */
7548 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7549
7550 emit_move_insn (mem, vec);
7551 if (CONST_INT_P (elt))
7552 {
7553 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7554
7555 /* Add offset to field within buffer matching vector element. */
7556 mem = adjust_address_nv (mem, inner_mode,
7557 modulo_elt * GET_MODE_SIZE (inner_mode));
7558 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7559 }
7560 else
7561 {
7562 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7563 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7564 rtx new_addr = gen_reg_rtx (Pmode);
7565
7566 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7567 if (ele_size > 1)
7568 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7569 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7570 new_addr = change_address (mem, inner_mode, new_addr);
7571 emit_move_insn (target, new_addr);
7572 }
7573 }
7574
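/* A user-level sketch of what this expander handles (an assumption, not
   part of the original source), using GCC's generic vector extensions:

	typedef int v4si __attribute__ ((vector_size (16)));
	int get (v4si v, unsigned long i) { return v[i]; }

   With a constant i the vsx_extract_* patterns above are used directly;
   with a variable i on a 64-bit direct-move target the *_var patterns
   are emitted and later split (see rs6000_split_vec_extract_var below);
   otherwise the vector is spilled to a stack temporary and the element
   is loaded back with a masked, scaled offset.  */
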
7575 /* Return the offset within a memory object (MEM) of a vector type to a given
7576 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7577 the element is constant, we return a constant integer.
7578
7579 Otherwise, we use a base register temporary to calculate the offset after
7580 masking it to fit within the bounds of the vector and scaling it. The
7581 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7582 built-in function. */
7583
7584 static rtx
7585 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7586 {
7587 if (CONST_INT_P (element))
7588 return GEN_INT (INTVAL (element) * scalar_size);
7589
7590 /* All insns should use the 'Q' constraint (address is a single register) if
7591 the element number is not a constant. */
7592 gcc_assert (satisfies_constraint_Q (mem));
7593
7594 /* Mask the element to make sure the element number is between 0 and the
7595 maximum number of elements - 1 so that we don't generate an address
7596 outside the vector. */
7597 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7598 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7599 emit_insn (gen_rtx_SET (base_tmp, and_op));
7600
7601 /* Shift the element to get the byte offset from the element number. */
7602 int shift = exact_log2 (scalar_size);
7603 gcc_assert (shift >= 0);
7604
7605 if (shift > 0)
7606 {
7607 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7608 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7609 }
7610
7611 return base_tmp;
7612 }
7613
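/* Worked example (illustrative, not from the original source): for a
   V4SImode MEM with a variable ELEMENT, scalar_size == 4, so the code
   above emits

	(set base_tmp (and element (const_int 3)))
	(set base_tmp (ashift base_tmp (const_int 2)))

   producing a byte offset in the range 0..12, as required by the
   masking rule in the 64-bit ELFv2 ABI for vec_extract.  */
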
7614 /* Helper function to update PC-relative addresses when we are adjusting a
7615 memory address (ADDR) of a vector to point to a scalar field within the
7616 vector with a constant offset (ELEMENT_OFFSET). If the address is not
7617 valid, we can use the base register temporary (BASE_TMP) to form it. */
7618
7619 static rtx
7620 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7621 {
7622 rtx new_addr = NULL;
7623
7624 gcc_assert (CONST_INT_P (element_offset));
7625
7626 if (GET_CODE (addr) == CONST)
7627 addr = XEXP (addr, 0);
7628
7629 if (GET_CODE (addr) == PLUS)
7630 {
7631 rtx op0 = XEXP (addr, 0);
7632 rtx op1 = XEXP (addr, 1);
7633
7634 if (CONST_INT_P (op1))
7635 {
7636 HOST_WIDE_INT offset
7637 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7638
7639 if (offset == 0)
7640 new_addr = op0;
7641
7642 else
7643 {
7644 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7645 new_addr = gen_rtx_CONST (Pmode, plus);
7646 }
7647 }
7648
7649 else
7650 {
7651 emit_move_insn (base_tmp, addr);
7652 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7653 }
7654 }
7655
7656 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7657 {
7658 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7659 new_addr = gen_rtx_CONST (Pmode, plus);
7660 }
7661
7662 else
7663 gcc_unreachable ();
7664
7665 return new_addr;
7666 }
7667
7668 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7669 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7670 temporary (BASE_TMP) to fixup the address. Return the new memory address
7671 that is valid for reads or writes to a given register (SCALAR_REG).
7672
7673 This function is expected to be called after reload is completed when we are
7674 splitting insns. The temporary BASE_TMP might be set multiple times with
7675 this code. */
7676
7677 rtx
7678 rs6000_adjust_vec_address (rtx scalar_reg,
7679 rtx mem,
7680 rtx element,
7681 rtx base_tmp,
7682 machine_mode scalar_mode)
7683 {
7684 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7685 rtx addr = XEXP (mem, 0);
7686 rtx new_addr;
7687
7688 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7689 gcc_assert (!reg_mentioned_p (base_tmp, element));
7690
7691 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7692 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7693
7694 /* Calculate what we need to add to the address to get the element
7695 address. */
7696 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7697
7698 /* Create the new address pointing to the element within the vector. If we
7699 are adding 0, we don't have to change the address. */
7700 if (element_offset == const0_rtx)
7701 new_addr = addr;
7702
7703 /* A simple indirect address can be converted into a reg + offset
7704 address. */
7705 else if (REG_P (addr) || SUBREG_P (addr))
7706 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7707
7708 /* For references to local static variables, fold a constant offset into the
7709 address. */
7710 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7711 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7712
7713 /* Optimize D-FORM addresses with constant offset with a constant element, to
7714 include the element offset in the address directly. */
7715 else if (GET_CODE (addr) == PLUS)
7716 {
7717 rtx op0 = XEXP (addr, 0);
7718 rtx op1 = XEXP (addr, 1);
7719
7720 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7721 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7722 {
7723 /* op0 should never be r0, because r0+offset is not valid. But it
7724 doesn't hurt to make sure it is not r0. */
7725 gcc_assert (reg_or_subregno (op0) != 0);
7726
7727 /* D-FORM address with constant element number. */
7728 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7729 rtx offset_rtx = GEN_INT (offset);
7730 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7731 }
7732 else
7733 {
7734 /* If we don't have a D-FORM address with a constant element number,
7735 add the two elements in the current address. Then add the offset.
7736
7737 Previously, we tried to add the offset to OP1 and change the
7738 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7739 complicated because we had to verify that op1 was not GPR0 and we
7740 had a constant element offset (due to the way ADDI is defined).
7741 By doing the add of OP0 and OP1 first, and then adding in the
7742 offset, it has the benefit that if D-FORM instructions are
7743 allowed, the offset is part of the memory access to the vector
7744 element. */
7745 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7746 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7747 }
7748 }
7749
7750 else
7751 {
7752 emit_move_insn (base_tmp, addr);
7753 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7754 }
7755
7756 /* If the address isn't valid, move the address into the temporary base
7757 register. Some reasons it could not be valid include:
7758
7759 The address offset overflowed the 16 or 34 bit offset size;
7760 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7761 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7762 Only X_FORM loads can be done, and the address is D_FORM. */
7763
7764 enum insn_form iform
7765 = address_to_insn_form (new_addr, scalar_mode,
7766 reg_to_non_prefixed (scalar_reg, scalar_mode));
7767
7768 if (iform == INSN_FORM_BAD)
7769 {
7770 emit_move_insn (base_tmp, new_addr);
7771 new_addr = base_tmp;
7772 }
7773
7774 return change_address (mem, scalar_mode, new_addr);
7775 }
7776
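/* Worked example (illustrative, not from the original source): for a
   V4SImode vector in memory at (plus r9 48) and a constant element 2
   with SImode scalars, element_offset is 8 and the D-FORM branch above
   folds it directly, giving (plus r9 56).  If the resulting address is
   not valid for the scalar access (e.g. a DS/DQ-form offset rule is
   violated), it is moved into BASE_TMP and used indirectly instead.  */
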
7777 /* Split a variable vec_extract operation into the component instructions. */
7778
7779 void
7780 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7781 rtx tmp_altivec)
7782 {
7783 machine_mode mode = GET_MODE (src);
7784 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7785 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7786 int byte_shift = exact_log2 (scalar_size);
7787
7788 gcc_assert (byte_shift >= 0);
7789
7790 /* If we are given a memory address, optimize to load just the element. We
7791 don't have to adjust the vector element number on little endian
7792 systems. */
7793 if (MEM_P (src))
7794 {
7795 emit_move_insn (dest,
7796 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7797 scalar_mode));
7798 return;
7799 }
7800
7801 else if (REG_P (src) || SUBREG_P (src))
7802 {
7803 int num_elements = GET_MODE_NUNITS (mode);
7804 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7805 int bit_shift = 7 - exact_log2 (num_elements);
7806 rtx element2;
7807 unsigned int dest_regno = reg_or_subregno (dest);
7808 unsigned int src_regno = reg_or_subregno (src);
7809 unsigned int element_regno = reg_or_subregno (element);
7810
7811 gcc_assert (REG_P (tmp_gpr));
7812
7813 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7814 a general purpose register. */
7815 if (TARGET_P9_VECTOR
7816 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7817 && INT_REGNO_P (dest_regno)
7818 && ALTIVEC_REGNO_P (src_regno)
7819 && INT_REGNO_P (element_regno))
7820 {
7821 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7822 rtx element_si = gen_rtx_REG (SImode, element_regno);
7823
7824 if (mode == V16QImode)
7825 emit_insn (BYTES_BIG_ENDIAN
7826 ? gen_vextublx (dest_si, element_si, src)
7827 : gen_vextubrx (dest_si, element_si, src));
7828
7829 else if (mode == V8HImode)
7830 {
7831 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7832 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7833 emit_insn (BYTES_BIG_ENDIAN
7834 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7835 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7836 }
7837
7839 else
7840 {
7841 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7842 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7843 emit_insn (BYTES_BIG_ENDIAN
7844 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7845 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7846 }
7847
7848 return;
7849 }
7850
7852 gcc_assert (REG_P (tmp_altivec));
7853
7854 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7855 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7856 will shift the element into the upper position (adding 3 to convert a
7857 byte shift into a bit shift). */
7858 if (scalar_size == 8)
7859 {
7860 if (!BYTES_BIG_ENDIAN)
7861 {
7862 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7863 element2 = tmp_gpr;
7864 }
7865 else
7866 element2 = element;
7867
7868 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7869 bit. */
7870 emit_insn (gen_rtx_SET (tmp_gpr,
7871 gen_rtx_AND (DImode,
7872 gen_rtx_ASHIFT (DImode,
7873 element2,
7874 GEN_INT (6)),
7875 GEN_INT (64))));
7876 }
7877 else
7878 {
7879 if (!BYTES_BIG_ENDIAN)
7880 {
7881 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7882
7883 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7884 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7885 element2 = tmp_gpr;
7886 }
7887 else
7888 element2 = element;
7889
7890 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7891 }
7892
7893 /* Get the value into the lower byte of the Altivec register where VSLO
7894 expects it. */
7895 if (TARGET_P9_VECTOR)
7896 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7897 else if (can_create_pseudo_p ())
7898 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7899 else
7900 {
7901 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7902 emit_move_insn (tmp_di, tmp_gpr);
7903 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7904 }
7905
7906 /* Do the VSLO to get the value into the final location. */
7907 switch (mode)
7908 {
7909 case E_V2DFmode:
7910 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7911 return;
7912
7913 case E_V2DImode:
7914 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7915 return;
7916
7917 case E_V4SFmode:
7918 {
7919 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7920 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7921 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7922 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7923 tmp_altivec));
7924
7925 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7926 return;
7927 }
7928
7929 case E_V4SImode:
7930 case E_V8HImode:
7931 case E_V16QImode:
7932 {
7933 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7934 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7935 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7936 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7937 tmp_altivec));
7938 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7939 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7940 GEN_INT (64 - bits_in_element)));
7941 return;
7942 }
7943
7944 default:
7945 gcc_unreachable ();
7946 }
7947
7948 return;
7949 }
7950 else
7951 gcc_unreachable ();
7952 }
7953
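/* Worked example of the VSLO strategy above (a sketch, not from the
   original source): extracting variable element E from a V4SImode
   register on little endian first remaps E' = 3 - E, then shifts E'
   left by bit_shift == 5, i.e. E' * 32.  VSLO interprets the relevant
   bits of that value as a byte shift of E' * 4, sliding the wanted word
   into the most-significant position; the doubleword is then moved to a
   GPR and shifted right by 64 - 32 bits to leave the element in the low
   word.  */
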
7954 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
7955 selects whether the alignment is ABI-mandated, optional, or
7956 both ABI and optional alignment. */
7957
7958 unsigned int
7959 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7960 {
7961 if (how != align_opt)
7962 {
7963 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7964 align = 128;
7965 }
7966
7967 if (how != align_abi)
7968 {
7969 if (TREE_CODE (type) == ARRAY_TYPE
7970 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7971 {
7972 if (align < BITS_PER_WORD)
7973 align = BITS_PER_WORD;
7974 }
7975 }
7976
7977 return align;
7978 }
7979
7980 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7981 instructions simply ignore the low bits; VSX memory instructions
7982 are aligned to 4 or 8 bytes. */
7983
7984 static bool
7985 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7986 {
7987 return (STRICT_ALIGNMENT
7988 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7989 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7990 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
7991 && (int) align < VECTOR_ALIGN (mode)))));
7992 }
7993
7994 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
7995
7996 unsigned int
7997 rs6000_special_adjust_field_align (tree type, unsigned int computed)
7998 {
7999 if (computed <= 32 || TYPE_PACKED (type))
8000 return computed;
8001
8002 /* Strip initial arrays. */
8003 while (TREE_CODE (type) == ARRAY_TYPE)
8004 type = TREE_TYPE (type);
8005
8006 /* If RECORD or UNION, recursively find the first field. */
8007 while (AGGREGATE_TYPE_P (type))
8008 {
8009 tree field = TYPE_FIELDS (type);
8010
8011 /* Skip all non-field decls. */
8012 while (field != NULL
8013 && (TREE_CODE (field) != FIELD_DECL
8014 || DECL_FIELD_ABI_IGNORED (field)))
8015 field = DECL_CHAIN (field);
8016
8017 if (! field)
8018 break;
8019
8020 /* A packed field does not contribute any extra alignment. */
8021 if (DECL_PACKED (field))
8022 return computed;
8023
8024 type = TREE_TYPE (field);
8025
8026 /* Strip arrays. */
8027 while (TREE_CODE (type) == ARRAY_TYPE)
8028 type = TREE_TYPE (type);
8029 }
8030
8031 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8032 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8033 computed = MIN (computed, 32);
8034
8035 return computed;
8036 }
8037
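/* Worked example of the AIX rule above (illustrative, not from the
   original source):

	struct s { int i; double d; };

   Under power alignment, d as a non-first field has its alignment
   capped at 32 bits, so it sits at offset 4 and sizeof (struct s) == 12;
   a record whose *first* field is a double (or double _Complex) is
   instead raised back to doubleword alignment by
   rs6000_special_round_type_align below.  */
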
8038 /* AIX increases natural record alignment to doubleword if the innermost first
8039 field is an FP double while the FP fields remain word aligned.
8040 Only called if TYPE initially is a RECORD or UNION. */
8041
8042 unsigned int
8043 rs6000_special_round_type_align (tree type, unsigned int computed,
8044 unsigned int specified)
8045 {
8046 unsigned int align = MAX (computed, specified);
8047
8048 if (TYPE_PACKED (type) || align >= 64)
8049 return align;
8050
8051 /* If RECORD or UNION, recursively find the first field. */
8052 do
8053 {
8054 tree field = TYPE_FIELDS (type);
8055
8056 /* Skip all non-field decls. */
8057 while (field != NULL
8058 && (TREE_CODE (field) != FIELD_DECL
8059 || DECL_FIELD_ABI_IGNORED (field)))
8060 field = DECL_CHAIN (field);
8061
8062 if (! field)
8063 break;
8064
8065 /* A packed field does not contribute any extra alignment. */
8066 if (DECL_PACKED (field))
8067 return align;
8068
8069 type = TREE_TYPE (field);
8070
8071 /* Strip arrays. */
8072 while (TREE_CODE (type) == ARRAY_TYPE)
8073 type = TREE_TYPE (type);
8074 } while (AGGREGATE_TYPE_P (type));
8075
8076 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8077 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8078 align = MAX (align, 64);
8079
8080 return align;
8081 }
8082
8083 /* Darwin increases record alignment to the natural alignment of
8084 the first field. */
8085
8086 unsigned int
8087 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8088 unsigned int specified)
8089 {
8090 unsigned int align = MAX (computed, specified);
8091
8092 if (TYPE_PACKED (type))
8093 return align;
8094
8095 /* Find the first field, looking down into aggregates. */
8096 do {
8097 tree field = TYPE_FIELDS (type);
8098 /* Skip all non-field decls. */
8099 while (field != NULL
8100 && (TREE_CODE (field) != FIELD_DECL
8101 || DECL_FIELD_ABI_IGNORED (field)))
8102 field = DECL_CHAIN (field);
8103 if (! field)
8104 break;
8105 /* A packed field does not contribute any extra alignment. */
8106 if (DECL_PACKED (field))
8107 return align;
8108 type = TREE_TYPE (field);
8109 while (TREE_CODE (type) == ARRAY_TYPE)
8110 type = TREE_TYPE (type);
8111 } while (AGGREGATE_TYPE_P (type));
8112
8113 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8114 align = MAX (align, TYPE_ALIGN (type));
8115
8116 return align;
8117 }
8118
8119 /* Return 1 for an operand in small memory on V.4/eabi. */
8120
8121 int
8122 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8123 machine_mode mode ATTRIBUTE_UNUSED)
8124 {
8125 #if TARGET_ELF
8126 rtx sym_ref;
8127
8128 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8129 return 0;
8130
8131 if (DEFAULT_ABI != ABI_V4)
8132 return 0;
8133
8134 if (SYMBOL_REF_P (op))
8135 sym_ref = op;
8136
8137 else if (GET_CODE (op) != CONST
8138 || GET_CODE (XEXP (op, 0)) != PLUS
8139 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8140 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8141 return 0;
8142
8143 else
8144 {
8145 rtx sum = XEXP (op, 0);
8146 HOST_WIDE_INT summand;
8147
8148 /* We have to be careful here, because it is the referenced address
8149 that must be 32k from _SDA_BASE_, not just the symbol. */
8150 summand = INTVAL (XEXP (sum, 1));
8151 if (summand < 0 || summand > g_switch_value)
8152 return 0;
8153
8154 sym_ref = XEXP (sum, 0);
8155 }
8156
8157 return SYMBOL_REF_SMALL_P (sym_ref);
8158 #else
8159 return 0;
8160 #endif
8161 }
8162
8163 /* Return true if either operand is a general purpose register. */
8164
8165 bool
8166 gpr_or_gpr_p (rtx op0, rtx op1)
8167 {
8168 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8169 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8170 }
8171
8172 /* Return true if this is a move direct operation between GPR registers and
8173 floating point/VSX registers. */
8174
8175 bool
8176 direct_move_p (rtx op0, rtx op1)
8177 {
8178 if (!REG_P (op0) || !REG_P (op1))
8179 return false;
8180
8181 if (!TARGET_DIRECT_MOVE)
8182 return false;
8183
8184 int regno0 = REGNO (op0);
8185 int regno1 = REGNO (op1);
8186 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8187 return false;
8188
8189 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8190 return true;
8191
8192 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8193 return true;
8194
8195 return false;
8196 }
8197
8198 /* Return true if ADDR is an acceptable address for a quad memory
8199 operation of mode MODE (either LQ/STQ for general purpose registers, or
8200 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
8201 address must satisfy the strict (post register allocation) checks on
8202 base registers; if it is false, pseudo registers are also acceptable. */
8203
8204 bool
8205 quad_address_p (rtx addr, machine_mode mode, bool strict)
8206 {
8207 rtx op0, op1;
8208
8209 if (GET_MODE_SIZE (mode) < 16)
8210 return false;
8211
8212 if (legitimate_indirect_address_p (addr, strict))
8213 return true;
8214
8215 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8216 return false;
8217
8218 /* Is this a valid prefixed address? If the bottom four bits of the offset
8219 are non-zero, we could use a prefixed instruction (which does not have the
8220 DQ-form constraint that the traditional instruction had) instead of
8221 forcing the unaligned offset to a GPR. */
8222 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8223 return true;
8224
8225 if (GET_CODE (addr) != PLUS)
8226 return false;
8227
8228 op0 = XEXP (addr, 0);
8229 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8230 return false;
8231
8232 op1 = XEXP (addr, 1);
8233 if (!CONST_INT_P (op1))
8234 return false;
8235
8236 return quad_address_offset_p (INTVAL (op1));
8237 }
8238
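/* Worked example (illustrative, not from the original source): for a
   V2DImode LXV, an address such as (plus r9 (const_int 32)) is accepted
   because 32 is a DQ-form offset (a multiple of 16 in the signed 16-bit
   range), while (plus r9 (const_int 20)) is rejected here unless the
   prefixed-address check above accepts it for a prefixed load such as
   plxv, which has no low-bit alignment restriction on the offset.  */
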
8239 /* Return true if this is a load or store quad operation. This function does
8240 not handle the atomic quad memory instructions. */
8241
8242 bool
8243 quad_load_store_p (rtx op0, rtx op1)
8244 {
8245 bool ret;
8246
8247 if (!TARGET_QUAD_MEMORY)
8248 ret = false;
8249
8250 else if (REG_P (op0) && MEM_P (op1))
8251 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8252 && quad_memory_operand (op1, GET_MODE (op1))
8253 && !reg_overlap_mentioned_p (op0, op1));
8254
8255 else if (MEM_P (op0) && REG_P (op1))
8256 ret = (quad_memory_operand (op0, GET_MODE (op0))
8257 && quad_int_reg_operand (op1, GET_MODE (op1)));
8258
8259 else
8260 ret = false;
8261
8262 if (TARGET_DEBUG_ADDR)
8263 {
8264 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8265 ret ? "true" : "false");
8266 debug_rtx (gen_rtx_SET (op0, op1));
8267 }
8268
8269 return ret;
8270 }
8271
8272 /* Given an address, return a constant offset term if one exists. */
8273
8274 static rtx
8275 address_offset (rtx op)
8276 {
8277 if (GET_CODE (op) == PRE_INC
8278 || GET_CODE (op) == PRE_DEC)
8279 op = XEXP (op, 0);
8280 else if (GET_CODE (op) == PRE_MODIFY
8281 || GET_CODE (op) == LO_SUM)
8282 op = XEXP (op, 1);
8283
8284 if (GET_CODE (op) == CONST)
8285 op = XEXP (op, 0);
8286
8287 if (GET_CODE (op) == PLUS)
8288 op = XEXP (op, 1);
8289
8290 if (CONST_INT_P (op))
8291 return op;
8292
8293 return NULL_RTX;
8294 }
8295
8296 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8297 the mode. If we can't find (or don't know) the alignment of the symbol
8298 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8299 should be pessimistic]. Offsets are validated in the same way as for
8300 reg + offset. */
8301 static bool
8302 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8303 {
8304 /* We should not get here with this. */
8305 gcc_checking_assert (! mode_supports_dq_form (mode));
8306
8307 if (GET_CODE (x) == CONST)
8308 x = XEXP (x, 0);
8309
8310 /* If we are building PIC code, then any symbol must be wrapped in an
8311 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8312 bool machopic_offs_p = false;
8313 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8314 {
8315 x = XVECEXP (x, 0, 0);
8316 machopic_offs_p = true;
8317 }
8318
8319 rtx sym = NULL_RTX;
8320 unsigned HOST_WIDE_INT offset = 0;
8321
8322 if (GET_CODE (x) == PLUS)
8323 {
8324 sym = XEXP (x, 0);
8325 if (! SYMBOL_REF_P (sym))
8326 return false;
8327 if (!CONST_INT_P (XEXP (x, 1)))
8328 return false;
8329 offset = INTVAL (XEXP (x, 1));
8330 }
8331 else if (SYMBOL_REF_P (x))
8332 sym = x;
8333 else if (CONST_INT_P (x))
8334 offset = INTVAL (x);
8335 else if (GET_CODE (x) == LABEL_REF)
8336 offset = 0; // We assume code labels are Pmode aligned
8337 else
8338 return false; // not sure what we have here.
8339
8340 /* If we don't know the alignment of the thing to which the symbol refers,
8341 we assume optimistically it is "enough".
8342 ??? maybe we should be pessimistic instead. */
8343 unsigned align = 0;
8344
8345 if (sym)
8346 {
8347 tree decl = SYMBOL_REF_DECL (sym);
8348 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8349 if (TARGET_MACHO && flag_pic && !machopic_offs_p)
8350 return false;
8351 #if TARGET_MACHO
8352 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8353 /* The decl in an indirection symbol is the original one, which might
8354 be less aligned than the indirection. Our indirections are always
8355 pointer-aligned. */
8356 ;
8357 else
8358 #endif
8359 if (decl && DECL_ALIGN (decl))
8360 align = DECL_ALIGN_UNIT (decl);
8361 }
8362
8363 unsigned int extra = 0;
8364 switch (mode)
8365 {
8366 case E_DFmode:
8367 case E_DDmode:
8368 case E_DImode:
8369 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8370 addressing. */
8371 if (VECTOR_MEM_VSX_P (mode))
8372 return false;
8373
8374 if (!TARGET_POWERPC64)
8375 extra = 4;
8376 else if ((offset & 3) || (align & 3))
8377 return false;
8378 break;
8379
8380 case E_TFmode:
8381 case E_IFmode:
8382 case E_KFmode:
8383 case E_TDmode:
8384 case E_TImode:
8385 case E_PTImode:
8386 extra = 8;
8387 if (!TARGET_POWERPC64)
8388 extra = 12;
8389 else if ((offset & 3) || (align & 3))
8390 return false;
8391 break;
8392
8393 default:
8394 break;
8395 }
8396
8397 /* We only care if the access(es) would cause a change to the high part. */
8398 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8399 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8400 }
8401
8402 /* Return true if the MEM operand is a memory operand suitable for use
8403 with a (full width, possibly multiple) gpr load/store. On
8404 powerpc64 this means the offset must be divisible by 4.
8405 Implements 'Y' constraint.
8406
8407 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8408 a constraint function we know the operand has satisfied a suitable
8409 memory predicate.
8410
8411 Offsetting a lo_sum should not be allowed, except where we know by
8412 alignment that a 32k boundary is not crossed. Note that by
8413 "offsetting" here we mean a further offset to access parts of the
8414 MEM. It's fine to have a lo_sum where the inner address is offset
8415 from a sym, since the same sym+offset will appear in the high part
8416 of the address calculation. */
8417
8418 bool
8419 mem_operand_gpr (rtx op, machine_mode mode)
8420 {
8421 unsigned HOST_WIDE_INT offset;
8422 int extra;
8423 rtx addr = XEXP (op, 0);
8424
8425 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8426 if (TARGET_UPDATE
8427 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8428 && mode_supports_pre_incdec_p (mode)
8429 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8430 return true;
8431
8432 /* Allow prefixed instructions if supported. If the bottom two bits of the
8433 offset are non-zero, we could use a prefixed instruction (which does not
8434 have the DS-form constraint that the traditional instruction had) instead
8435 of forcing the unaligned offset to a GPR. */
8436 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8437 return true;
8438
8439 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8440 really OK. Doing this early avoids teaching all the other machinery
8441 about them. */
8442 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8443 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8444
8445 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8446 if (!rs6000_offsettable_memref_p (op, mode, false))
8447 return false;
8448
8449 op = address_offset (addr);
8450 if (op == NULL_RTX)
8451 return true;
8452
8453 offset = INTVAL (op);
8454 if (TARGET_POWERPC64 && (offset & 3) != 0)
8455 return false;
8456
8457 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8458 if (extra < 0)
8459 extra = 0;
8460
8461 if (GET_CODE (addr) == LO_SUM)
8462 /* For lo_sum addresses, we must allow any offset except one that
8463 causes a wrap, so test only the low 16 bits. */
8464 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8465
8466 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8467 }
8468
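/* Worked example (illustrative, not from the original source): on
   powerpc64, a DImode access has extra == 0, so any offset that is a
   multiple of 4 and fits in signed 16 bits is accepted.  For a TImode
   (multi-GPR) access, extra == 8, so an offset of 32760 is rejected:
   the second doubleword would be accessed at 32768, outside the
   DS-form displacement range.  */
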
8469 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8470 enforce an offset divisible by 4 even for 32-bit. */
8471
8472 bool
8473 mem_operand_ds_form (rtx op, machine_mode mode)
8474 {
8475 unsigned HOST_WIDE_INT offset;
8476 int extra;
8477 rtx addr = XEXP (op, 0);
8478
8479 /* Allow prefixed instructions if supported. If the bottom two bits of the
8480 offset are non-zero, we could use a prefixed instruction (which does not
8481 have the DS-form constraint that the traditional instruction had) instead
8482 of forcing the unaligned offset to a GPR. */
8483 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8484 return true;
8485
8486 if (!offsettable_address_p (false, mode, addr))
8487 return false;
8488
8489 op = address_offset (addr);
8490 if (op == NULL_RTX)
8491 return true;
8492
8493 offset = INTVAL (op);
8494 if ((offset & 3) != 0)
8495 return false;
8496
8497 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8498 if (extra < 0)
8499 extra = 0;
8500
8501 if (GET_CODE (addr) == LO_SUM)
8502 /* For lo_sum addresses, we must allow any offset except one that
8503 causes a wrap, so test only the low 16 bits. */
8504 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8505
8506 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8507 }
8508 \f
8509 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8510
8511 static bool
8512 reg_offset_addressing_ok_p (machine_mode mode)
8513 {
8514 switch (mode)
8515 {
8516 case E_IFmode:
8517 return true;
8518
8519 case E_TFmode:
8520 if (FLOAT128_IBM_P (TFmode))
8521 return true;
8522
8523 /* If TFmode is IEEE 128-bit, treat it like a vector. */
8524 /* fall through */
8525
8526 case E_V16QImode:
8527 case E_V8HImode:
8528 case E_V4SFmode:
8529 case E_V4SImode:
8530 case E_V2DFmode:
8531 case E_V2DImode:
8532 case E_V1TImode:
8533 case E_TImode:
8534 case E_KFmode:
8535 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8536 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8537 a vector mode, if we want to use the VSX registers to move it around,
8538 we need to restrict ourselves to reg+reg addressing. Similarly for
8539 IEEE 128-bit floating point that is passed in a single vector
8540 register. */
8541 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8542 return mode_supports_dq_form (mode);
8543 break;
8544
8545 /* The vector pair/quad types support offset addressing if the
8546 underlying vectors support offset addressing. */
8547 case E_OOmode:
8548 case E_XOmode:
8549 return TARGET_MMA;
8550
8551 case E_SDmode:
8552 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8553 addressing for the LFIWZX and STFIWX instructions. */
8554 if (TARGET_NO_SDMODE_STACK)
8555 return false;
8556 break;
8557
8558 default:
8559 break;
8560 }
8561
8562 return true;
8563 }
8564
8565 static bool
8566 virtual_stack_registers_memory_p (rtx op)
8567 {
8568 int regnum;
8569
8570 if (REG_P (op))
8571 regnum = REGNO (op);
8572
8573 else if (GET_CODE (op) == PLUS
8574 && REG_P (XEXP (op, 0))
8575 && CONST_INT_P (XEXP (op, 1)))
8576 regnum = REGNO (XEXP (op, 0));
8577
8578 else
8579 return false;
8580
8581 return (regnum >= FIRST_VIRTUAL_REGISTER
8582 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8583 }
8584
8585 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8586 is known to not straddle a 32k boundary. This function is used
8587 to determine whether -mcmodel=medium code can use TOC pointer
8588 relative addressing for OP. This means the alignment of the TOC
8589 pointer must also be taken into account, and unfortunately that is
8590 only 8 bytes. */
8591
8592 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8593 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8594 #endif
8595
8596 static bool
8597 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8598 machine_mode mode)
8599 {
8600 tree decl;
8601 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8602
8603 if (!SYMBOL_REF_P (op))
8604 return false;
8605
8606 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8607 SYMBOL_REF. */
8608 if (mode_supports_dq_form (mode))
8609 return false;
8610
8611 dsize = GET_MODE_SIZE (mode);
8612 decl = SYMBOL_REF_DECL (op);
8613 if (!decl)
8614 {
8615 if (dsize == 0)
8616 return false;
8617
8618 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8619 replacing memory addresses with an anchor plus offset. We
8620 could find the decl by rummaging around in the block->objects
8621 VEC for the given offset but that seems like too much work. */
8622 dalign = BITS_PER_UNIT;
8623 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8624 && SYMBOL_REF_ANCHOR_P (op)
8625 && SYMBOL_REF_BLOCK (op) != NULL)
8626 {
8627 struct object_block *block = SYMBOL_REF_BLOCK (op);
8628
8629 dalign = block->alignment;
8630 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8631 }
8632 else if (CONSTANT_POOL_ADDRESS_P (op))
8633 {
8634 /* It would be nice to have get_pool_align()... */
8635 machine_mode cmode = get_pool_mode (op);
8636
8637 dalign = GET_MODE_ALIGNMENT (cmode);
8638 }
8639 }
8640 else if (DECL_P (decl))
8641 {
8642 dalign = DECL_ALIGN (decl);
8643
8644 if (dsize == 0)
8645 {
8646 /* Allow BLKmode when the entire object is known to not
8647 cross a 32k boundary. */
8648 if (!DECL_SIZE_UNIT (decl))
8649 return false;
8650
8651 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8652 return false;
8653
8654 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8655 if (dsize > 32768)
8656 return false;
8657
8658 dalign /= BITS_PER_UNIT;
8659 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8660 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8661 return dalign >= dsize;
8662 }
8663 }
8664 else
8665 gcc_unreachable ();
8666
8667 /* Find how many bits of the alignment we know for this access. */
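/* Editorial worked example: with a decl aligned to 16 bytes the
   alignment is first capped at the 8-byte TOC pointer alignment;
   an OFFSET of 20 then has lowest set bit 4, so the access is only
   known to be 4-byte aligned and DSIZE must be <= 4.  */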
8668 dalign /= BITS_PER_UNIT;
8669 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8670 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8671 mask = dalign - 1;
8672 lsb = offset & -offset;
8673 mask &= lsb - 1;
8674 dalign = mask + 1;
8675
8676 return dalign >= dsize;
8677 }
8678
8679 static bool
8680 constant_pool_expr_p (rtx op)
8681 {
8682 rtx base, offset;
8683
8684 split_const (op, &base, &offset);
8685 return (SYMBOL_REF_P (base)
8686 && CONSTANT_POOL_ADDRESS_P (base)
8687 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8688 }
8689
8690 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8691 use that as the register to put the HIGH value into if register allocation
8692 is already done. */
8693
8694 rtx
8695 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8696 {
8697 rtx tocrel, tocreg, hi;
8698
8699 gcc_assert (TARGET_TOC);
8700
8701 if (TARGET_DEBUG_ADDR)
8702 {
8703 if (SYMBOL_REF_P (symbol))
8704 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8705 XSTR (symbol, 0));
8706 else
8707 {
8708 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8709 GET_RTX_NAME (GET_CODE (symbol)));
8710 debug_rtx (symbol);
8711 }
8712 }
8713
8714 if (!can_create_pseudo_p ())
8715 df_set_regs_ever_live (TOC_REGISTER, true);
8716
8717 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8718 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8719 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8720 return tocrel;
8721
8722 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8723 if (largetoc_reg != NULL)
8724 {
8725 emit_move_insn (largetoc_reg, hi);
8726 hi = largetoc_reg;
8727 }
8728 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8729 }
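
/* Editorial illustration: for -mcmodel=medium/large the LO_SUM form
   built above typically assembles to the pair
   "addis rT,r2,sym@toc@ha" / "ld rD,sym@toc@l(rT)", while the small
   model uses a single TOC-relative access.  */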
8730
8731 /* These are only used to pass through from print_operand/print_operand_address
8732 to rs6000_output_addr_const_extra over the intervening function
8733 output_addr_const which is not target code. */
8734 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8735
8736 /* Return true if OP is a toc pointer relative address (the output
8737 of create_TOC_reference). If STRICT, do not match non-split
8738 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8739 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8740 TOCREL_OFFSET_RET respectively. */
8741
8742 bool
8743 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8744 const_rtx *tocrel_offset_ret)
8745 {
8746 if (!TARGET_TOC)
8747 return false;
8748
8749 if (TARGET_CMODEL != CMODEL_SMALL)
8750 {
8751 /* When strict ensure we have everything tidy. */
8752 if (strict
8753 && !(GET_CODE (op) == LO_SUM
8754 && REG_P (XEXP (op, 0))
8755 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8756 return false;
8757
8758 /* When not strict, allow non-split TOC addresses and also allow
8759 (lo_sum (high ..)) TOC addresses created during reload. */
8760 if (GET_CODE (op) == LO_SUM)
8761 op = XEXP (op, 1);
8762 }
8763
8764 const_rtx tocrel_base = op;
8765 const_rtx tocrel_offset = const0_rtx;
8766
8767 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8768 {
8769 tocrel_base = XEXP (op, 0);
8770 tocrel_offset = XEXP (op, 1);
8771 }
8772
8773 if (tocrel_base_ret)
8774 *tocrel_base_ret = tocrel_base;
8775 if (tocrel_offset_ret)
8776 *tocrel_offset_ret = tocrel_offset;
8777
8778 return (GET_CODE (tocrel_base) == UNSPEC
8779 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8780 && REG_P (XVECEXP (tocrel_base, 0, 1))
8781 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8782 }
8783
8784 /* Return true if X is a constant pool address, and also for cmodel=medium
8785 if X is a toc-relative address known to be offsettable within MODE. */
8786
8787 bool
8788 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8789 bool strict)
8790 {
8791 const_rtx tocrel_base, tocrel_offset;
8792 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8793 && (TARGET_CMODEL != CMODEL_MEDIUM
8794 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8795 || mode == QImode
8796 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8797 INTVAL (tocrel_offset), mode)));
8798 }
8799
8800 static bool
8801 legitimate_small_data_p (machine_mode mode, rtx x)
8802 {
8803 return (DEFAULT_ABI == ABI_V4
8804 && !flag_pic && !TARGET_TOC
8805 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8806 && small_data_operand (x, mode));
8807 }
8808
8809 bool
8810 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8811 bool strict, bool worst_case)
8812 {
8813 unsigned HOST_WIDE_INT offset;
8814 unsigned int extra;
8815
8816 if (GET_CODE (x) != PLUS)
8817 return false;
8818 if (!REG_P (XEXP (x, 0)))
8819 return false;
8820 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8821 return false;
8822 if (mode_supports_dq_form (mode))
8823 return quad_address_p (x, mode, strict);
8824 if (!reg_offset_addressing_ok_p (mode))
8825 return virtual_stack_registers_memory_p (x);
8826 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8827 return true;
8828 if (!CONST_INT_P (XEXP (x, 1)))
8829 return false;
8830
8831 offset = INTVAL (XEXP (x, 1));
8832 extra = 0;
8833 switch (mode)
8834 {
8835 case E_DFmode:
8836 case E_DDmode:
8837 case E_DImode:
8838 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8839 addressing. */
8840 if (VECTOR_MEM_VSX_P (mode))
8841 return false;
8842
8843 if (!worst_case)
8844 break;
8845 if (!TARGET_POWERPC64)
8846 extra = 4;
8847 else if (offset & 3)
8848 return false;
8849 break;
8850
8851 case E_TFmode:
8852 case E_IFmode:
8853 case E_KFmode:
8854 case E_TDmode:
8855 case E_TImode:
8856 case E_PTImode:
8857 extra = 8;
8858 if (!worst_case)
8859 break;
8860 if (!TARGET_POWERPC64)
8861 extra = 12;
8862 else if (offset & 3)
8863 return false;
8864 break;
8865
8866 default:
8867 break;
8868 }
8869
8870 if (TARGET_PREFIXED)
8871 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8872 else
8873 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8874 }
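
/* Editorial worked example: for TImode, EXTRA is 8 (12 in the 32-bit
   worst case), so an offset is accepted only if both OFFSET and
   OFFSET+EXTRA fit the displacement field; e.g. offset 32760 is
   rejected for TImode without prefixed addressing because the high
   doubleword would sit at 32768.  */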
8875
8876 bool
8877 legitimate_indexed_address_p (rtx x, int strict)
8878 {
8879 rtx op0, op1;
8880
8881 if (GET_CODE (x) != PLUS)
8882 return false;
8883
8884 op0 = XEXP (x, 0);
8885 op1 = XEXP (x, 1);
8886
8887 return (REG_P (op0) && REG_P (op1)
8888 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8889 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8890 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8891 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8892 }
8893
8894 bool
8895 avoiding_indexed_address_p (machine_mode mode)
8896 {
8897 unsigned int msize = GET_MODE_SIZE (mode);
8898
8899 /* Avoid indexed addressing for modes that have non-indexed load/store
8900 instruction forms. On power10, vector pairs have an indexed
8901 form, but vector quads don't. */
8902 if (msize > 16)
8903 return msize != 32;
8904
8905 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8906 }
8907
8908 bool
8909 legitimate_indirect_address_p (rtx x, int strict)
8910 {
8911 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8912 }
8913
8914 bool
8915 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8916 {
8917 if (!TARGET_MACHO || !flag_pic
8918 || mode != SImode || !MEM_P (x))
8919 return false;
8920 x = XEXP (x, 0);
8921
8922 if (GET_CODE (x) != LO_SUM)
8923 return false;
8924 if (!REG_P (XEXP (x, 0)))
8925 return false;
8926 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8927 return false;
8928 x = XEXP (x, 1);
8929
8930 return CONSTANT_P (x);
8931 }
8932
8933 static bool
8934 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8935 {
8936 if (GET_CODE (x) != LO_SUM)
8937 return false;
8938 if (!REG_P (XEXP (x, 0)))
8939 return false;
8940 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8941 return false;
8942 /* Quad word addresses are restricted, and we can't use LO_SUM. */
8943 if (mode_supports_dq_form (mode))
8944 return false;
8945 x = XEXP (x, 1);
8946
8947 if (TARGET_ELF)
8948 {
8949 bool large_toc_ok;
8950
8951 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8952 return false;
8953 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8954 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8955 recognizes some LO_SUM addresses as valid although this
8956 function says the opposite. In most cases, LRA through different
8957 transformations can generate correct code for address reloads.
8958 It fails to manage only some LO_SUM cases. So we need to add
8959 code here saying that some addresses are still valid. */
8960 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8961 && small_toc_ref (x, VOIDmode));
8962 if (TARGET_TOC && ! large_toc_ok)
8963 return false;
8964 if (GET_MODE_NUNITS (mode) != 1)
8965 return false;
8966 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8967 && !(/* ??? Assume floating point reg based on mode? */
8968 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8969 return false;
8970
8971 return CONSTANT_P (x) || large_toc_ok;
8972 }
8973 else if (TARGET_MACHO)
8974 {
8975 if (GET_MODE_NUNITS (mode) != 1)
8976 return false;
8977 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8978 && !(/* see above */
8979 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8980 return false;
8981 #if TARGET_MACHO
8982 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
8983 return CONSTANT_P (x);
8984 #endif
8985 /* Mach-O PIC code from here. */
8986 if (GET_CODE (x) == CONST)
8987 x = XEXP (x, 0);
8988
8989 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
8990 if (SYMBOL_REF_P (x))
8991 return false;
8992
8993 /* So this is OK if the wrapped object is const. */
8994 if (GET_CODE (x) == UNSPEC
8995 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8996 return CONSTANT_P (XVECEXP (x, 0, 0));
8997 return CONSTANT_P (x);
8998 }
8999 return false;
9000 }
9001
9002
9003 /* Try machine-dependent ways of modifying an illegitimate address
9004 to be legitimate. If we find one, return the new, valid address.
9005 This is used from only one place: `memory_address' in explow.cc.
9006
9007 OLDX is the address as it was before break_out_memory_refs was
9008 called. In some cases it is useful to look at this to decide what
9009 needs to be done.
9010
9011 It is always safe for this function to do nothing. It exists to
9012 recognize opportunities to optimize the output.
9013
9014 On RS/6000, first check for the sum of a register with a constant
9015 integer that is out of range. If so, generate code to add the
9016 constant with the low-order 16 bits masked to the register and force
9017 this result into another register (this can be done with `cau').
9018 Then generate an address of REG+(CONST&0xffff), allowing for the
9019 possibility of bit 16 being a one.
9020
9021 Then check for the sum of a register and something that is not constant;
9022 try to load the non-constant part into a register and return the sum. */
9023
9024 static rtx
9025 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9026 machine_mode mode)
9027 {
9028 unsigned int extra;
9029
9030 if (!reg_offset_addressing_ok_p (mode)
9031 || mode_supports_dq_form (mode))
9032 {
9033 if (virtual_stack_registers_memory_p (x))
9034 return x;
9035
9036 /* In theory we should not be seeing addresses of the form reg+0,
9037 but just in case it is generated, optimize it away. */
9038 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9039 return force_reg (Pmode, XEXP (x, 0));
9040
9041 /* For TImode with load/store quad, restrict addresses to just a single
9042 pointer, so it works with both GPRs and VSX registers. */
9043 /* Make sure both operands are registers. */
9044 else if (GET_CODE (x) == PLUS
9045 && (mode != TImode || !TARGET_VSX))
9046 return gen_rtx_PLUS (Pmode,
9047 force_reg (Pmode, XEXP (x, 0)),
9048 force_reg (Pmode, XEXP (x, 1)));
9049 else
9050 return force_reg (Pmode, x);
9051 }
9052 if (SYMBOL_REF_P (x) && !TARGET_MACHO)
9053 {
9054 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9055 if (model != 0)
9056 return rs6000_legitimize_tls_address (x, model);
9057 }
9058
9059 extra = 0;
9060 switch (mode)
9061 {
9062 case E_TFmode:
9063 case E_TDmode:
9064 case E_TImode:
9065 case E_PTImode:
9066 case E_IFmode:
9067 case E_KFmode:
9068 /* As in legitimate_offset_address_p we do not assume
9069 worst-case. The mode here is just a hint as to the registers
9070 used. A TImode is usually in gprs, but may actually be in
9071 fprs. Leave worst-case scenario for reload to handle via
9072 insn constraints. PTImode is only GPRs. */
9073 extra = 8;
9074 break;
9075 default:
9076 break;
9077 }
9078
9079 if (GET_CODE (x) == PLUS
9080 && REG_P (XEXP (x, 0))
9081 && CONST_INT_P (XEXP (x, 1))
9082 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9083 >= 0x10000 - extra))
9084 {
9085 HOST_WIDE_INT high_int, low_int;
9086 rtx sum;
9087 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
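/* Editorial worked example: the XOR/subtract pair sign-extends the
   low 16 bits, so offset 0x12345 splits into low 0x2345 and high
   0x10000, while offset 0x18000 splits into low -0x8000 and high
   0x20000; the high part is materialized with addis below.  */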
9088 if (low_int >= 0x8000 - extra)
9089 low_int = 0;
9090 high_int = INTVAL (XEXP (x, 1)) - low_int;
9091 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9092 gen_int_mode (high_int, Pmode)), 0);
9093 return plus_constant (Pmode, sum, low_int);
9094 }
9095 else if (GET_CODE (x) == PLUS
9096 && REG_P (XEXP (x, 0))
9097 && !CONST_INT_P (XEXP (x, 1))
9098 && GET_MODE_NUNITS (mode) == 1
9099 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9100 || (/* ??? Assume floating point reg based on mode? */
9101 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9102 && !avoiding_indexed_address_p (mode))
9103 {
9104 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9105 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9106 }
9107 else if ((TARGET_ELF
9108 #if TARGET_MACHO
9109 || !MACHO_DYNAMIC_NO_PIC_P
9110 #endif
9111 )
9112 && TARGET_32BIT
9113 && TARGET_NO_TOC_OR_PCREL
9114 && !flag_pic
9115 && !CONST_INT_P (x)
9116 && !CONST_WIDE_INT_P (x)
9117 && !CONST_DOUBLE_P (x)
9118 && CONSTANT_P (x)
9119 && GET_MODE_NUNITS (mode) == 1
9120 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9121 || (/* ??? Assume floating point reg based on mode? */
9122 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9123 {
9124 rtx reg = gen_reg_rtx (Pmode);
9125 if (TARGET_ELF)
9126 emit_insn (gen_elf_high (reg, x));
9127 else
9128 emit_insn (gen_macho_high (Pmode, reg, x));
9129 return gen_rtx_LO_SUM (Pmode, reg, x);
9130 }
9131 else if (TARGET_TOC
9132 && SYMBOL_REF_P (x)
9133 && constant_pool_expr_p (x)
9134 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9135 return create_TOC_reference (x, NULL_RTX);
9136 else
9137 return x;
9138 }
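
/* Editorial illustration: on 32-bit ELF without a TOC the HIGH/LO_SUM
   pair generated above typically assembles to "lis rN,sym@ha" followed
   by "lwz rD,sym@l(rN)", where @ha compensates for the sign extension
   of the low 16 bits.  */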
9139
9140 /* Debug version of rs6000_legitimize_address. */
9141 static rtx
9142 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9143 {
9144 rtx ret;
9145 rtx_insn *insns;
9146
9147 start_sequence ();
9148 ret = rs6000_legitimize_address (x, oldx, mode);
9149 insns = get_insns ();
9150 end_sequence ();
9151
9152 if (ret != x)
9153 {
9154 fprintf (stderr,
9155 "\nrs6000_legitimize_address: mode %s, old code %s, "
9156 "new code %s, modified\n",
9157 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9158 GET_RTX_NAME (GET_CODE (ret)));
9159
9160 fprintf (stderr, "Original address:\n");
9161 debug_rtx (x);
9162
9163 fprintf (stderr, "oldx:\n");
9164 debug_rtx (oldx);
9165
9166 fprintf (stderr, "New address:\n");
9167 debug_rtx (ret);
9168
9169 if (insns)
9170 {
9171 fprintf (stderr, "Insns added:\n");
9172 debug_rtx_list (insns, 20);
9173 }
9174 }
9175 else
9176 {
9177 fprintf (stderr,
9178 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9179 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9180
9181 debug_rtx (x);
9182 }
9183
9184 if (insns)
9185 emit_insn (insns);
9186
9187 return ret;
9188 }
9189
9190 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9191 We need to emit DTP-relative relocations. */
9192
9193 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9194 static void
9195 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9196 {
9197 switch (size)
9198 {
9199 case 4:
9200 fputs ("\t.long\t", file);
9201 break;
9202 case 8:
9203 fputs (DOUBLE_INT_ASM_OP, file);
9204 break;
9205 default:
9206 gcc_unreachable ();
9207 }
9208 output_addr_const (file, x);
9209 if (TARGET_ELF)
9210 fputs ("@dtprel+0x8000", file);
9211 }
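
/* Editorial note: the 0x8000 addend reflects the PowerPC TLS ABI,
   which biases DTP-relative offsets by 0x8000 so a 64KB TLS block can
   be reached with signed 16-bit displacements.  */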
9212
9213 /* Return true if X is a symbol that refers to real (rather than emulated)
9214 TLS. */
9215
9216 static bool
9217 rs6000_real_tls_symbol_ref_p (rtx x)
9218 {
9219 return (SYMBOL_REF_P (x)
9220 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9221 }
9222
9223 /* In the name of slightly smaller debug output, and to cater to
9224 general assembler lossage, recognize various UNSPEC sequences
9225 and turn them back into a direct symbol reference. */
9226
9227 static rtx
9228 rs6000_delegitimize_address (rtx orig_x)
9229 {
9230 rtx x, y, offset;
9231
9232 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9233 encodes loading up the high part of the address of a TOC reference along
9234 with a load of a GPR using the same base register used for the load. We
9235 return the original SYMBOL_REF.
9236
9237 (set (reg:INT1 <reg>)
9238 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))
9239
9240 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9241 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9242 We return the original SYMBOL_REF.
9243
9244 (parallel [(set (reg:DI <base-reg>)
9245 (unspec:DI [(symbol_ref <symbol>)
9246 (const_int <marker>)]
9247 UNSPEC_PCREL_OPT_LD_ADDR))
9248 (set (reg:DI <load-reg>)
9249 (unspec:DI [(const_int 0)]
9250 UNSPEC_PCREL_OPT_LD_DATA))])
9251
9252 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9253 GPR being loaded is the same as the GPR used to hold the external address.
9254
9255 (set (reg:DI <base-reg>)
9256 (unspec:DI [(symbol_ref <symbol>)
9257 (const_int <marker>)]
9258 UNSPEC_PCREL_OPT_LD_SAME_REG))
9259
9260 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9261 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9262 We return the original SYMBOL_REF.
9263
9264 (parallel [(set (reg:DI <base-reg>)
9265 (unspec:DI [(symbol_ref <symbol>)
9266 (const_int <marker>)]
9267 UNSPEC_PCREL_OPT_ST_ADDR))
9268 (use (reg <store-reg>))]) */
9269
9270 if (GET_CODE (orig_x) == UNSPEC)
9271 switch (XINT (orig_x, 1))
9272 {
9273 case UNSPEC_FUSION_GPR:
9274 case UNSPEC_PCREL_OPT_LD_ADDR:
9275 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9276 case UNSPEC_PCREL_OPT_ST_ADDR:
9277 orig_x = XVECEXP (orig_x, 0, 0);
9278 break;
9279
9280 default:
9281 break;
9282 }
9283
9284 orig_x = delegitimize_mem_from_attrs (orig_x);
9285
9286 x = orig_x;
9287 if (MEM_P (x))
9288 x = XEXP (x, 0);
9289
9290 y = x;
9291 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9292 y = XEXP (y, 1);
9293
9294 offset = NULL_RTX;
9295 if (GET_CODE (y) == PLUS
9296 && GET_MODE (y) == Pmode
9297 && CONST_INT_P (XEXP (y, 1)))
9298 {
9299 offset = XEXP (y, 1);
9300 y = XEXP (y, 0);
9301 }
9302
9303 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9304 {
9305 y = XVECEXP (y, 0, 0);
9306
9307 #ifdef HAVE_AS_TLS
9308 /* Do not associate thread-local symbols with the original
9309 constant pool symbol. */
9310 if (TARGET_XCOFF
9311 && SYMBOL_REF_P (y)
9312 && CONSTANT_POOL_ADDRESS_P (y)
9313 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9314 return orig_x;
9315 #endif
9316
9317 if (offset != NULL_RTX)
9318 y = gen_rtx_PLUS (Pmode, y, offset);
9319 if (!MEM_P (orig_x))
9320 return y;
9321 else
9322 return replace_equiv_address_nv (orig_x, y);
9323 }
9324
9325 if (TARGET_MACHO
9326 && GET_CODE (orig_x) == LO_SUM
9327 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9328 {
9329 y = XEXP (XEXP (orig_x, 1), 0);
9330 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9331 return XVECEXP (y, 0, 0);
9332 }
9333
9334 return orig_x;
9335 }
9336
9337 /* Return true if X shouldn't be emitted into the debug info.
9338 The linker doesn't like .toc section references from
9339 .debug_* sections, so reject .toc section symbols. */
9340
9341 static bool
9342 rs6000_const_not_ok_for_debug_p (rtx x)
9343 {
9344 if (GET_CODE (x) == UNSPEC)
9345 return true;
9346 if (SYMBOL_REF_P (x)
9347 && CONSTANT_POOL_ADDRESS_P (x))
9348 {
9349 rtx c = get_pool_constant (x);
9350 machine_mode cmode = get_pool_mode (x);
9351 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9352 return true;
9353 }
9354
9355 return false;
9356 }
9357
9358 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9359
9360 static bool
9361 rs6000_legitimate_combined_insn (rtx_insn *insn)
9362 {
9363 int icode = INSN_CODE (insn);
9364
9365 /* Reject creating doloop insns. Combine should not be allowed
9366 to create these for a number of reasons:
9367 1) In a nested loop, if combine creates one of these in an
9368 outer loop and the register allocator happens to allocate ctr
9369 to the outer loop insn, then the inner loop can't use ctr.
9370 Inner loops ought to be more highly optimized.
9371 2) Combine often wants to create one of these from what was
9372 originally a three insn sequence, first combining the three
9373 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9374 allocated ctr, the splitter takes us back to the three insn
9375 sequence. It's better to stop combine at the two insn
9376 sequence.
9377 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9378 insns, the register allocator sometimes uses floating point
9379 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9380 jump insn and output reloads are not implemented for jumps,
9381 the ctrsi/ctrdi splitters need to handle all possible cases.
9382 That's a pain, and it gets to be seriously difficult when a
9383 splitter that runs after reload needs memory to transfer from
9384 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9385 for the difficult case. It's better to not create problems
9386 in the first place. */
9387 if (icode != CODE_FOR_nothing
9388 && (icode == CODE_FOR_bdz_si
9389 || icode == CODE_FOR_bdz_di
9390 || icode == CODE_FOR_bdnz_si
9391 || icode == CODE_FOR_bdnz_di
9392 || icode == CODE_FOR_bdztf_si
9393 || icode == CODE_FOR_bdztf_di
9394 || icode == CODE_FOR_bdnztf_si
9395 || icode == CODE_FOR_bdnztf_di))
9396 return false;
9397
9398 return true;
9399 }
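
/* Editorial note: bdz/bdnz are the PowerPC decrement-CTR-and-branch
   instructions; the icodes rejected above are the doloop patterns
   that use them.  */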
9400
9401 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9402
9403 static GTY(()) rtx rs6000_tls_symbol;
9404 static rtx
9405 rs6000_tls_get_addr (void)
9406 {
9407 if (!rs6000_tls_symbol)
9408 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9409
9410 return rs6000_tls_symbol;
9411 }
9412
9413 /* Construct the SYMBOL_REF for TLS GOT references. */
9414
9415 static GTY(()) rtx rs6000_got_symbol;
9416 rtx
9417 rs6000_got_sym (void)
9418 {
9419 if (!rs6000_got_symbol)
9420 {
9421 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9422 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9423 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9424 }
9425
9426 return rs6000_got_symbol;
9427 }
9428
9429 /* AIX Thread-Local Address support. */
9430
9431 static rtx
9432 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9433 {
9434 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9435 const char *name;
9436 char *tlsname;
9437
9438 /* Place addr into TOC constant pool. */
9439 sym = force_const_mem (GET_MODE (addr), addr);
9440
9441 /* Output the TOC entry and create the MEM referencing the value. */
9442 if (constant_pool_expr_p (XEXP (sym, 0))
9443 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9444 {
9445 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9446 mem = gen_const_mem (Pmode, tocref);
9447 set_mem_alias_set (mem, get_TOC_alias_set ());
9448 }
9449 else
9450 return sym;
9451
9452 /* Use global-dynamic for local-dynamic. */
9453 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9454 || model == TLS_MODEL_LOCAL_DYNAMIC)
9455 {
9456 /* Create new TOC reference for @m symbol. */
9457 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9458 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9459 strcpy (tlsname, "*LCM");
9460 strcat (tlsname, name + 3);
9461 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9462 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9463 tocref = create_TOC_reference (modaddr, NULL_RTX);
9464 rtx modmem = gen_const_mem (Pmode, tocref);
9465 set_mem_alias_set (modmem, get_TOC_alias_set ());
9466
9467 rtx modreg = gen_reg_rtx (Pmode);
9468 emit_insn (gen_rtx_SET (modreg, modmem));
9469
9470 tmpreg = gen_reg_rtx (Pmode);
9471 emit_insn (gen_rtx_SET (tmpreg, mem));
9472
9473 dest = gen_reg_rtx (Pmode);
9474 if (TARGET_32BIT)
9475 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9476 else
9477 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9478 return dest;
9479 }
9480 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9481 else if (TARGET_32BIT)
9482 {
9483 tlsreg = gen_reg_rtx (SImode);
9484 emit_insn (gen_tls_get_tpointer (tlsreg));
9485 }
9486 else
9487 {
9488 tlsreg = gen_rtx_REG (DImode, 13);
9489 xcoff_tls_exec_model_detected = true;
9490 }
9491
9492 /* Load the TOC value into temporary register. */
9493 tmpreg = gen_reg_rtx (Pmode);
9494 emit_insn (gen_rtx_SET (tmpreg, mem));
9495 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9496 gen_rtx_MINUS (Pmode, addr, tlsreg));
9497
9498 /* Add TOC symbol value to TLS pointer. */
9499 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9500
9501 return dest;
9502 }
9503
9504 /* Passes the tls arg value for global dynamic and local dynamic
9505 emit_library_call_value in rs6000_legitimize_tls_address to
9506 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9507 marker relocs put on __tls_get_addr calls. */
9508 static rtx global_tlsarg;
9509
9510 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9511 this (thread-local) address. */
9512
9513 static rtx
9514 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9515 {
9516 rtx dest, insn;
9517
9518 if (TARGET_XCOFF)
9519 return rs6000_legitimize_tls_address_aix (addr, model);
9520
9521 dest = gen_reg_rtx (Pmode);
9522 if (model == TLS_MODEL_LOCAL_EXEC
9523 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9524 {
9525 rtx tlsreg;
9526
9527 if (TARGET_64BIT)
9528 {
9529 tlsreg = gen_rtx_REG (Pmode, 13);
9530 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9531 }
9532 else
9533 {
9534 tlsreg = gen_rtx_REG (Pmode, 2);
9535 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9536 }
9537 emit_insn (insn);
9538 }
9539 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9540 {
9541 rtx tlsreg, tmp;
9542
9543 tmp = gen_reg_rtx (Pmode);
9544 if (TARGET_64BIT)
9545 {
9546 tlsreg = gen_rtx_REG (Pmode, 13);
9547 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9548 }
9549 else
9550 {
9551 tlsreg = gen_rtx_REG (Pmode, 2);
9552 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9553 }
9554 emit_insn (insn);
9555 if (TARGET_64BIT)
9556 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9557 else
9558 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9559 emit_insn (insn);
9560 }
9561 else
9562 {
9563 rtx got, tga, tmp1, tmp2;
9564
9565 /* We currently use relocations like @got@tlsgd for tls, which
9566 means the linker will handle allocation of tls entries, placing
9567 them in the .got section. So use a pointer to the .got section,
9568 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9569 or to secondary GOT sections used by 32-bit -fPIC. */
9570 if (rs6000_pcrel_p ())
9571 got = const0_rtx;
9572 else if (TARGET_64BIT)
9573 got = gen_rtx_REG (Pmode, 2);
9574 else
9575 {
9576 if (flag_pic == 1)
9577 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9578 else
9579 {
9580 rtx gsym = rs6000_got_sym ();
9581 got = gen_reg_rtx (Pmode);
9582 if (flag_pic == 0)
9583 rs6000_emit_move (got, gsym, Pmode);
9584 else
9585 {
9586 rtx mem, lab;
9587
9588 tmp1 = gen_reg_rtx (Pmode);
9589 tmp2 = gen_reg_rtx (Pmode);
9590 mem = gen_const_mem (Pmode, tmp1);
9591 lab = gen_label_rtx ();
9592 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9593 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9594 if (TARGET_LINK_STACK)
9595 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9596 emit_move_insn (tmp2, mem);
9597 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9598 set_unique_reg_note (last, REG_EQUAL, gsym);
9599 }
9600 }
9601 }
9602
9603 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9604 {
9605 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9606 UNSPEC_TLSGD);
9607 tga = rs6000_tls_get_addr ();
9608 rtx argreg = gen_rtx_REG (Pmode, 3);
9609 emit_insn (gen_rtx_SET (argreg, arg));
9610 global_tlsarg = arg;
9611 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9612 global_tlsarg = NULL_RTX;
9613
9614 /* Make a note so that the result of this call can be CSEd. */
9615 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9616 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9617 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9618 }
9619 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9620 {
9621 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9622 tga = rs6000_tls_get_addr ();
9623 tmp1 = gen_reg_rtx (Pmode);
9624 rtx argreg = gen_rtx_REG (Pmode, 3);
9625 emit_insn (gen_rtx_SET (argreg, arg));
9626 global_tlsarg = arg;
9627 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9628 global_tlsarg = NULL_RTX;
9629
9630 /* Make a note so that the result of this call can be CSEd. */
9631 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9632 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9633 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9634
9635 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9636 {
9637 if (TARGET_64BIT)
9638 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9639 else
9640 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9641 }
9642 else if (rs6000_tls_size == 32)
9643 {
9644 tmp2 = gen_reg_rtx (Pmode);
9645 if (TARGET_64BIT)
9646 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9647 else
9648 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9649 emit_insn (insn);
9650 if (TARGET_64BIT)
9651 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9652 else
9653 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9654 }
9655 else
9656 {
9657 tmp2 = gen_reg_rtx (Pmode);
9658 if (TARGET_64BIT)
9659 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9660 else
9661 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9662 emit_insn (insn);
9663 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9664 }
9665 emit_insn (insn);
9666 }
9667 else
9668 {
9669 /* IE, or 64-bit offset LE. */
9670 tmp2 = gen_reg_rtx (Pmode);
9671 if (TARGET_64BIT)
9672 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9673 else
9674 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9675 emit_insn (insn);
9676 if (rs6000_pcrel_p ())
9677 {
9678 if (TARGET_64BIT)
9679 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9680 else
9681 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9682 }
9683 else if (TARGET_64BIT)
9684 insn = gen_tls_tls_64 (dest, tmp2, addr);
9685 else
9686 insn = gen_tls_tls_32 (dest, tmp2, addr);
9687 emit_insn (insn);
9688 }
9689 }
9690
9691 return dest;
9692 }
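
/* Editorial illustration: on 64-bit ELF the global-dynamic path above
   typically assembles to "addi r3,r2,sym@got@tlsgd" followed by
   "bl __tls_get_addr(sym@tlsgd)", with the marker relocation on the
   call derived from the arg saved in global_tlsarg.  */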
9693
9694 /* Only create the global variable for the stack protect guard if we are using
9695 the global flavor of that guard. */
9696 static tree
9697 rs6000_init_stack_protect_guard (void)
9698 {
9699 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9700 return default_stack_protect_guard ();
9701
9702 return NULL_TREE;
9703 }
9704
9705 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9706
9707 static bool
9708 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9709 {
9710 if (GET_CODE (x) == HIGH
9711 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9712 return true;
9713
9714 /* A TLS symbol in the TOC cannot contain a sum. */
9715 if (GET_CODE (x) == CONST
9716 && GET_CODE (XEXP (x, 0)) == PLUS
9717 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9718 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9719 return true;
9720
9721 /* Allow AIX TOC TLS symbols in the constant pool,
9722 but not ELF TLS symbols. */
9723 return TARGET_ELF && tls_referenced_p (x);
9724 }
9725
9726 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9727 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9728 can be addressed relative to the toc pointer. */
9729
9730 static bool
9731 use_toc_relative_ref (rtx sym, machine_mode mode)
9732 {
9733 return ((constant_pool_expr_p (sym)
9734 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9735 get_pool_mode (sym)))
9736 || (TARGET_CMODEL == CMODEL_MEDIUM
9737 && SYMBOL_REF_LOCAL_P (sym)
9738 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9739 }
9740
9741 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9742 that is a valid memory address for an instruction.
9743 The MODE argument is the machine mode for the MEM expression
9744 that wants to use this address.
9745
9746 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
9747 refers to a constant pool entry of an address (or the sum of it
9748 plus a constant), a short (16-bit signed) constant plus a register,
9749 the sum of two registers, or a register indirect, possibly with an
9750 auto-increment. For DFmode, DDmode and DImode with a constant plus
9751 register, we must ensure that both words are addressable, or on
9752 PowerPC64 that the offset is word aligned.
9753
9754 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9755 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9756 because adjacent memory cells are accessed by adding word-sized offsets
9757 during assembly output. */
9758 static bool
9759 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9760 {
9761 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9762 bool quad_offset_p = mode_supports_dq_form (mode);
9763
9764 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9765 return 0;
9766
9767 /* Handle unaligned altivec lvx/stvx type addresses. */
9768 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9769 && GET_CODE (x) == AND
9770 && CONST_INT_P (XEXP (x, 1))
9771 && INTVAL (XEXP (x, 1)) == -16)
9772 {
9773 x = XEXP (x, 0);
9774 return (legitimate_indirect_address_p (x, reg_ok_strict)
9775 || legitimate_indexed_address_p (x, reg_ok_strict)
9776 || virtual_stack_registers_memory_p (x));
9777 }
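
/* Editorial note: lvx/stvx ignore the low four bits of the effective
   address, so expand wraps such addresses in (and ... -16); stripping
   the AND above lets the underlying base address be validated
   normally.  */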
9778
9779 if (legitimate_indirect_address_p (x, reg_ok_strict))
9780 return 1;
9781 if (TARGET_UPDATE
9782 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9783 && mode_supports_pre_incdec_p (mode)
9784 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9785 return 1;
9786
9787 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9788 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9789 return 1;
9790
9791 /* Handle restricted vector d-form offsets in ISA 3.0. */
9792 if (quad_offset_p)
9793 {
9794 if (quad_address_p (x, mode, reg_ok_strict))
9795 return 1;
9796 }
9797 else if (virtual_stack_registers_memory_p (x))
9798 return 1;
9799
9800 else if (reg_offset_p)
9801 {
9802 if (legitimate_small_data_p (mode, x))
9803 return 1;
9804 if (legitimate_constant_pool_address_p (x, mode,
9805 reg_ok_strict || lra_in_progress))
9806 return 1;
9807 }
9808
9809 /* For TImode, if we have TImode in VSX registers, only allow register
9810 indirect addresses. This will allow the values to go in either GPRs
9811 or VSX registers without reloading. The vector types would tend to
9812 go into VSX registers, so we allow REG+REG, while TImode seems
9813 somewhat split, in that some uses are GPR based, and some VSX based. */
9814 /* FIXME: We could loosen this by changing the following to
9815 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9816 but currently we cannot allow REG+REG addressing for TImode. See
9817 PR72827 for complete details on how this ends up hoodwinking DSE. */
9818 if (mode == TImode && TARGET_VSX)
9819 return 0;
9820 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9821 if (! reg_ok_strict
9822 && reg_offset_p
9823 && GET_CODE (x) == PLUS
9824 && REG_P (XEXP (x, 0))
9825 && (XEXP (x, 0) == virtual_stack_vars_rtx
9826 || XEXP (x, 0) == arg_pointer_rtx)
9827 && CONST_INT_P (XEXP (x, 1)))
9828 return 1;
9829 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9830 return 1;
9831 if (!FLOAT128_2REG_P (mode)
9832 && (TARGET_HARD_FLOAT
9833 || TARGET_POWERPC64
9834 || (mode != DFmode && mode != DDmode))
9835 && (TARGET_POWERPC64 || mode != DImode)
9836 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9837 && mode != PTImode
9838 && !avoiding_indexed_address_p (mode)
9839 && legitimate_indexed_address_p (x, reg_ok_strict))
9840 return 1;
9841 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9842 && mode_supports_pre_modify_p (mode)
9843 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9844 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9845 reg_ok_strict, false)
9846 || (!avoiding_indexed_address_p (mode)
9847 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9848 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9849 {
9850 /* There is no prefixed version of the load/store with update. */
9851 rtx addr = XEXP (x, 1);
9852 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9853 }
9854 if (reg_offset_p && !quad_offset_p
9855 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9856 return 1;
9857 return 0;
9858 }
9859
9860 /* Debug version of rs6000_legitimate_address_p. */
9861 static bool
9862 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9863 bool reg_ok_strict)
9864 {
9865 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9866 fprintf (stderr,
9867 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9868 "strict = %d, reload = %s, code = %s\n",
9869 ret ? "true" : "false",
9870 GET_MODE_NAME (mode),
9871 reg_ok_strict,
9872 (reload_completed ? "after" : "before"),
9873 GET_RTX_NAME (GET_CODE (x)));
9874 debug_rtx (x);
9875
9876 return ret;
9877 }
9878
9879 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9880
9881 static bool
9882 rs6000_mode_dependent_address_p (const_rtx addr,
9883 addr_space_t as ATTRIBUTE_UNUSED)
9884 {
9885 return rs6000_mode_dependent_address_ptr (addr);
9886 }
9887
9888 /* Go to LABEL if ADDR (a legitimate address expression)
9889 has an effect that depends on the machine mode it is used for.
9890
9891 On the RS/6000 this is true of all integral offsets (since AltiVec
9892 and VSX modes don't allow them) and of any pre-increment or decrement.
9893
9894 ??? Except that due to conceptual problems in offsettable_address_p
9895 we can't really report the problems of integral offsets. So leave
9896 this assuming that the adjustable offset must be valid for the
9897 sub-words of a TFmode operand, which is what we had before. */
9898
9899 static bool
9900 rs6000_mode_dependent_address (const_rtx addr)
9901 {
9902 switch (GET_CODE (addr))
9903 {
9904 case PLUS:
9905 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9906 is considered a legitimate address before reload, so there
9907 are no offset restrictions in that case. Note that this
9908 condition is safe in strict mode because any address involving
9909 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9910 been rejected as illegitimate. */
9911 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9912 && XEXP (addr, 0) != arg_pointer_rtx
9913 && CONST_INT_P (XEXP (addr, 1)))
9914 {
9915 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9916 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
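/* Editorial note: EXTRA covers the word-sized sub-accesses of a
   16-byte operand such as TFmode; on 32-bit the last word lives at
   offset+12, so that displacement must be valid as well.  */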
9917 if (TARGET_PREFIXED)
9918 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9919 else
9920 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9921 }
9922 break;
9923
9924 case LO_SUM:
9925 /* Anything in the constant pool is sufficiently aligned that
9926 all bytes have the same high part address. */
9927 return !legitimate_constant_pool_address_p (addr, QImode, false);
9928
9929 /* Auto-increment cases are now treated generically in recog.cc. */
9930 case PRE_MODIFY:
9931 return TARGET_UPDATE;
9932
9933 /* AND is only allowed in Altivec loads. */
9934 case AND:
9935 return true;
9936
9937 default:
9938 break;
9939 }
9940
9941 return false;
9942 }
9943
9944 /* Debug version of rs6000_mode_dependent_address. */
9945 static bool
9946 rs6000_debug_mode_dependent_address (const_rtx addr)
9947 {
9948 bool ret = rs6000_mode_dependent_address (addr);
9949
9950 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9951 ret ? "true" : "false");
9952 debug_rtx (addr);
9953
9954 return ret;
9955 }
9956
9957 /* Implement FIND_BASE_TERM. */
9958
9959 rtx
9960 rs6000_find_base_term (rtx op)
9961 {
9962 rtx base;
9963
9964 base = op;
9965 if (GET_CODE (base) == CONST)
9966 base = XEXP (base, 0);
9967 if (GET_CODE (base) == PLUS)
9968 base = XEXP (base, 0);
9969 if (GET_CODE (base) == UNSPEC)
9970 switch (XINT (base, 1))
9971 {
9972 case UNSPEC_TOCREL:
9973 case UNSPEC_MACHOPIC_OFFSET:
9974 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9975 for aliasing purposes. */
9976 return XVECEXP (base, 0, 0);
9977 }
9978
9979 return op;
9980 }
9981
9982 /* More elaborate version of recog's offsettable_memref_p predicate
9983 that works around the ??? note of rs6000_mode_dependent_address.
9984 In particular it accepts
9985
9986 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9987
9988 in 32-bit mode, which the recog predicate rejects. */
9989
9990 static bool
9991 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9992 {
9993 bool worst_case;
9994
9995 if (!MEM_P (op))
9996 return false;
9997
9998 /* First mimic offsettable_memref_p. */
9999 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10000 return true;
10001
10002 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10003 the latter predicate knows nothing about the mode of the memory
10004 reference and, therefore, assumes that it is the largest supported
10005 mode (TFmode). As a consequence, legitimate offsettable memory
10006 references are rejected. rs6000_legitimate_offset_address_p contains
10007 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10008 at least with a little bit of help here given that we know the
10009 actual registers used. */
10010 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10011 || GET_MODE_SIZE (reg_mode) == 4);
10012 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10013 strict, worst_case);
10014 }
10015
10016 /* Determine the reassociation width to be used in reassociate_bb.
10017 This takes into account how many parallel operations we
10018 can actually do of a given type, and also the latency.
10019 P8:
10020 int add/sub 6/cycle
10021 mul 2/cycle
10022 vect add/sub/mul 2/cycle
10023 fp add/sub/mul 2/cycle
10024 dfp 1/cycle
10025 */
10026
10027 static int
10028 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10029 machine_mode mode)
10030 {
10031 switch (rs6000_tune)
10032 {
10033 case PROCESSOR_POWER8:
10034 case PROCESSOR_POWER9:
10035 case PROCESSOR_POWER10:
10036 if (DECIMAL_FLOAT_MODE_P (mode))
10037 return 1;
10038 if (VECTOR_MODE_P (mode))
10039 return 4;
10040 if (INTEGRAL_MODE_P (mode))
10041 return 1;
10042 if (FLOAT_MODE_P (mode))
10043 return 4;
10044 break;
10045 default:
10046 break;
10047 }
10048 return 1;
10049 }
10050
10051 /* Change register usage conditional on target flags. */
10052 static void
10053 rs6000_conditional_register_usage (void)
10054 {
10055 int i;
10056
10057 if (TARGET_DEBUG_TARGET)
10058 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10059
10060 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10061 if (TARGET_64BIT)
10062 fixed_regs[13] = call_used_regs[13] = 1;
10063
10064 /* Conditionally disable FPRs. */
10065 if (TARGET_SOFT_FLOAT)
10066 for (i = 32; i < 64; i++)
10067 fixed_regs[i] = call_used_regs[i] = 1;
10068
10069 /* The TOC register is not killed across calls in a way that is
10070 visible to the compiler. */
10071 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10072 call_used_regs[2] = 0;
10073
10074 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10075 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10076
10077 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10078 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10079 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10080
10081 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10082 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10083 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10084
10085 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10086 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10087
10088 if (!TARGET_ALTIVEC && !TARGET_VSX)
10089 {
10090 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10091 fixed_regs[i] = call_used_regs[i] = 1;
10092 call_used_regs[VRSAVE_REGNO] = 1;
10093 }
10094
10095 if (TARGET_ALTIVEC || TARGET_VSX)
10096 global_regs[VSCR_REGNO] = 1;
10097
10098 if (TARGET_ALTIVEC_ABI)
10099 {
10100 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10101 call_used_regs[i] = 1;
10102
10103 /* AIX reserves VR20:31 in non-extended ABI mode. */
10104 if (TARGET_XCOFF && !rs6000_aix_extabi)
10105 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10106 fixed_regs[i] = call_used_regs[i] = 1;
10107 }
10108 }
10109
10110 \f
10111 /* Output insns to set DEST equal to the constant SOURCE as a series of
10112 lis, ori and shl instructions and return TRUE. */
10113
10114 bool
10115 rs6000_emit_set_const (rtx dest, rtx source)
10116 {
10117 machine_mode mode = GET_MODE (dest);
10118 rtx temp, set;
10119 rtx_insn *insn;
10120 HOST_WIDE_INT c;
10121
10122 gcc_checking_assert (CONST_INT_P (source));
10123 c = INTVAL (source);
10124 switch (mode)
10125 {
10126 case E_QImode:
10127 case E_HImode:
10128 emit_insn (gen_rtx_SET (dest, source));
10129 return true;
10130
10131 case E_SImode:
10132 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10133
10134 emit_insn (gen_rtx_SET (copy_rtx (temp),
10135 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10136 emit_insn (gen_rtx_SET (dest,
10137 gen_rtx_IOR (SImode, copy_rtx (temp),
10138 GEN_INT (c & 0xffff))));
10139 break;
10140
10141 case E_DImode:
10142 if (!TARGET_POWERPC64)
10143 {
10144 rtx hi, lo;
10145
10146 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10147 DImode);
10148 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10149 DImode);
10150 emit_move_insn (hi, GEN_INT (c >> 32));
10151 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10152 emit_move_insn (lo, GEN_INT (c));
10153 }
10154 else
10155 rs6000_emit_set_long_const (dest, c);
10156 break;
10157
10158 default:
10159 gcc_unreachable ();
10160 }
10161
10162 insn = get_last_insn ();
10163 set = single_set (insn);
10164 if (! CONSTANT_P (SET_SRC (set)))
10165 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10166
10167 return true;
10168 }
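
/* Editorial worked example: for SImode c = 0x12345678 the sequence
   above is "lis rT,0x1234" to set the high half, then
   "ori rD,rT,0x5678" to merge in the low half.  */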
10169
10170 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10171 Output insns to set DEST equal to the constant C as a series of
10172 lis, ori and shl instructions. */
10173
10174 static void
10175 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10176 {
10177 rtx temp;
10178 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10179
10180 ud1 = c & 0xffff;
10181 c = c >> 16;
10182 ud2 = c & 0xffff;
10183 c = c >> 16;
10184 ud3 = c & 0xffff;
10185 c = c >> 16;
10186 ud4 = c & 0xffff;
10187
10188 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10189 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10190 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10191
10192 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10193 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10194 {
10195 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10196
10197 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10198 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10199 if (ud1 != 0)
10200 emit_move_insn (dest,
10201 gen_rtx_IOR (DImode, copy_rtx (temp),
10202 GEN_INT (ud1)));
10203 }
10204 else if (ud3 == 0 && ud4 == 0)
10205 {
10206 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10207
10208 gcc_assert (ud2 & 0x8000);
10209 emit_move_insn (copy_rtx (temp),
10210 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10211 if (ud1 != 0)
10212 emit_move_insn (copy_rtx (temp),
10213 gen_rtx_IOR (DImode, copy_rtx (temp),
10214 GEN_INT (ud1)));
10215 emit_move_insn (dest,
10216 gen_rtx_ZERO_EXTEND (DImode,
10217 gen_lowpart (SImode,
10218 copy_rtx (temp))));
10219 }
10220 else if (ud1 == ud3 && ud2 == ud4)
10221 {
10222 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10223 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10224 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
10225 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10226 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10227 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10228 }
10229 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10230 || (ud4 == 0 && ! (ud3 & 0x8000)))
10231 {
10232 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10233
10234 emit_move_insn (copy_rtx (temp),
10235 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10236 if (ud2 != 0)
10237 emit_move_insn (copy_rtx (temp),
10238 gen_rtx_IOR (DImode, copy_rtx (temp),
10239 GEN_INT (ud2)));
10240 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10241 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10242 GEN_INT (16)));
10243 if (ud1 != 0)
10244 emit_move_insn (dest,
10245 gen_rtx_IOR (DImode, copy_rtx (temp),
10246 GEN_INT (ud1)));
10247 }
10248 else
10249 {
10250 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10251
10252 emit_move_insn (copy_rtx (temp),
10253 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10254 if (ud3 != 0)
10255 emit_move_insn (copy_rtx (temp),
10256 gen_rtx_IOR (DImode, copy_rtx (temp),
10257 GEN_INT (ud3)));
10258
10259 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10260 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10261 GEN_INT (32)));
10262 if (ud2 != 0)
10263 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10264 gen_rtx_IOR (DImode, copy_rtx (temp),
10265 GEN_INT (ud2 << 16)));
10266 if (ud1 != 0)
10267 emit_move_insn (dest,
10268 gen_rtx_IOR (DImode, copy_rtx (temp),
10269 GEN_INT (ud1)));
10270 }
10271 }
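
/* Editorial worked example: c = 0x123456789abcdef0 takes the final
   branch above and becomes "lis rT,0x1234; ori rT,rT,0x5678;
   sldi rT,rT,32; oris rT,rT,0x9abc; ori rD,rT,0xdef0".  */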
10272
10273 /* Helper for the following. Get rid of [r+r] memory refs
10274 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10275
10276 static void
10277 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10278 {
10279 if (MEM_P (operands[0])
10280 && !REG_P (XEXP (operands[0], 0))
10281 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10282 GET_MODE (operands[0]), false))
10283 operands[0]
10284 = replace_equiv_address (operands[0],
10285 copy_addr_to_reg (XEXP (operands[0], 0)));
10286
10287 if (MEM_P (operands[1])
10288 && !REG_P (XEXP (operands[1], 0))
10289 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10290 GET_MODE (operands[1]), false))
10291 operands[1]
10292 = replace_equiv_address (operands[1],
10293 copy_addr_to_reg (XEXP (operands[1], 0)));
10294 }
10295
10296 /* Generate a vector of constants to permute MODE for a little-endian
10297 storage operation by swapping the two halves of a vector. */
10298 static rtvec
10299 rs6000_const_vec (machine_mode mode)
10300 {
10301 int i, subparts;
10302 rtvec v;
10303
10304 switch (mode)
10305 {
10306 case E_V1TImode:
10307 subparts = 1;
10308 break;
10309 case E_V2DFmode:
10310 case E_V2DImode:
10311 subparts = 2;
10312 break;
10313 case E_V4SFmode:
10314 case E_V4SImode:
10315 subparts = 4;
10316 break;
10317 case E_V8HImode:
10318 subparts = 8;
10319 break;
10320 case E_V16QImode:
10321 subparts = 16;
10322 break;
10323 default:
10324 gcc_unreachable();
10325 }
10326
10327 v = rtvec_alloc (subparts);
10328
10329 for (i = 0; i < subparts / 2; ++i)
10330 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10331 for (i = subparts / 2; i < subparts; ++i)
10332 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10333
10334 return v;
10335 }
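
/* Editorial illustration: for V4SImode this yields the selector
   (2 3 0 1), i.e. the two 64-bit halves of the vector are swapped
   while the element order within each half is preserved.  */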
10336
10337 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10338 store operation. */
10339 void
10340 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10341 {
10342 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10343 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10344
10345 /* Scalar permutations are easier to express in integer modes rather than
10346 floating-point modes, so cast them here. We use V1TImode instead
10347 of TImode to ensure that the values don't go through GPRs. */
10348 if (FLOAT128_VECTOR_P (mode))
10349 {
10350 dest = gen_lowpart (V1TImode, dest);
10351 source = gen_lowpart (V1TImode, source);
10352 mode = V1TImode;
10353 }
10354
10355 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10356 scalar. */
10357 if (mode == TImode || mode == V1TImode)
10358 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10359 GEN_INT (64))));
10360 else
10361 {
10362 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10363 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10364 }
10365 }
10366
10367 /* Emit a little-endian load from vector memory location SOURCE to VSX
10368 register DEST in mode MODE. The load is done with two permuting
10369 insns that represent an lxvd2x and xxpermdi. */
10370 void
10371 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10372 {
10373 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10374 V1TImode). */
10375 if (mode == TImode || mode == V1TImode)
10376 {
10377 mode = V2DImode;
10378 dest = gen_lowpart (V2DImode, dest);
10379 source = adjust_address (source, V2DImode, 0);
10380 }
10381
10382 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10383 rs6000_emit_le_vsx_permute (tmp, source, mode);
10384 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10385 }
10386
10387 /* Emit a little-endian store to vector memory location DEST from VSX
10388 register SOURCE in mode MODE. The store is done with two permuting
10389 insns that represent an xxpermdi and an stxvd2x. */
10390 void
10391 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10392 {
10393 /* This should never be called after LRA. */
10394 gcc_assert (can_create_pseudo_p ());
10395
10396 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10397 V1TImode). */
10398 if (mode == TImode || mode == V1TImode)
10399 {
10400 mode = V2DImode;
10401 dest = adjust_address (dest, V2DImode, 0);
10402 source = gen_lowpart (V2DImode, source);
10403 }
10404
10405 rtx tmp = gen_reg_rtx_and_attrs (source);
10406 rs6000_emit_le_vsx_permute (tmp, source, mode);
10407 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10408 }
10409
10410 /* Emit a sequence representing a little-endian VSX load or store,
10411 moving data from SOURCE to DEST in mode MODE. This is done
10412 separately from rs6000_emit_move to ensure it is called only
10413 during expand. LE VSX loads and stores introduced later are
10414 handled with a split. The expand-time RTL generation allows
10415 us to optimize away redundant pairs of register-permutes. */
10416 void
10417 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10418 {
10419 gcc_assert (!BYTES_BIG_ENDIAN
10420 && VECTOR_MEM_VSX_P (mode)
10421 && !TARGET_P9_VECTOR
10422 && !gpr_or_gpr_p (dest, source)
10423 && (MEM_P (source) ^ MEM_P (dest)));
10424
10425 if (MEM_P (source))
10426 {
10427 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10428 rs6000_emit_le_vsx_load (dest, source, mode);
10429 }
10430 else
10431 {
10432 if (!REG_P (source))
10433 source = force_reg (mode, source);
10434 rs6000_emit_le_vsx_store (dest, source, mode);
10435 }
10436 }
10437
10438 /* Return whether an SFmode or SImode move can be done without converting one
10439 mode to another. This arises when we have:
10440
10441 (SUBREG:SF (REG:SI ...))
10442 (SUBREG:SI (REG:SF ...))
10443
10444 and one of the values is in a floating point/vector register, where SFmode
10445 scalars are stored in DFmode format. */
10446
10447 bool
10448 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10449 {
10450 if (TARGET_ALLOW_SF_SUBREG)
10451 return true;
10452
10453 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10454 return true;
10455
10456 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10457 return true;
10458
10459 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10460 if (SUBREG_P (dest))
10461 {
10462 rtx dest_subreg = SUBREG_REG (dest);
10463 rtx src_subreg = SUBREG_REG (src);
10464 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10465 }
10466
10467 return false;
10468 }
10469
10470
10471 /* Helper function to change moves with:
10472
10473 (SUBREG:SF (REG:SI)) and
10474 (SUBREG:SI (REG:SF))
10475
10476 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10477 values are stored as DFmode values in the VSX registers. We need to convert
10478 the bits before we can use a direct move or operate on the bits in the
10479 vector register as an integer type.
10480
10481 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10482
10483 static bool
10484 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10485 {
10486 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10487 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10488 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10489 {
10490 rtx inner_source = SUBREG_REG (source);
10491 machine_mode inner_mode = GET_MODE (inner_source);
10492
10493 if (mode == SImode && inner_mode == SFmode)
10494 {
10495 emit_insn (gen_movsi_from_sf (dest, inner_source));
10496 return true;
10497 }
10498
10499 if (mode == SFmode && inner_mode == SImode)
10500 {
10501 emit_insn (gen_movsf_from_si (dest, inner_source));
10502 return true;
10503 }
10504 }
10505
10506 return false;
10507 }
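
/* Illustrative sketch, not from the original source: a move such as

       (set (reg:SI 3) (subreg:SI (reg:SF 33) 0))

   (register numbers hypothetical) cannot be a plain subreg copy, because
   the SFmode value is kept in DFmode format in the vector register; the
   gen_movsi_from_sf pattern used above converts the bits to the SFmode
   memory layout before the direct move to the GPR.  */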
10508
10509 /* Emit a move from SOURCE to DEST in mode MODE. */
10510 void
10511 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10512 {
10513 rtx operands[2];
10514 operands[0] = dest;
10515 operands[1] = source;
10516
10517 if (TARGET_DEBUG_ADDR)
10518 {
10519 fprintf (stderr,
10520 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10521 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10522 GET_MODE_NAME (mode),
10523 lra_in_progress,
10524 reload_completed,
10525 can_create_pseudo_p ());
10526 debug_rtx (dest);
10527 fprintf (stderr, "source:\n");
10528 debug_rtx (source);
10529 }
10530
10531 /* Check that we get CONST_WIDE_INT only when we should. */
10532 if (CONST_WIDE_INT_P (operands[1])
10533 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10534 gcc_unreachable ();
10535
10536 #ifdef HAVE_AS_GNU_ATTRIBUTE
10537 /* If we use a long double type, set the flags in .gnu_attribute that say
10538 what the long double type is. This is to allow the linker's warning
10539 message for the wrong long double to be useful, even if the function does
10540 not do a call (for example, doing a 128-bit add on power9 if the long
10541 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are
10542 used when they aren't the default long double type. */
10543 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10544 {
10545 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10546 rs6000_passes_float = rs6000_passes_long_double = true;
10547
10548 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10549 rs6000_passes_float = rs6000_passes_long_double = true;
10550 }
10551 #endif
10552
10553 /* See if we need to special case SImode/SFmode SUBREG moves. */
10554 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10555 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10556 return;
10557
10558 /* Check if GCC is setting up a block move that will end up using FP
10559 registers as temporaries. We must make sure this is acceptable. */
10560 if (MEM_P (operands[0])
10561 && MEM_P (operands[1])
10562 && mode == DImode
10563 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10564 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10565 && ! (rs6000_slow_unaligned_access (SImode,
10566 (MEM_ALIGN (operands[0]) > 32
10567 ? 32 : MEM_ALIGN (operands[0])))
10568 || rs6000_slow_unaligned_access (SImode,
10569 (MEM_ALIGN (operands[1]) > 32
10570 ? 32 : MEM_ALIGN (operands[1]))))
10571 && ! MEM_VOLATILE_P (operands [0])
10572 && ! MEM_VOLATILE_P (operands [1]))
10573 {
10574 emit_move_insn (adjust_address (operands[0], SImode, 0),
10575 adjust_address (operands[1], SImode, 0));
10576 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10577 adjust_address (copy_rtx (operands[1]), SImode, 4));
10578 return;
10579 }
10580
10581 if (can_create_pseudo_p () && MEM_P (operands[0])
10582 && !gpc_reg_operand (operands[1], mode))
10583 operands[1] = force_reg (mode, operands[1]);
10584
10585 /* Recognize the case where operands[1] is a reference to thread-local
10586 data and load its address to a register. */
10587 if (tls_referenced_p (operands[1]))
10588 {
10589 enum tls_model model;
10590 rtx tmp = operands[1];
10591 rtx addend = NULL;
10592
10593 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10594 {
10595 addend = XEXP (XEXP (tmp, 0), 1);
10596 tmp = XEXP (XEXP (tmp, 0), 0);
10597 }
10598
10599 gcc_assert (SYMBOL_REF_P (tmp));
10600 model = SYMBOL_REF_TLS_MODEL (tmp);
10601 gcc_assert (model != 0);
10602
10603 tmp = rs6000_legitimize_tls_address (tmp, model);
10604 if (addend)
10605 {
10606 tmp = gen_rtx_PLUS (mode, tmp, addend);
10607 tmp = force_operand (tmp, operands[0]);
10608 }
10609 operands[1] = tmp;
10610 }
10611
10612 /* 128-bit constant floating-point values on Darwin should really be loaded
10613 as two parts. However, this premature splitting is a problem when DFmode
10614 values can go into Altivec registers. */
10615 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10616 && !reg_addr[DFmode].scalar_in_vmx_p)
10617 {
10618 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10619 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10620 DFmode);
10621 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10622 GET_MODE_SIZE (DFmode)),
10623 simplify_gen_subreg (DFmode, operands[1], mode,
10624 GET_MODE_SIZE (DFmode)),
10625 DFmode);
10626 return;
10627 }
10628
10629 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10630 p1:SD) if p1 is not of floating point class and p0 is spilled, since
10631 there is no analogous movsd_store for this case. */
10632 if (lra_in_progress && mode == DDmode
10633 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10634 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10635 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10636 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10637 {
10638 enum reg_class cl;
10639 int regno = REGNO (SUBREG_REG (operands[1]));
10640
10641 if (!HARD_REGISTER_NUM_P (regno))
10642 {
10643 cl = reg_preferred_class (regno);
10644 regno = reg_renumber[regno];
10645 if (regno < 0)
10646 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10647 }
10648 if (regno >= 0 && ! FP_REGNO_P (regno))
10649 {
10650 mode = SDmode;
10651 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10652 operands[1] = SUBREG_REG (operands[1]);
10653 }
10654 }
10655 if (lra_in_progress
10656 && mode == SDmode
10657 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10658 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10659 && (REG_P (operands[1])
10660 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10661 {
10662 int regno = reg_or_subregno (operands[1]);
10663 enum reg_class cl;
10664
10665 if (!HARD_REGISTER_NUM_P (regno))
10666 {
10667 cl = reg_preferred_class (regno);
10668 gcc_assert (cl != NO_REGS);
10669 regno = reg_renumber[regno];
10670 if (regno < 0)
10671 regno = ira_class_hard_regs[cl][0];
10672 }
10673 if (FP_REGNO_P (regno))
10674 {
10675 if (GET_MODE (operands[0]) != DDmode)
10676 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10677 emit_insn (gen_movsd_store (operands[0], operands[1]));
10678 }
10679 else if (INT_REGNO_P (regno))
10680 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10681 else
10682 gcc_unreachable ();
10683 return;
10684 }
10685 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10686 p1:DD)) if p0 is not of floating point class and p1 is spilled, since
10687 there is no analogous movsd_load for this case. */
10688 if (lra_in_progress && mode == DDmode
10689 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10690 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10691 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10692 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10693 {
10694 enum reg_class cl;
10695 int regno = REGNO (SUBREG_REG (operands[0]));
10696
10697 if (!HARD_REGISTER_NUM_P (regno))
10698 {
10699 cl = reg_preferred_class (regno);
10700 regno = reg_renumber[regno];
10701 if (regno < 0)
10702 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10703 }
10704 if (regno >= 0 && ! FP_REGNO_P (regno))
10705 {
10706 mode = SDmode;
10707 operands[0] = SUBREG_REG (operands[0]);
10708 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10709 }
10710 }
10711 if (lra_in_progress
10712 && mode == SDmode
10713 && (REG_P (operands[0])
10714 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10715 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10716 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10717 {
10718 int regno = reg_or_subregno (operands[0]);
10719 enum reg_class cl;
10720
10721 if (!HARD_REGISTER_NUM_P (regno))
10722 {
10723 cl = reg_preferred_class (regno);
10724 gcc_assert (cl != NO_REGS);
10725 regno = reg_renumber[regno];
10726 if (regno < 0)
10727 regno = ira_class_hard_regs[cl][0];
10728 }
10729 if (FP_REGNO_P (regno))
10730 {
10731 if (GET_MODE (operands[1]) != DDmode)
10732 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10733 emit_insn (gen_movsd_load (operands[0], operands[1]));
10734 }
10735 else if (INT_REGNO_P (regno))
10736 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10737 else
10738 gcc_unreachable ();
10739 return;
10740 }
10741
10742 /* FIXME: In the long term, this switch statement should go away
10743 and be replaced by a sequence of tests based on things like
10744 mode == Pmode. */
10745 switch (mode)
10746 {
10747 case E_HImode:
10748 case E_QImode:
10749 if (CONSTANT_P (operands[1])
10750 && !CONST_INT_P (operands[1]))
10751 operands[1] = force_const_mem (mode, operands[1]);
10752 break;
10753
10754 case E_TFmode:
10755 case E_TDmode:
10756 case E_IFmode:
10757 case E_KFmode:
10758 if (FLOAT128_2REG_P (mode))
10759 rs6000_eliminate_indexed_memrefs (operands);
10760 /* fall through */
10761
10762 case E_DFmode:
10763 case E_DDmode:
10764 case E_SFmode:
10765 case E_SDmode:
10766 if (CONSTANT_P (operands[1])
10767 && ! easy_fp_constant (operands[1], mode))
10768 operands[1] = force_const_mem (mode, operands[1]);
10769 break;
10770
10771 case E_V16QImode:
10772 case E_V8HImode:
10773 case E_V4SFmode:
10774 case E_V4SImode:
10775 case E_V2DFmode:
10776 case E_V2DImode:
10777 case E_V1TImode:
10778 if (CONSTANT_P (operands[1])
10779 && !easy_vector_constant (operands[1], mode))
10780 operands[1] = force_const_mem (mode, operands[1]);
10781 break;
10782
10783 case E_OOmode:
10784 case E_XOmode:
10785 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10786 error ("%qs is an opaque type, and you cannot set it to other values",
10787 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10788 break;
10789
10790 case E_SImode:
10791 case E_DImode:
10792 /* Use the default pattern for addresses of ELF small data. */
10793 if (TARGET_ELF
10794 && mode == Pmode
10795 && DEFAULT_ABI == ABI_V4
10796 && (SYMBOL_REF_P (operands[1])
10797 || GET_CODE (operands[1]) == CONST)
10798 && small_data_operand (operands[1], mode))
10799 {
10800 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10801 return;
10802 }
10803
10804 /* Use the default pattern for loading up PC-relative addresses. */
10805 if (TARGET_PCREL && mode == Pmode
10806 && pcrel_local_or_external_address (operands[1], Pmode))
10807 {
10808 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10809 return;
10810 }
10811
10812 if (DEFAULT_ABI == ABI_V4
10813 && mode == Pmode && mode == SImode
10814 && flag_pic == 1 && got_operand (operands[1], mode))
10815 {
10816 emit_insn (gen_movsi_got (operands[0], operands[1]));
10817 return;
10818 }
10819
10820 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10821 && TARGET_NO_TOC_OR_PCREL
10822 && ! flag_pic
10823 && mode == Pmode
10824 && CONSTANT_P (operands[1])
10825 && GET_CODE (operands[1]) != HIGH
10826 && !CONST_INT_P (operands[1]))
10827 {
10828 rtx target = (!can_create_pseudo_p ()
10829 ? operands[0]
10830 : gen_reg_rtx (mode));
10831
10832 /* If this is a function address on -mcall-aixdesc,
10833 convert it to the address of the descriptor. */
10834 if (DEFAULT_ABI == ABI_AIX
10835 && SYMBOL_REF_P (operands[1])
10836 && XSTR (operands[1], 0)[0] == '.')
10837 {
10838 const char *name = XSTR (operands[1], 0);
10839 rtx new_ref;
10840 while (*name == '.')
10841 name++;
10842 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10843 CONSTANT_POOL_ADDRESS_P (new_ref)
10844 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10845 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10846 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10847 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10848 operands[1] = new_ref;
10849 }
10850
10851 if (DEFAULT_ABI == ABI_DARWIN)
10852 {
10853 #if TARGET_MACHO
10854 /* This is not PIC code, but could require the subset of
10855 indirections used by mdynamic-no-pic. */
10856 if (MACHO_DYNAMIC_NO_PIC_P)
10857 {
10858 /* Take care of any required data indirection. */
10859 operands[1] = rs6000_machopic_legitimize_pic_address (
10860 operands[1], mode, operands[0]);
10861 if (operands[0] != operands[1])
10862 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10863 return;
10864 }
10865 #endif
10866 emit_insn (gen_macho_high (Pmode, target, operands[1]));
10867 emit_insn (gen_macho_low (Pmode, operands[0],
10868 target, operands[1]));
10869 return;
10870 }
10871
10872 emit_insn (gen_elf_high (target, operands[1]));
10873 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10874 return;
10875 }
10876
10877 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10878 and we have put it in the TOC, we just need to make a TOC-relative
10879 reference to it. */
10880 if (TARGET_TOC
10881 && SYMBOL_REF_P (operands[1])
10882 && use_toc_relative_ref (operands[1], mode))
10883 operands[1] = create_TOC_reference (operands[1], operands[0]);
10884 else if (mode == Pmode
10885 && CONSTANT_P (operands[1])
10886 && GET_CODE (operands[1]) != HIGH
10887 && ((REG_P (operands[0])
10888 && FP_REGNO_P (REGNO (operands[0])))
10889 || !CONST_INT_P (operands[1])
10890 || (num_insns_constant (operands[1], mode)
10891 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10892 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10893 && (TARGET_CMODEL == CMODEL_SMALL
10894 || can_create_pseudo_p ()
10895 || (REG_P (operands[0])
10896 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10897 {
10898
10899 #if TARGET_MACHO
10900 /* Darwin uses a special PIC legitimizer. */
10901 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10902 {
10903 operands[1] =
10904 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10905 operands[0]);
10906 if (operands[0] != operands[1])
10907 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10908 return;
10909 }
10910 #endif
10911
10912 /* If we are to limit the number of things we put in the TOC and
10913 this is a symbol plus a constant we can add in one insn,
10914 just put the symbol in the TOC and add the constant. */
10915 if (GET_CODE (operands[1]) == CONST
10916 && TARGET_NO_SUM_IN_TOC
10917 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10918 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10919 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10920 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10921 && ! side_effects_p (operands[0]))
10922 {
10923 rtx sym =
10924 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10925 rtx other = XEXP (XEXP (operands[1], 0), 1);
10926
10927 sym = force_reg (mode, sym);
10928 emit_insn (gen_add3_insn (operands[0], sym, other));
10929 return;
10930 }
10931
10932 operands[1] = force_const_mem (mode, operands[1]);
10933
10934 if (TARGET_TOC
10935 && SYMBOL_REF_P (XEXP (operands[1], 0))
10936 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10937 {
10938 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10939 operands[0]);
10940 operands[1] = gen_const_mem (mode, tocref);
10941 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10942 }
10943 }
10944 break;
10945
10946 case E_TImode:
10947 if (!VECTOR_MEM_VSX_P (TImode))
10948 rs6000_eliminate_indexed_memrefs (operands);
10949 break;
10950
10951 case E_PTImode:
10952 rs6000_eliminate_indexed_memrefs (operands);
10953 break;
10954
10955 default:
10956 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10957 }
10958
10959 /* Above, we may have called force_const_mem which may have returned
10960 an invalid address. If we can, fix this up; otherwise, reload will
10961 have to deal with it. */
10962 if (MEM_P (operands[1]))
10963 operands[1] = validize_mem (operands[1]);
10964
10965 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10966 }
10967 \f
10968
10969 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10970 static void
10971 init_float128_ibm (machine_mode mode)
10972 {
10973 if (!TARGET_XL_COMPAT)
10974 {
10975 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10976 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10977 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10978 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10979
10980 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10981 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10982 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10983 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10984 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10985 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10986 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10987 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10988
10989 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10990 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10991 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10992 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10993 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10994 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10995 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10996 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10997 }
10998 else
10999 {
11000 set_optab_libfunc (add_optab, mode, "_xlqadd");
11001 set_optab_libfunc (sub_optab, mode, "_xlqsub");
11002 set_optab_libfunc (smul_optab, mode, "_xlqmul");
11003 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11004 }
11005
11006 /* Add various conversions for IFmode to use the traditional TFmode
11007 names. */
11008 if (mode == IFmode)
11009 {
11010 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11011 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11012 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11013 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11014 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11015 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11016
11017 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
11018 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
11019
11020 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
11021 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
11022
11023 if (TARGET_POWERPC64)
11024 {
11025 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11026 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11027 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11028 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11029 }
11030 }
11031 }
11032
11033 /* Create a decl for either complex long double multiply or complex long double
11034 divide when long double is IEEE 128-bit floating point. We can't use
11035 __multc3 and __divtc3 because the original long double using IBM extended
11036 double used those names. The complex multiply/divide functions are encoded
11037 as builtin functions with a complex result and 4 scalar inputs. */
11038
11039 static void
11040 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
11041 {
11042 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
11043 name, NULL_TREE);
11044
11045 set_builtin_decl (fncode, fndecl, true);
11046
11047 if (TARGET_DEBUG_BUILTIN)
11048 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
11049
11050 return;
11051 }
11052
11053 /* Set up IEEE 128-bit floating point routines. Use different names if the
11054 arguments can be passed in a vector register. The historical PowerPC
11055 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11056 continue to use that if we aren't using vector registers to pass IEEE
11057 128-bit floating point. */
11058
11059 static void
11060 init_float128_ieee (machine_mode mode)
11061 {
11062 if (FLOAT128_VECTOR_P (mode))
11063 {
11064 set_optab_libfunc (add_optab, mode, "__addkf3");
11065 set_optab_libfunc (sub_optab, mode, "__subkf3");
11066 set_optab_libfunc (neg_optab, mode, "__negkf2");
11067 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11068 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11069 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11070 set_optab_libfunc (abs_optab, mode, "__abskf2");
11071 set_optab_libfunc (powi_optab, mode, "__powikf2");
11072
11073 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11074 set_optab_libfunc (ne_optab, mode, "__nekf2");
11075 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11076 set_optab_libfunc (ge_optab, mode, "__gekf2");
11077 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11078 set_optab_libfunc (le_optab, mode, "__lekf2");
11079 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11080
11081 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11082 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11083 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11084 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11085
11086 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11087 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11088 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11089
11090 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11091 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11092 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11093
11094 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11095 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11096 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11097 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11098 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11099 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11100
11101 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11102 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11103 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11104 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11105
11106 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11107 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11108 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11109 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11110
11111 if (TARGET_POWERPC64)
11112 {
11113 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11114 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11115 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11116 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11117 }
11118 }
11119
11120 else
11121 {
11122 set_optab_libfunc (add_optab, mode, "_q_add");
11123 set_optab_libfunc (sub_optab, mode, "_q_sub");
11124 set_optab_libfunc (neg_optab, mode, "_q_neg");
11125 set_optab_libfunc (smul_optab, mode, "_q_mul");
11126 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11127 if (TARGET_PPC_GPOPT)
11128 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11129
11130 set_optab_libfunc (eq_optab, mode, "_q_feq");
11131 set_optab_libfunc (ne_optab, mode, "_q_fne");
11132 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11133 set_optab_libfunc (ge_optab, mode, "_q_fge");
11134 set_optab_libfunc (lt_optab, mode, "_q_flt");
11135 set_optab_libfunc (le_optab, mode, "_q_fle");
11136
11137 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11138 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11139 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11140 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11141 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11142 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11143 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11144 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11145 }
11146 }
11147
11148 static void
11149 rs6000_init_libfuncs (void)
11150 {
11151 /* __float128 support. */
11152 if (TARGET_FLOAT128_TYPE)
11153 init_float128_ieee (KFmode);
11154
11155 /* __ibm128 support. */
11156 if (TARGET_IBM128)
11157 init_float128_ibm (IFmode);
11158
11159 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11160 if (TARGET_LONG_DOUBLE_128)
11161 {
11162 if (!TARGET_IEEEQUAD)
11163 init_float128_ibm (TFmode);
11164
11165 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11166 else
11167 init_float128_ieee (TFmode);
11168 }
11169
11170 /* Set up to call __mulkc3 and __divkc3 when long double uses the IEEE
11171 128-bit encoding. We cannot use the same names (__mulkc3 or __divkc3) for
11172 both IEEE long double and for explicit _Float128/__float128, because
11173 c_builtin_function will complain if we create two built-in functions with
11174 the same name. Instead we use an alias name for the case when long double
11175 uses the IEEE 128-bit encoding. Libgcc will create a weak alias reference
11176 for this name.
11177
11178 We must only execute this once. If we have clone or target attributes,
11179 this function will be called a second time, but the built-in function
11180 must be created only once. */
11181 static bool complex_muldiv_init_p = false;
11182
11183 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
11184 && !complex_muldiv_init_p)
11185 {
11186 complex_muldiv_init_p = true;
11187
11188 tree fntype = build_function_type_list (complex_long_double_type_node,
11189 long_double_type_node,
11190 long_double_type_node,
11191 long_double_type_node,
11192 long_double_type_node,
11193 NULL_TREE);
11194
11195 /* Create complex multiply. */
11196 built_in_function mul_fncode =
11197 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
11198 - MIN_MODE_COMPLEX_FLOAT);
11199
11200 create_complex_muldiv ("__multc3_ieee128", mul_fncode, fntype);
11201
11202 /* Create complex divide. */
11203 built_in_function div_fncode =
11204 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
11205 - MIN_MODE_COMPLEX_FLOAT);
11206
11207 create_complex_muldiv ("__divtc3_ieee128", div_fncode, fntype);
11208 }
11209 }
11210
11211 /* Emit a potentially record-form instruction, setting DST from SRC.
11212 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11213 signed comparison of DST with zero. If DOT is 1, the generated RTL
11214 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11215 is CR0, do a single dot insn (as a PARALLEL); otherwise, do a SET and
11216 a separate COMPARE. */
11217
11218 void
11219 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11220 {
11221 if (dot == 0)
11222 {
11223 emit_move_insn (dst, src);
11224 return;
11225 }
11226
11227 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11228 {
11229 emit_move_insn (dst, src);
11230 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11231 return;
11232 }
11233
11234 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11235 if (dot == 1)
11236 {
11237 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11238 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11239 }
11240 else
11241 {
11242 rtx set = gen_rtx_SET (dst, src);
11243 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11244 }
11245 }
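
/* Illustrative sketch, not from the original source: with DOT == 1 and
   CCREG being CR0, an AND of two GPRs becomes a single record-form
   instruction such as "and. 3,4,5", emitted as

       (parallel [(set (reg:CC cr0)
                       (compare:CC (and:DI ...) (const_int 0)))
                  (clobber (reg:DI 3))])

   whereas a CCREG other than CR0 takes the separate SET plus COMPARE
   path above.  */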
11246
11247 \f
11248 /* A validation routine: say whether CODE, a condition code, and MODE
11249 match. The other alternatives either don't make sense or should
11250 never be generated. */
11251
11252 void
11253 validate_condition_mode (enum rtx_code code, machine_mode mode)
11254 {
11255 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11256 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11257 && GET_MODE_CLASS (mode) == MODE_CC);
11258
11259 /* These don't make sense. */
11260 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11261 || mode != CCUNSmode);
11262
11263 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11264 || mode == CCUNSmode);
11265
11266 gcc_assert (mode == CCFPmode
11267 || (code != ORDERED && code != UNORDERED
11268 && code != UNEQ && code != LTGT
11269 && code != UNGT && code != UNLT
11270 && code != UNGE && code != UNLE));
11271
11272 /* These are invalid; the information is not there. */
11273 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11274 }
11275
11276 \f
11277 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11278 rldicl, rldicr, or rldic instruction in mode MODE. If so, and if E is
11279 non-null, store there the bit offset (counted from the right) where
11280 the single stretch of 1 bits begins; similarly for B, the bit
11281 offset where it ends. */
11282
11283 bool
11284 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11285 {
11286 unsigned HOST_WIDE_INT val = INTVAL (mask);
11287 unsigned HOST_WIDE_INT bit;
11288 int nb, ne;
11289 int n = GET_MODE_PRECISION (mode);
11290
11291 if (mode != DImode && mode != SImode)
11292 return false;
11293
11294 if (INTVAL (mask) >= 0)
11295 {
11296 bit = val & -val;
11297 ne = exact_log2 (bit);
11298 nb = exact_log2 (val + bit);
11299 }
11300 else if (val + 1 == 0)
11301 {
11302 nb = n;
11303 ne = 0;
11304 }
11305 else if (val & 1)
11306 {
11307 val = ~val;
11308 bit = val & -val;
11309 nb = exact_log2 (bit);
11310 ne = exact_log2 (val + bit);
11311 }
11312 else
11313 {
11314 bit = val & -val;
11315 ne = exact_log2 (bit);
11316 if (val + bit == 0)
11317 nb = n;
11318 else
11319 nb = 0;
11320 }
11321
11322 nb--;
11323
11324 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11325 return false;
11326
11327 if (b)
11328 *b = nb;
11329 if (e)
11330 *e = ne;
11331
11332 return true;
11333 }
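
/* Worked example, illustrative only: for MASK == 0x0ff0 the single
   stretch of 1 bits runs from bit 4 through bit 11 (counted from the
   right), so this returns true with *E == 4 and *B == 11.  A value such
   as 0x0f0f contains two stretches and is rejected.  */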
11334
11335 bool
11336 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11337 {
11338 int nb, ne;
11339 return rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0;
11340 }
11341
11342 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11343 or rldicr instruction, to implement an AND with it in mode MODE. */
11344
11345 bool
11346 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11347 {
11348 int nb, ne;
11349
11350 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11351 return false;
11352
11353 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11354 does not wrap. */
11355 if (mode == DImode)
11356 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11357
11358 /* For SImode, rlwinm can do everything. */
11359 if (mode == SImode)
11360 return (nb < 32 && ne < 32);
11361
11362 return false;
11363 }
11364
11365 /* Return the instruction template for an AND with mask in mode MODE, with
11366 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11367
11368 const char *
11369 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11370 {
11371 int nb, ne;
11372
11373 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11374 gcc_unreachable ();
11375
11376 if (mode == DImode && ne == 0)
11377 {
11378 operands[3] = GEN_INT (63 - nb);
11379 if (dot)
11380 return "rldicl. %0,%1,0,%3";
11381 return "rldicl %0,%1,0,%3";
11382 }
11383
11384 if (mode == DImode && nb == 63)
11385 {
11386 operands[3] = GEN_INT (63 - ne);
11387 if (dot)
11388 return "rldicr. %0,%1,0,%3";
11389 return "rldicr %0,%1,0,%3";
11390 }
11391
11392 if (nb < 32 && ne < 32)
11393 {
11394 operands[3] = GEN_INT (31 - nb);
11395 operands[4] = GEN_INT (31 - ne);
11396 if (dot)
11397 return "rlwinm. %0,%1,0,%3,%4";
11398 return "rlwinm %0,%1,0,%3,%4";
11399 }
11400
11401 gcc_unreachable ();
11402 }
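
/* Worked examples, illustrative only: an AND with 0xff in DImode has
   ne == 0 and nb == 7, giving "rldicl %0,%1,0,56" (keep the low 8 bits);
   an AND with 0x00ff0000 in SImode has nb == 23 and ne == 16, giving
   "rlwinm %0,%1,0,8,15".  */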
11403
11404 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11405 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11406 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11407
11408 bool
11409 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11410 {
11411 int nb, ne;
11412
11413 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11414 return false;
11415
11416 int n = GET_MODE_PRECISION (mode);
11417 int sh = -1;
11418
11419 if (CONST_INT_P (XEXP (shift, 1)))
11420 {
11421 sh = INTVAL (XEXP (shift, 1));
11422 if (sh < 0 || sh >= n)
11423 return false;
11424 }
11425
11426 rtx_code code = GET_CODE (shift);
11427
11428 /* Convert any shift by 0 to a rotate, to simplify the code below. */
11429 if (sh == 0)
11430 code = ROTATE;
11431
11432 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11433 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11434 code = ASHIFT;
11435 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11436 {
11437 code = LSHIFTRT;
11438 sh = n - sh;
11439 }
11440
11441 /* DImode rotates need rld*. */
11442 if (mode == DImode && code == ROTATE)
11443 return (nb == 63 || ne == 0 || ne == sh);
11444
11445 /* SImode rotates need rlw*. */
11446 if (mode == SImode && code == ROTATE)
11447 return (nb < 32 && ne < 32 && sh < 32);
11448
11449 /* Wrap-around masks are only okay for rotates. */
11450 if (ne > nb)
11451 return false;
11452
11453 /* Variable shifts are only okay for rotates. */
11454 if (sh < 0)
11455 return false;
11456
11457 /* Don't allow ASHIFT if the mask is wrong for that. */
11458 if (code == ASHIFT && ne < sh)
11459 return false;
11460
11461 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11462 if the mask is wrong for that. */
11463 if (nb < 32 && ne < 32 && sh < 32
11464 && !(code == LSHIFTRT && nb >= 32 - sh))
11465 return true;
11466
11467 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11468 if the mask is wrong for that. */
11469 if (code == LSHIFTRT)
11470 sh = 64 - sh;
11471 if (nb == 63 || ne == 0 || ne == sh)
11472 return !(code == LSHIFTRT && nb >= sh);
11473
11474 return false;
11475 }
11476
11477 /* Return the instruction template for a shift with mask in mode MODE, with
11478 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11479
11480 const char *
11481 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11482 {
11483 int nb, ne;
11484
11485 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11486 gcc_unreachable ();
11487
11488 if (mode == DImode && ne == 0)
11489 {
11490 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11491 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11492 operands[3] = GEN_INT (63 - nb);
11493 if (dot)
11494 return "rld%I2cl. %0,%1,%2,%3";
11495 return "rld%I2cl %0,%1,%2,%3";
11496 }
11497
11498 if (mode == DImode && nb == 63)
11499 {
11500 operands[3] = GEN_INT (63 - ne);
11501 if (dot)
11502 return "rld%I2cr. %0,%1,%2,%3";
11503 return "rld%I2cr %0,%1,%2,%3";
11504 }
11505
11506 if (mode == DImode
11507 && GET_CODE (operands[4]) != LSHIFTRT
11508 && CONST_INT_P (operands[2])
11509 && ne == INTVAL (operands[2]))
11510 {
11511 operands[3] = GEN_INT (63 - nb);
11512 if (dot)
11513 return "rld%I2c. %0,%1,%2,%3";
11514 return "rld%I2c %0,%1,%2,%3";
11515 }
11516
11517 if (nb < 32 && ne < 32)
11518 {
11519 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11520 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11521 operands[3] = GEN_INT (31 - nb);
11522 operands[4] = GEN_INT (31 - ne);
11523 /* This insn can also be a 64-bit rotate with mask that really makes
11524 it just a shift right (with mask); the %h below are to adjust for
11525 that situation (shift count is >= 32 in that case). */
11526 if (dot)
11527 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11528 return "rlw%I2nm %0,%1,%h2,%3,%4";
11529 }
11530
11531 gcc_unreachable ();
11532 }
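
/* Worked example, illustrative only: a DImode ASHIFT by 8 under the mask
   ~(unsigned HOST_WIDE_INT) 0xff has ne == 8 and nb == 63, giving
   "rldicr %0,%1,8,55": rotate left by 8 and clear the low 8 bits, which
   is exactly a left shift by 8.  */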
11533
11534 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11535 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11536 ASHIFT, or LSHIFTRT) in mode MODE. */
11537
11538 bool
11539 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11540 {
11541 int nb, ne;
11542
11543 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11544 return false;
11545
11546 int n = GET_MODE_PRECISION (mode);
11547
11548 int sh = INTVAL (XEXP (shift, 1));
11549 if (sh < 0 || sh >= n)
11550 return false;
11551
11552 rtx_code code = GET_CODE (shift);
11553
11554 /* Convert any shift by 0 to a rotate, to simplify the code below. */
11555 if (sh == 0)
11556 code = ROTATE;
11557
11558 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11559 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11560 code = ASHIFT;
11561 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11562 {
11563 code = LSHIFTRT;
11564 sh = n - sh;
11565 }
11566
11567 /* DImode rotates need rldimi. */
11568 if (mode == DImode && code == ROTATE)
11569 return (ne == sh);
11570
11571 /* SImode rotates need rlwimi. */
11572 if (mode == SImode && code == ROTATE)
11573 return (nb < 32 && ne < 32 && sh < 32);
11574
11575 /* Wrap-around masks are only okay for rotates. */
11576 if (ne > nb)
11577 return false;
11578
11579 /* Don't allow ASHIFT if the mask is wrong for that. */
11580 if (code == ASHIFT && ne < sh)
11581 return false;
11582
11583 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11584 if the mask is wrong for that. */
11585 if (nb < 32 && ne < 32 && sh < 32
11586 && !(code == LSHIFTRT && nb >= 32 - sh))
11587 return true;
11588
11589 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11590 if the mask is wrong for that. */
11591 if (code == LSHIFTRT)
11592 sh = 64 - sh;
11593 if (ne == sh)
11594 return !(code == LSHIFTRT && nb >= sh);
11595
11596 return false;
11597 }
11598
11599 /* Return the instruction template for an insert with mask in mode MODE, with
11600 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11601
11602 const char *
11603 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11604 {
11605 int nb, ne;
11606
11607 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11608 gcc_unreachable ();
11609
11610 /* Prefer rldimi because rlwimi is cracked. */
11611 if (TARGET_POWERPC64
11612 && (!dot || mode == DImode)
11613 && GET_CODE (operands[4]) != LSHIFTRT
11614 && ne == INTVAL (operands[2]))
11615 {
11616 operands[3] = GEN_INT (63 - nb);
11617 if (dot)
11618 return "rldimi. %0,%1,%2,%3";
11619 return "rldimi %0,%1,%2,%3";
11620 }
11621
11622 if (nb < 32 && ne < 32)
11623 {
11624 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11625 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11626 operands[3] = GEN_INT (31 - nb);
11627 operands[4] = GEN_INT (31 - ne);
11628 if (dot)
11629 return "rlwimi. %0,%1,%2,%3,%4";
11630 return "rlwimi %0,%1,%2,%3,%4";
11631 }
11632
11633 gcc_unreachable ();
11634 }
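
/* Worked example, illustrative only: inserting the low byte of one
   register into byte 1 of another (a ROTATE by 8 under mask 0xff00) has
   ne == 8 and nb == 15; on a 64-bit target this selects
   "rldimi %0,%1,8,48" rather than the cracked rlwimi.  */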
11635
11636 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11637 using two machine instructions. */
11638
11639 bool
11640 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11641 {
11642 /* There are two kinds of AND we can handle with two insns:
11643 1) those we can do with two rl* insns;
11644 2) ori[s];xori[s].
11645
11646 We do not handle that last case yet. */
11647
11648 /* If there is just one stretch of ones, we can do it. */
11649 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11650 return true;
11651
11652 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11653 one insn, we can do the whole thing with two. */
11654 unsigned HOST_WIDE_INT val = INTVAL (c);
11655 unsigned HOST_WIDE_INT bit1 = val & -val;
11656 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11657 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11658 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11659 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11660 }
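
/* Worked example, illustrative only: for C == 0xe7 (0b11100111) the
   lowest hole is bits 3-4, so bit1 == 0x01, bit2 == 0x08, bit3 == 0x20,
   and the filled value 0xe7 + 0x20 - 0x08 == 0xff is a single stretch of
   ones, so the AND can be done in two insns.  */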
11661
11662 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11663 If EXPAND is true, split rotate-and-mask instructions we generate to
11664 their constituent parts as well (this is used during expand); if DOT
11665 is 1, make the last insn a record-form instruction clobbering the
11666 destination GPR and setting the CC reg (from operands[3]); if 2, set
11667 that GPR as well as the CC reg. */
11668
11669 void
11670 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11671 {
11672 gcc_assert (!(expand && dot));
11673
11674 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11675
11676 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11677 shift right. This generates better code than doing the masks without
11678 shifts, or shifting first right and then left. */
11679 int nb, ne;
11680 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11681 {
11682 gcc_assert (mode == DImode);
11683
11684 int shift = 63 - nb;
11685 if (expand)
11686 {
11687 rtx tmp1 = gen_reg_rtx (DImode);
11688 rtx tmp2 = gen_reg_rtx (DImode);
11689 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11690 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11691 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11692 }
11693 else
11694 {
11695 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11696 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11697 emit_move_insn (operands[0], tmp);
11698 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11699 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11700 }
11701 return;
11702 }
11703
11704 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11705 that does the rest. */
11706 unsigned HOST_WIDE_INT bit1 = val & -val;
11707 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11708 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11709 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11710
11711 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11712 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
11713
11714 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11715
11716 /* Two "no-rotate"-and-mask instructions, for SImode. */
11717 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11718 {
11719 gcc_assert (mode == SImode);
11720
11721 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11722 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11723 emit_move_insn (reg, tmp);
11724 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11725 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11726 return;
11727 }
11728
11729 gcc_assert (mode == DImode);
11730
11731 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11732 insns; we have to do the first in SImode, because it wraps. */
11733 if (mask2 <= 0xffffffff
11734 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11735 {
11736 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11737 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11738 GEN_INT (mask1));
11739 rtx reg_low = gen_lowpart (SImode, reg);
11740 emit_move_insn (reg_low, tmp);
11741 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11742 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11743 return;
11744 }
11745
11746 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11747 at the top end), rotate back and clear the other hole. */
11748 int right = exact_log2 (bit3);
11749 int left = 64 - right;
11750
11751 /* Rotate the mask too. */
11752 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11753
11754 if (expand)
11755 {
11756 rtx tmp1 = gen_reg_rtx (DImode);
11757 rtx tmp2 = gen_reg_rtx (DImode);
11758 rtx tmp3 = gen_reg_rtx (DImode);
11759 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11760 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11761 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11762 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11763 }
11764 else
11765 {
11766 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11767 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11768 emit_move_insn (operands[0], tmp);
11769 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11770 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11771 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11772 }
11773 }
11774 \f
11775 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
11776 for lfq and stfq insns iff the registers are hard registers. */
11777
11778 int
11779 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11780 {
11781 /* We might have been passed a SUBREG. */
11782 if (!REG_P (reg1) || !REG_P (reg2))
11783 return 0;
11784
11785 /* We might have been passed non-floating-point registers. */
11786 if (!FP_REGNO_P (REGNO (reg1))
11787 || !FP_REGNO_P (REGNO (reg2)))
11788 return 0;
11789
11790 return (REGNO (reg1) == REGNO (reg2) - 1);
11791 }
11792
11793 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11794 addr1 and addr2 must be in consecutive memory locations
11795 (addr2 == addr1 + 8). */
11796
11797 int
11798 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11799 {
11800 rtx addr1, addr2;
11801 unsigned int reg1, reg2;
11802 int offset1, offset2;
11803
11804 /* The mems cannot be volatile. */
11805 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11806 return 0;
11807
11808 addr1 = XEXP (mem1, 0);
11809 addr2 = XEXP (mem2, 0);
11810
11811 /* Extract an offset (if used) from the first addr. */
11812 if (GET_CODE (addr1) == PLUS)
11813 {
11814 /* If not a REG, return zero. */
11815 if (!REG_P (XEXP (addr1, 0)))
11816 return 0;
11817 else
11818 {
11819 reg1 = REGNO (XEXP (addr1, 0));
11820 /* The offset must be constant! */
11821 if (!CONST_INT_P (XEXP (addr1, 1)))
11822 return 0;
11823 offset1 = INTVAL (XEXP (addr1, 1));
11824 }
11825 }
11826 else if (!REG_P (addr1))
11827 return 0;
11828 else
11829 {
11830 reg1 = REGNO (addr1);
11831 /* This was a simple (mem (reg)) expression. Offset is 0. */
11832 offset1 = 0;
11833 }
11834
11835 /* And now for the second addr. */
11836 if (GET_CODE (addr2) == PLUS)
11837 {
11838 /* If not a REG, return zero. */
11839 if (!REG_P (XEXP (addr2, 0)))
11840 return 0;
11841 else
11842 {
11843 reg2 = REGNO (XEXP (addr2, 0));
11844 /* The offset must be constant. */
11845 if (!CONST_INT_P (XEXP (addr2, 1)))
11846 return 0;
11847 offset2 = INTVAL (XEXP (addr2, 1));
11848 }
11849 }
11850 else if (!REG_P (addr2))
11851 return 0;
11852 else
11853 {
11854 reg2 = REGNO (addr2);
11855 /* This was a simple (mem (reg)) expression. Offset is 0. */
11856 offset2 = 0;
11857 }
11858
11859 /* Both of these must have the same base register. */
11860 if (reg1 != reg2)
11861 return 0;
11862
11863 /* The offset for the second addr must be 8 more than the first addr. */
11864 if (offset2 != offset1 + 8)
11865 return 0;
11866
11867 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11868 instructions. */
11869 return 1;
11870 }
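
/* Illustrative sketch, not from the original source: addresses such as
   (mem:DF (plus (reg 9) (const_int 16))) and
   (mem:DF (plus (reg 9) (const_int 24))) share a base register and
   differ by 8, so the pair qualifies for a single lfq or stfq.  */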
11871 \f
11872 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
11873 need to use DDmode; in all other cases we can use the same mode. */
11874 static machine_mode
11875 rs6000_secondary_memory_needed_mode (machine_mode mode)
11876 {
11877 if (lra_in_progress && mode == SDmode)
11878 return DDmode;
11879 return mode;
11880 }
11881
11882 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11883 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11884 only work on the traditional altivec registers, note if an altivec register
11885 was chosen. */
11886
11887 static enum rs6000_reg_type
11888 register_to_reg_type (rtx reg, bool *is_altivec)
11889 {
11890 HOST_WIDE_INT regno;
11891 enum reg_class rclass;
11892
11893 if (SUBREG_P (reg))
11894 reg = SUBREG_REG (reg);
11895
11896 if (!REG_P (reg))
11897 return NO_REG_TYPE;
11898
11899 regno = REGNO (reg);
11900 if (!HARD_REGISTER_NUM_P (regno))
11901 {
11902 if (!lra_in_progress && !reload_completed)
11903 return PSEUDO_REG_TYPE;
11904
11905 regno = true_regnum (reg);
11906 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
11907 return PSEUDO_REG_TYPE;
11908 }
11909
11910 gcc_assert (regno >= 0);
11911
11912 if (is_altivec && ALTIVEC_REGNO_P (regno))
11913 *is_altivec = true;
11914
11915 rclass = rs6000_regno_regclass[regno];
11916 return reg_class_to_reg_type[(int)rclass];
11917 }
11918
11919 /* Helper function to return the cost of adding a TOC entry address. */
11920
11921 static inline int
11922 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
11923 {
11924 int ret;
11925
11926 if (TARGET_CMODEL != CMODEL_SMALL)
11927 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
11928
11929 else
11930 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
11931
11932 return ret;
11933 }
11934
11935 /* Helper function for rs6000_secondary_reload to determine whether the memory
11936 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11937 needs reloading. Return negative if the memory is not handled by the memory
11938 helper functions (so a different reload method should be tried), 0 if no
11939 additional instructions are needed, and positive to give the extra cost of
11940 accessing the memory. */
11941
11942 static int
11943 rs6000_secondary_reload_memory (rtx addr,
11944 enum reg_class rclass,
11945 machine_mode mode)
11946 {
11947 int extra_cost = 0;
11948 rtx reg, and_arg, plus_arg0, plus_arg1;
11949 addr_mask_type addr_mask;
11950 const char *type = NULL;
11951 const char *fail_msg = NULL;
11952
11953 if (GPR_REG_CLASS_P (rclass))
11954 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11955
11956 else if (rclass == FLOAT_REGS)
11957 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11958
11959 else if (rclass == ALTIVEC_REGS)
11960 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11961
11962 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11963 else if (rclass == VSX_REGS)
11964 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
11965 & ~RELOAD_REG_AND_M16);
11966
11967 /* If the register allocator hasn't made up its mind yet on the register
11968 class to use, settle on defaults. */
11969 else if (rclass == NO_REGS)
11970 {
11971 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11972 & ~RELOAD_REG_AND_M16);
11973
11974 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11975 addr_mask &= ~(RELOAD_REG_INDEXED
11976 | RELOAD_REG_PRE_INCDEC
11977 | RELOAD_REG_PRE_MODIFY);
11978 }
11979
11980 else
11981 addr_mask = 0;
11982
11983 /* If the register isn't valid in this register class, just return now. */
11984 if ((addr_mask & RELOAD_REG_VALID) == 0)
11985 {
11986 if (TARGET_DEBUG_ADDR)
11987 {
11988 fprintf (stderr,
11989 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11990 "not valid in class\n",
11991 GET_MODE_NAME (mode), reg_class_names[rclass]);
11992 debug_rtx (addr);
11993 }
11994
11995 return -1;
11996 }
11997
11998 switch (GET_CODE (addr))
11999 {
12000 /* Does the register class support auto-update forms for this mode? We
12001 don't need a scratch register, since PowerPC only supports
12002 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12003 case PRE_INC:
12004 case PRE_DEC:
12005 reg = XEXP (addr, 0);
12006 if (!base_reg_operand (addr, GET_MODE (reg)))
12007 {
12008 fail_msg = "no base register #1";
12009 extra_cost = -1;
12010 }
12011
12012 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12013 {
12014 extra_cost = 1;
12015 type = "update";
12016 }
12017 break;
12018
12019 case PRE_MODIFY:
12020 reg = XEXP (addr, 0);
12021 plus_arg1 = XEXP (addr, 1);
12022 if (!base_reg_operand (reg, GET_MODE (reg))
12023 || GET_CODE (plus_arg1) != PLUS
12024 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
12025 {
12026 fail_msg = "bad PRE_MODIFY";
12027 extra_cost = -1;
12028 }
12029
12030 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12031 {
12032 extra_cost = 1;
12033 type = "update";
12034 }
12035 break;
12036
12037 /* Do we need to simulate AND -16 to clear the bottom address bits used
12038 in VMX load/stores? Only allow the AND for vector sizes. */
12039 case AND:
12040 and_arg = XEXP (addr, 0);
12041 if (GET_MODE_SIZE (mode) != 16
12042 || !CONST_INT_P (XEXP (addr, 1))
12043 || INTVAL (XEXP (addr, 1)) != -16)
12044 {
12045 fail_msg = "bad Altivec AND #1";
12046 extra_cost = -1;
12047 }
12048
12049 if (rclass != ALTIVEC_REGS)
12050 {
12051 if (legitimate_indirect_address_p (and_arg, false))
12052 extra_cost = 1;
12053
12054 else if (legitimate_indexed_address_p (and_arg, false))
12055 extra_cost = 2;
12056
12057 else
12058 {
12059 fail_msg = "bad Altivec AND #2";
12060 extra_cost = -1;
12061 }
12062
12063 type = "and";
12064 }
12065 break;
12066
12067 /* If this is an indirect address, make sure it is a base register. */
12068 case REG:
12069 case SUBREG:
12070 if (!legitimate_indirect_address_p (addr, false))
12071 {
12072 extra_cost = 1;
12073 type = "move";
12074 }
12075 break;
12076
12077 /* If this is an indexed address, make sure the register class can handle
12078 indexed addresses for this mode. */
12079 case PLUS:
12080 plus_arg0 = XEXP (addr, 0);
12081 plus_arg1 = XEXP (addr, 1);
12082
12083 /* (plus (plus (reg) (constant)) (constant)) is generated during
12084 push_reload processing, so handle it now. */
12085 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12086 {
12087 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12088 {
12089 extra_cost = 1;
12090 type = "offset";
12091 }
12092 }
12093
12094 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12095 push_reload processing, so handle it now. */
12096 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12097 {
12098 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12099 {
12100 extra_cost = 1;
12101 type = "indexed #2";
12102 }
12103 }
12104
12105 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12106 {
12107 fail_msg = "no base register #2";
12108 extra_cost = -1;
12109 }
12110
12111 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12112 {
12113 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12114 || !legitimate_indexed_address_p (addr, false))
12115 {
12116 extra_cost = 1;
12117 type = "indexed";
12118 }
12119 }
12120
12121 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12122 && CONST_INT_P (plus_arg1))
12123 {
12124 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12125 {
12126 extra_cost = 1;
12127 type = "vector d-form offset";
12128 }
12129 }
12130
12131 /* Make sure the register class can handle offset addresses. */
12132 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12133 {
12134 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12135 {
12136 extra_cost = 1;
12137 type = "offset #2";
12138 }
12139 }
12140
12141 else
12142 {
12143 fail_msg = "bad PLUS";
12144 extra_cost = -1;
12145 }
12146
12147 break;
12148
12149 case LO_SUM:
12150 /* Quad offsets are restricted and can't handle normal addresses. */
12151 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12152 {
12153 extra_cost = -1;
12154 type = "vector d-form lo_sum";
12155 }
12156
12157 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12158 {
12159 fail_msg = "bad LO_SUM";
12160 extra_cost = -1;
12161 }
12162
12163 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12164 {
12165 extra_cost = 1;
12166 type = "lo_sum";
12167 }
12168 break;
12169
12170 /* Static addresses need to create a TOC entry. */
12171 case CONST:
12172 case SYMBOL_REF:
12173 case LABEL_REF:
12174 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12175 {
12176 extra_cost = -1;
12177 type = "vector d-form lo_sum #2";
12178 }
12179
12180 else
12181 {
12182 type = "address";
12183 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12184 }
12185 break;
12186
12187 /* TOC references look like offsettable memory. */
12188 case UNSPEC:
12189 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12190 {
12191 fail_msg = "bad UNSPEC";
12192 extra_cost = -1;
12193 }
12194
12195 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12196 {
12197 extra_cost = -1;
12198 type = "vector d-form lo_sum #3";
12199 }
12200
12201 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12202 {
12203 extra_cost = 1;
12204 type = "toc reference";
12205 }
12206 break;
12207
12208 default:
12209 {
12210 fail_msg = "bad address";
12211 extra_cost = -1;
12212 }
12213 }
12214
12215 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12216 {
12217 if (extra_cost < 0)
12218 fprintf (stderr,
12219 "rs6000_secondary_reload_memory error: mode = %s, "
12220 "class = %s, addr_mask = '%s', %s\n",
12221 GET_MODE_NAME (mode),
12222 reg_class_names[rclass],
12223 rs6000_debug_addr_mask (addr_mask, false),
12224 (fail_msg != NULL) ? fail_msg : "<bad address>");
12225
12226 else
12227 fprintf (stderr,
12228 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12229 "addr_mask = '%s', extra cost = %d, %s\n",
12230 GET_MODE_NAME (mode),
12231 reg_class_names[rclass],
12232 rs6000_debug_addr_mask (addr_mask, false),
12233 extra_cost,
12234 (type) ? type : "<none>");
12235
12236 debug_rtx (addr);
12237 }
12238
12239 return extra_cost;
12240 }
12241
12242 /* Helper function for rs6000_secondary_reload to return true if a move to a
12243 different register class is really a simple move. */
12244
12245 static bool
12246 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12247 enum rs6000_reg_type from_type,
12248 machine_mode mode)
12249 {
12250 int size = GET_MODE_SIZE (mode);
12251
12252 /* Add support for various direct moves available. In this function, we only
12253 look at cases where we don't need any extra registers, and one or more
12254 simple move insns are issued. Originally, small integers were not allowed
12255 in FPR/VSX registers. Single precision binary floating point is not a simple
12256 move because we need to convert to the single precision memory layout.
12257 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12258 need special direct move handling, which we do not support yet. */
12259 if (TARGET_DIRECT_MOVE
12260 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12261 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12262 {
12263 if (TARGET_POWERPC64)
12264 {
12265 /* ISA 2.07: MTVSRD or MFVSRD. */
12266 if (size == 8)
12267 return true;
12268
12269 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12270 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12271 return true;
12272 }
12273
12274 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12275 if (TARGET_P8_VECTOR)
12276 {
12277 if (mode == SImode)
12278 return true;
12279
12280 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12281 return true;
12282 }
12283
12284 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12285 if (mode == SDmode)
12286 return true;
12287 }
12288
12289 /* Move to/from SPR. */
12290 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12291 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12292 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12293 return true;
12294
12295 return false;
12296 }
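/* For example, on a 64-bit ISA 2.07 target a DImode copy from a GPR to a VSX
register is the single instruction mtvsrd, so it counts as a simple move
that needs no scratch register. */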
12297
12298 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12299 special direct moves that involve allocating an extra register. Return true
12300 and record the insn code and cost of the helper function in SRI if there is
12301 such a function, or false if not. */
12302
12303 static bool
12304 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12305 enum rs6000_reg_type from_type,
12306 machine_mode mode,
12307 secondary_reload_info *sri,
12308 bool altivec_p)
12309 {
12310 bool ret = false;
12311 enum insn_code icode = CODE_FOR_nothing;
12312 int cost = 0;
12313 int size = GET_MODE_SIZE (mode);
12314
12315 if (TARGET_POWERPC64 && size == 16)
12316 {
12317 /* Handle moving 128-bit values from GPRs to VSX registers on
12318 ISA 2.07 (power8, power9) when running in 64-bit mode using
12319 XXPERMDI to glue the two 64-bit values back together. */
12320 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12321 {
12322 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12323 icode = reg_addr[mode].reload_vsx_gpr;
12324 }
12325
12326 /* Handle moving 128-bit values from VSX registers to GPRs on
12327 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12328 bottom 64-bit value. */
12329 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12330 {
12331 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12332 icode = reg_addr[mode].reload_gpr_vsx;
12333 }
12334 }
12335
12336 else if (TARGET_POWERPC64 && mode == SFmode)
12337 {
12338 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12339 {
12340 cost = 3; /* xscvdpspn, mfvsrd, plus a masking AND. */
12341 icode = reg_addr[mode].reload_gpr_vsx;
12342 }
12343
12344 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12345 {
12346 cost = 2; /* mtvsrwz, xscvspdpn. */
12347 icode = reg_addr[mode].reload_vsx_gpr;
12348 }
12349 }
12350
12351 else if (!TARGET_POWERPC64 && size == 8)
12352 {
12353 /* Handle moving 64-bit values from GPRs to floating point registers on
12354 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12355 32-bit values back together. Altivec register classes must be handled
12356 specially since a different instruction is used, and the secondary
12357 reload support requires a single instruction class in the scratch
12358 register constraint. However, right now TFmode is not allowed in
12359 Altivec registers, so the pattern will never match. */
12360 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12361 {
12362 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12363 icode = reg_addr[mode].reload_fpr_gpr;
12364 }
12365 }
12366
12367 if (icode != CODE_FOR_nothing)
12368 {
12369 ret = true;
12370 if (sri)
12371 {
12372 sri->icode = icode;
12373 sri->extra_cost = cost;
12374 }
12375 }
12376
12377 return ret;
12378 }
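/* As an illustration (register numbers hypothetical), a TImode reload from a
GPR pair into a VSX register on ISA 2.07 expands roughly as:

mtvsrd <tmp-hi>,<gpr-hi>
mtvsrd <tmp-lo>,<gpr-lo>
xxpermdi <vsx>,<tmp-hi>,<tmp-lo>,0

which matches the cost of 3 recorded above. */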
12379
12380 /* Return whether a move between two register classes can be done either
12381 directly (simple move) or via a pattern that uses a single extra temporary
12382 (using ISA 2.07's direct move in this case). */
12383
12384 static bool
12385 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12386 enum rs6000_reg_type from_type,
12387 machine_mode mode,
12388 secondary_reload_info *sri,
12389 bool altivec_p)
12390 {
12391 /* Fall back to load/store reloads if either type is not a register. */
12392 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12393 return false;
12394
12395 /* If we haven't allocated registers yet, assume the move can be done for the
12396 standard register types. */
12397 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12398 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12399 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12400 return true;
12401
12402 /* A move within the same set of registers is a simple move for non-specialized
12403 registers. */
12404 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12405 return true;
12406
12407 /* Check whether a simple move can be done directly. */
12408 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12409 {
12410 if (sri)
12411 {
12412 sri->icode = CODE_FOR_nothing;
12413 sri->extra_cost = 0;
12414 }
12415 return true;
12416 }
12417
12418 /* Now check if we can do it in a few steps. */
12419 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12420 altivec_p);
12421 }
12422
12423 /* Inform reload about cases where moving X with a mode MODE to a register in
12424 RCLASS requires an extra scratch or immediate register. Return the class
12425 needed for the immediate register.
12426
12427 For VSX and Altivec, we may need a register to convert sp+offset into
12428 reg+sp.
12429
12430 For misaligned 64-bit gpr loads and stores we need a register to
12431 convert an offset address to indirect. */
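/* For example, a V4SImode spill at sp+offset on a target without d-form
vector addressing is handled by loading the offset into a scratch GPR and
using an indexed (reg+sp) form such as lvx or lxvx. */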
12432
12433 static reg_class_t
12434 rs6000_secondary_reload (bool in_p,
12435 rtx x,
12436 reg_class_t rclass_i,
12437 machine_mode mode,
12438 secondary_reload_info *sri)
12439 {
12440 enum reg_class rclass = (enum reg_class) rclass_i;
12441 reg_class_t ret = ALL_REGS;
12442 enum insn_code icode;
12443 bool default_p = false;
12444 bool done_p = false;
12445
12446 /* Allow subreg of memory before/during reload. */
12447 bool memory_p = (MEM_P (x)
12448 || (!reload_completed && SUBREG_P (x)
12449 && MEM_P (SUBREG_REG (x))));
12450
12451 sri->icode = CODE_FOR_nothing;
12452 sri->t_icode = CODE_FOR_nothing;
12453 sri->extra_cost = 0;
12454 icode = ((in_p)
12455 ? reg_addr[mode].reload_load
12456 : reg_addr[mode].reload_store);
12457
12458 if (REG_P (x) || register_operand (x, mode))
12459 {
12460 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12461 bool altivec_p = (rclass == ALTIVEC_REGS);
12462 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12463
12464 if (!in_p)
12465 std::swap (to_type, from_type);
12466
12467 /* Can we do a direct move of some sort? */
12468 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12469 altivec_p))
12470 {
12471 icode = (enum insn_code)sri->icode;
12472 default_p = false;
12473 done_p = true;
12474 ret = NO_REGS;
12475 }
12476 }
12477
12478 /* Make sure 0.0 is not reloaded or forced into memory. */
12479 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12480 {
12481 ret = NO_REGS;
12482 default_p = false;
12483 done_p = true;
12484 }
12485
12486 /* If this is a scalar floating point value and we want to load it into the
12487 traditional Altivec registers, move it through a traditional floating
12488 point register, unless we have D-form addressing. Also make sure that
12489 non-zero constants use an FPR. */
12490 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12491 && !mode_supports_vmx_dform (mode)
12492 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12493 && (memory_p || CONST_DOUBLE_P (x)))
12494 {
12495 ret = FLOAT_REGS;
12496 default_p = false;
12497 done_p = true;
12498 }
12499
12500 /* Handle reload of load/stores if we have reload helper functions. */
12501 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12502 {
12503 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12504 mode);
12505
12506 if (extra_cost >= 0)
12507 {
12508 done_p = true;
12509 ret = NO_REGS;
12510 if (extra_cost > 0)
12511 {
12512 sri->extra_cost = extra_cost;
12513 sri->icode = icode;
12514 }
12515 }
12516 }
12517
12518 /* Handle unaligned loads and stores of integer registers. */
12519 if (!done_p && TARGET_POWERPC64
12520 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12521 && memory_p
12522 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12523 {
12524 rtx addr = XEXP (x, 0);
12525 rtx off = address_offset (addr);
12526
12527 if (off != NULL_RTX)
12528 {
12529 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12530 unsigned HOST_WIDE_INT offset = INTVAL (off);
12531
12532 /* We need a secondary reload when our legitimate_address_p
12533 says the address is good (as otherwise the entire address
12534 will be reloaded), and the offset is not a multiple of
12535 four or we have an address wrap. Address wrap will only
12536 occur for LO_SUMs since legitimate_offset_address_p
12537 rejects addresses for 16-byte mems that will wrap. */
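/* Worked example: "ld" and "std" are DS-form instructions whose 16-bit
displacement must be a multiple of 4. A DImode access at sp+0x7ffa is in
range but not encodable, so we load the address into the scratch register
and use an indexed (X-form) access instead. */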
12538 if (GET_CODE (addr) == LO_SUM
12539 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12540 && ((offset & 3) != 0
12541 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12542 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12543 && (offset & 3) != 0))
12544 {
12545 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12546 if (in_p)
12547 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12548 : CODE_FOR_reload_di_load);
12549 else
12550 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12551 : CODE_FOR_reload_di_store);
12552 sri->extra_cost = 2;
12553 ret = NO_REGS;
12554 done_p = true;
12555 }
12556 else
12557 default_p = true;
12558 }
12559 else
12560 default_p = true;
12561 }
12562
12563 if (!done_p && !TARGET_POWERPC64
12564 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12565 && memory_p
12566 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12567 {
12568 rtx addr = XEXP (x, 0);
12569 rtx off = address_offset (addr);
12570
12571 if (off != NULL_RTX)
12572 {
12573 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12574 unsigned HOST_WIDE_INT offset = INTVAL (off);
12575
12576 /* We need a secondary reload when our legitimate_address_p
12577 says the address is good (as otherwise the entire address
12578 will be reloaded), and we have a wrap.
12579
12580 legitimate_lo_sum_address_p allows LO_SUM addresses to
12581 have any offset so test for wrap in the low 16 bits.
12582
12583 legitimate_offset_address_p checks for the range
12584 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12585 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12586 [0x7ff4,0x7fff] respectively, so test for the
12587 intersection of these ranges, [0x7ffc,0x7fff] and
12588 [0x7ff4,0x7ff7] respectively.
12589
12590 Note that the address we see here may have been
12591 manipulated by legitimize_reload_address. */
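/* Worked example for -m32: a DImode access at offset 0x7ffc reaches its
second word at 0x8000, beyond the signed 16-bit displacement. Here
extra = 4, so "offset - (0x8000 - extra) < UNITS_PER_WORD" flags exactly
the wrapping offsets [0x7ffc,0x7fff]. */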
12592 if (GET_CODE (addr) == LO_SUM
12593 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12594 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12595 {
12596 if (in_p)
12597 sri->icode = CODE_FOR_reload_si_load;
12598 else
12599 sri->icode = CODE_FOR_reload_si_store;
12600 sri->extra_cost = 2;
12601 ret = NO_REGS;
12602 done_p = true;
12603 }
12604 else
12605 default_p = true;
12606 }
12607 else
12608 default_p = true;
12609 }
12610
12611 if (!done_p)
12612 default_p = true;
12613
12614 if (default_p)
12615 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12616
12617 gcc_assert (ret != ALL_REGS);
12618
12619 if (TARGET_DEBUG_ADDR)
12620 {
12621 fprintf (stderr,
12622 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12623 "mode = %s",
12624 reg_class_names[ret],
12625 in_p ? "true" : "false",
12626 reg_class_names[rclass],
12627 GET_MODE_NAME (mode));
12628
12629 if (reload_completed)
12630 fputs (", after reload", stderr);
12631
12632 if (!done_p)
12633 fputs (", done_p not set", stderr);
12634
12635 if (default_p)
12636 fputs (", default secondary reload", stderr);
12637
12638 if (sri->icode != CODE_FOR_nothing)
12639 fprintf (stderr, ", reload func = %s, extra cost = %d",
12640 insn_data[sri->icode].name, sri->extra_cost);
12641
12642 else if (sri->extra_cost > 0)
12643 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12644
12645 fputs ("\n", stderr);
12646 debug_rtx (x);
12647 }
12648
12649 return ret;
12650 }
12651
12652 /* Better tracing for rs6000_secondary_reload_inner. */
12653
12654 static void
12655 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12656 bool store_p)
12657 {
12658 rtx set, clobber;
12659
12660 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12661
12662 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12663 store_p ? "store" : "load");
12664
12665 if (store_p)
12666 set = gen_rtx_SET (mem, reg);
12667 else
12668 set = gen_rtx_SET (reg, mem);
12669
12670 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12671 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12672 }
12673
12674 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12675 ATTRIBUTE_NORETURN;
12676
12677 static void
12678 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12679 bool store_p)
12680 {
12681 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12682 gcc_unreachable ();
12683 }
12684
12685 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12686 reload helper functions. These were identified in
12687 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12688 reload, it calls the insns:
12689 reload_<RELOAD:mode>_<P:mptrsize>_store
12690 reload_<RELOAD:mode>_<P:mptrsize>_load
12691
12692 which in turn calls this function, to do whatever is necessary to create
12693 valid addresses. */
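/* For instance, assuming the usual expansion of the mode attributes, a
V4SImode reload on a 64-bit target would use reload_v4si_di_store and
reload_v4si_di_load. */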
12694
12695 void
12696 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12697 {
12698 int regno = true_regnum (reg);
12699 machine_mode mode = GET_MODE (reg);
12700 addr_mask_type addr_mask;
12701 rtx addr;
12702 rtx new_addr;
12703 rtx op_reg, op0, op1;
12704 rtx and_op;
12705 rtx cc_clobber;
12706 rtvec rv;
12707
12708 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12709 || !base_reg_operand (scratch, GET_MODE (scratch)))
12710 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12711
12712 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12713 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12714
12715 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12716 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12717
12718 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12719 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12720
12721 else
12722 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12723
12724 /* Make sure the mode is valid in this register class. */
12725 if ((addr_mask & RELOAD_REG_VALID) == 0)
12726 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12727
12728 if (TARGET_DEBUG_ADDR)
12729 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12730
12731 new_addr = addr = XEXP (mem, 0);
12732 switch (GET_CODE (addr))
12733 {
12734 /* Does the register class support auto update forms for this mode? If
12735 not, do the update now. We don't need a scratch register, since the
12736 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
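/* In effect, an access through (pre_inc (reg)) whose register class lacks
update forms is rewritten as the explicit update "reg += size" followed by
a plain indirect access through reg. */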
12737 case PRE_INC:
12738 case PRE_DEC:
12739 op_reg = XEXP (addr, 0);
12740 if (!base_reg_operand (op_reg, Pmode))
12741 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12742
12743 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12744 {
12745 int delta = GET_MODE_SIZE (mode);
12746 if (GET_CODE (addr) == PRE_DEC)
12747 delta = -delta;
12748 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12749 new_addr = op_reg;
12750 }
12751 break;
12752
12753 case PRE_MODIFY:
12754 op0 = XEXP (addr, 0);
12755 op1 = XEXP (addr, 1);
12756 if (!base_reg_operand (op0, Pmode)
12757 || GET_CODE (op1) != PLUS
12758 || !rtx_equal_p (op0, XEXP (op1, 0)))
12759 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12760
12761 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12762 {
12763 emit_insn (gen_rtx_SET (op0, op1));
12764 new_addr = op0;
12765 }
12766 break;
12767
12768 /* Do we need to simulate AND -16 to clear the bottom address bits used
12769 in VMX loads/stores? */
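/* When the mask cannot be part of the address itself, the code below emits
(parallel [(set (reg scratch) (and ...)) (clobber (scratch:CC))]) so that
the masked address ends up in the scratch register. */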
12770 case AND:
12771 op0 = XEXP (addr, 0);
12772 op1 = XEXP (addr, 1);
12773 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12774 {
12775 if (REG_P (op0) || SUBREG_P (op0))
12776 op_reg = op0;
12777
12778 else if (GET_CODE (op1) == PLUS)
12779 {
12780 emit_insn (gen_rtx_SET (scratch, op1));
12781 op_reg = scratch;
12782 }
12783
12784 else
12785 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12786
12787 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12788 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12789 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12790 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12791 new_addr = scratch;
12792 }
12793 break;
12794
12795 /* If this is an indirect address, make sure it is a base register. */
12796 case REG:
12797 case SUBREG:
12798 if (!base_reg_operand (addr, GET_MODE (addr)))
12799 {
12800 emit_insn (gen_rtx_SET (scratch, addr));
12801 new_addr = scratch;
12802 }
12803 break;
12804
12805 /* If this is an indexed address, make sure the register class can handle
12806 indexed addresses for this mode. */
12807 case PLUS:
12808 op0 = XEXP (addr, 0);
12809 op1 = XEXP (addr, 1);
12810 if (!base_reg_operand (op0, Pmode))
12811 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12812
12813 else if (int_reg_operand (op1, Pmode))
12814 {
12815 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12816 {
12817 emit_insn (gen_rtx_SET (scratch, addr));
12818 new_addr = scratch;
12819 }
12820 }
12821
12822 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12823 {
12824 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12825 || !quad_address_p (addr, mode, false))
12826 {
12827 emit_insn (gen_rtx_SET (scratch, addr));
12828 new_addr = scratch;
12829 }
12830 }
12831
12832 /* Make sure the register class can handle offset addresses. */
12833 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12834 {
12835 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12836 {
12837 emit_insn (gen_rtx_SET (scratch, addr));
12838 new_addr = scratch;
12839 }
12840 }
12841
12842 else
12843 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12844
12845 break;
12846
12847 case LO_SUM:
12848 op0 = XEXP (addr, 0);
12849 op1 = XEXP (addr, 1);
12850 if (!base_reg_operand (op0, Pmode))
12851 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12852
12853 else if (int_reg_operand (op1, Pmode))
12854 {
12855 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12856 {
12857 emit_insn (gen_rtx_SET (scratch, addr));
12858 new_addr = scratch;
12859 }
12860 }
12861
12862 /* Quad offsets are restricted and can't handle normal addresses. */
12863 else if (mode_supports_dq_form (mode))
12864 {
12865 emit_insn (gen_rtx_SET (scratch, addr));
12866 new_addr = scratch;
12867 }
12868
12869 /* Make sure the register class can handle offset addresses. */
12870 else if (legitimate_lo_sum_address_p (mode, addr, false))
12871 {
12872 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12873 {
12874 emit_insn (gen_rtx_SET (scratch, addr));
12875 new_addr = scratch;
12876 }
12877 }
12878
12879 else
12880 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12881
12882 break;
12883
12884 case SYMBOL_REF:
12885 case CONST:
12886 case LABEL_REF:
12887 rs6000_emit_move (scratch, addr, Pmode);
12888 new_addr = scratch;
12889 break;
12890
12891 default:
12892 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12893 }
12894
12895 /* Adjust the address if it changed. */
12896 if (addr != new_addr)
12897 {
12898 mem = replace_equiv_address_nv (mem, new_addr);
12899 if (TARGET_DEBUG_ADDR)
12900 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12901 }
12902
12903 /* Now create the move. */
12904 if (store_p)
12905 emit_insn (gen_rtx_SET (mem, reg));
12906 else
12907 emit_insn (gen_rtx_SET (reg, mem));
12908
12909 return;
12910 }
12911
12912 /* Convert reloads involving 64-bit gprs and misaligned offset
12913 addressing, or multiple 32-bit gprs and offsets that are too large,
12914 to use indirect addressing. */
12915
12916 void
12917 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
12918 {
12919 int regno = true_regnum (reg);
12920 enum reg_class rclass;
12921 rtx addr;
12922 rtx scratch_or_premodify = scratch;
12923
12924 if (TARGET_DEBUG_ADDR)
12925 {
12926 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
12927 store_p ? "store" : "load");
12928 fprintf (stderr, "reg:\n");
12929 debug_rtx (reg);
12930 fprintf (stderr, "mem:\n");
12931 debug_rtx (mem);
12932 fprintf (stderr, "scratch:\n");
12933 debug_rtx (scratch);
12934 }
12935
12936 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
12937 gcc_assert (MEM_P (mem));
12938 rclass = REGNO_REG_CLASS (regno);
12939 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
12940 addr = XEXP (mem, 0);
12941
12942 if (GET_CODE (addr) == PRE_MODIFY)
12943 {
12944 gcc_assert (REG_P (XEXP (addr, 0))
12945 && GET_CODE (XEXP (addr, 1)) == PLUS
12946 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
12947 scratch_or_premodify = XEXP (addr, 0);
12948 addr = XEXP (addr, 1);
12949 }
12950 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
12951
12952 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
12953
12954 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
12955
12956 /* Now create the move. */
12957 if (store_p)
12958 emit_insn (gen_rtx_SET (mem, reg));
12959 else
12960 emit_insn (gen_rtx_SET (reg, mem));
12961
12962 return;
12963 }
12964
12965 /* Given an rtx X being reloaded into a reg required to be
12966 in class CLASS, return the class of reg to actually use.
12967 In general this is just CLASS; but on some machines
12968 in some cases it is preferable to use a more restrictive class.
12969
12970 On the RS/6000, we have to return NO_REGS when we want to reload a
12971 floating-point CONST_DOUBLE to force it to be copied to memory.
12972
12973 We also don't want to reload integer values into floating-point
12974 registers if we can at all help it. In fact, this can
12975 cause reload to die, if it tries to generate a reload of CTR
12976 into a FP register and discovers it doesn't have the memory location
12977 required.
12978
12979 ??? Would it be a good idea to have reload do the converse, that is
12980 try to reload floating modes into FP registers if possible?
12981 */
12982
12983 static enum reg_class
12984 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12985 {
12986 machine_mode mode = GET_MODE (x);
12987 bool is_constant = CONSTANT_P (x);
12988
12989 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12990 reload class for it. */
12991 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12992 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12993 return NO_REGS;
12994
12995 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12996 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12997 return NO_REGS;
12998
12999 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13000 the reloading of address expressions using PLUS into floating point
13001 registers. */
13002 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
13003 {
13004 if (is_constant)
13005 {
13006 /* Zero is always allowed in all VSX registers. */
13007 if (x == CONST0_RTX (mode))
13008 return rclass;
13009
13010 /* If this is a vector constant that can be formed with a few Altivec
13011 instructions, we want altivec registers. */
13012 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
13013 return ALTIVEC_REGS;
13014
13015 /* If this is an integer constant that can easily be loaded into
13016 vector registers, allow it. */
13017 if (CONST_INT_P (x))
13018 {
13019 HOST_WIDE_INT value = INTVAL (x);
13020
13021 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13022 2.06 can generate it in the Altivec registers with
13023 VSPLTI<x>. */
13024 if (value == -1)
13025 {
13026 if (TARGET_P8_VECTOR)
13027 return rclass;
13028 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13029 return ALTIVEC_REGS;
13030 else
13031 return NO_REGS;
13032 }
13033
13034 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13035 a sign extend in the Altivec registers. */
13036 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
13037 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
13038 return ALTIVEC_REGS;
13039 }
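/* For example, (const_int 17) can be materialized in an Altivec register
on ISA 3.0 roughly as an xxspltib of 17 followed by a vector sign extend
(e.g. vextsb2d for DImode), which is cheaper than a load from memory. */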
13040
13041 /* Force constant to memory. */
13042 return NO_REGS;
13043 }
13044
13045 /* D-form addressing can easily reload the value. */
13046 if (mode_supports_vmx_dform (mode)
13047 || mode_supports_dq_form (mode))
13048 return rclass;
13049
13050 /* If this is a scalar floating point value and we don't have D-form
13051 addressing, prefer the traditional floating point registers so that we
13052 can use D-form (register+offset) addressing. */
13053 if (rclass == VSX_REGS
13054 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
13055 return FLOAT_REGS;
13056
13057 /* Prefer the Altivec registers if Altivec is handling the vector
13058 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13059 loads. */
13060 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13061 || mode == V1TImode)
13062 return ALTIVEC_REGS;
13063
13064 return rclass;
13065 }
13066
13067 if (is_constant || GET_CODE (x) == PLUS)
13068 {
13069 if (reg_class_subset_p (GENERAL_REGS, rclass))
13070 return GENERAL_REGS;
13071 if (reg_class_subset_p (BASE_REGS, rclass))
13072 return BASE_REGS;
13073 return NO_REGS;
13074 }
13075
13076 /* For the vector pair and vector quad modes, prefer their natural register
13077 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13078 the GPR registers. */
13079 if (rclass == GEN_OR_FLOAT_REGS)
13080 {
13081 if (mode == OOmode)
13082 return VSX_REGS;
13083
13084 if (mode == XOmode)
13085 return FLOAT_REGS;
13086
13087 if (GET_MODE_CLASS (mode) == MODE_INT)
13088 return GENERAL_REGS;
13089 }
13090
13091 return rclass;
13092 }
13093
13094 /* Debug version of rs6000_preferred_reload_class. */
13095 static enum reg_class
13096 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13097 {
13098 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13099
13100 fprintf (stderr,
13101 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13102 "mode = %s, x:\n",
13103 reg_class_names[ret], reg_class_names[rclass],
13104 GET_MODE_NAME (GET_MODE (x)));
13105 debug_rtx (x);
13106
13107 return ret;
13108 }
13109
13110 /* If we are copying between FP or AltiVec registers and anything else, we need
13111 a memory location. The exception is when we are targeting ppc64 and the
13112 fpr to/from gpr move instructions are available. Also, under VSX, you
13113 can copy vector registers from the FP register set to the Altivec register
13114 set and vice versa. */
13115
13116 static bool
13117 rs6000_secondary_memory_needed (machine_mode mode,
13118 reg_class_t from_class,
13119 reg_class_t to_class)
13120 {
13121 enum rs6000_reg_type from_type, to_type;
13122 bool altivec_p = ((from_class == ALTIVEC_REGS)
13123 || (to_class == ALTIVEC_REGS));
13124
13125 /* If a simple/direct move is available, we don't need secondary memory. */
13126 from_type = reg_class_to_reg_type[(int)from_class];
13127 to_type = reg_class_to_reg_type[(int)to_class];
13128
13129 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13130 (secondary_reload_info *)0, altivec_p))
13131 return false;
13132
13133 /* If we have a floating point or vector register class, we need to use
13134 memory to transfer the data. */
13135 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13136 return true;
13137
13138 return false;
13139 }
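/* For example, an SImode copy between a GPR and an FPR on a target without
direct-move instructions must bounce through a stack slot, so this
function returns true for that pair. */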
13140
13141 /* Debug version of rs6000_secondary_memory_needed. */
13142 static bool
13143 rs6000_debug_secondary_memory_needed (machine_mode mode,
13144 reg_class_t from_class,
13145 reg_class_t to_class)
13146 {
13147 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13148
13149 fprintf (stderr,
13150 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13151 "to_class = %s, mode = %s\n",
13152 ret ? "true" : "false",
13153 reg_class_names[from_class],
13154 reg_class_names[to_class],
13155 GET_MODE_NAME (mode));
13156
13157 return ret;
13158 }
13159
13160 /* Return the register class of a scratch register needed to copy IN into
13161 or out of a register in RCLASS in MODE. If it can be done directly,
13162 NO_REGS is returned. */
13163
13164 static enum reg_class
13165 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13166 rtx in)
13167 {
13168 int regno;
13169
13170 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13171 #if TARGET_MACHO
13172 && MACHOPIC_INDIRECT
13173 #endif
13174 ))
13175 {
13176 /* We cannot copy a symbolic operand directly into anything
13177 other than BASE_REGS for TARGET_ELF. So indicate that a
13178 register from BASE_REGS is needed as an intermediate
13179 register.
13180
13181 On Darwin, pic addresses require a load from memory, which
13182 needs a base register. */
13183 if (rclass != BASE_REGS
13184 && (SYMBOL_REF_P (in)
13185 || GET_CODE (in) == HIGH
13186 || GET_CODE (in) == LABEL_REF
13187 || GET_CODE (in) == CONST))
13188 return BASE_REGS;
13189 }
13190
13191 if (REG_P (in))
13192 {
13193 regno = REGNO (in);
13194 if (!HARD_REGISTER_NUM_P (regno))
13195 {
13196 regno = true_regnum (in);
13197 if (!HARD_REGISTER_NUM_P (regno))
13198 regno = -1;
13199 }
13200 }
13201 else if (SUBREG_P (in))
13202 {
13203 regno = true_regnum (in);
13204 if (!HARD_REGISTER_NUM_P (regno))
13205 regno = -1;
13206 }
13207 else
13208 regno = -1;
13209
13210 /* If we have VSX register moves, prefer moving scalar values between
13211 Altivec registers and GPRs by going via an FPR (and then via memory)
13212 instead of reloading the secondary memory address for Altivec moves. */
13213 if (TARGET_VSX
13214 && GET_MODE_SIZE (mode) < 16
13215 && !mode_supports_vmx_dform (mode)
13216 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13217 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13218 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13219 && (regno >= 0 && INT_REGNO_P (regno)))))
13220 return FLOAT_REGS;
13221
13222 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13223 into anything. */
13224 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13225 || (regno >= 0 && INT_REGNO_P (regno)))
13226 return NO_REGS;
13227
13228 /* Constants, memory, and VSX registers can go into VSX registers (both the
13229 traditional floating point and the altivec registers). */
13230 if (rclass == VSX_REGS
13231 && (regno == -1 || VSX_REGNO_P (regno)))
13232 return NO_REGS;
13233
13234 /* Constants, memory, and FP registers can go into FP registers. */
13235 if ((regno == -1 || FP_REGNO_P (regno))
13236 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13237 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13238
13239 /* Memory and AltiVec registers can go into AltiVec registers. */
13240 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13241 && rclass == ALTIVEC_REGS)
13242 return NO_REGS;
13243
13244 /* We can copy among the CR registers. */
13245 if ((rclass == CR_REGS || rclass == CR0_REGS)
13246 && regno >= 0 && CR_REGNO_P (regno))
13247 return NO_REGS;
13248
13249 /* Otherwise, we need GENERAL_REGS. */
13250 return GENERAL_REGS;
13251 }
13252
13253 /* Debug version of rs6000_secondary_reload_class. */
13254 static enum reg_class
13255 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13256 machine_mode mode, rtx in)
13257 {
13258 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13259 fprintf (stderr,
13260 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13261 "mode = %s, input rtx:\n",
13262 reg_class_names[ret], reg_class_names[rclass],
13263 GET_MODE_NAME (mode));
13264 debug_rtx (in);
13265
13266 return ret;
13267 }
13268
13269 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13270
13271 static bool
13272 rs6000_can_change_mode_class (machine_mode from,
13273 machine_mode to,
13274 reg_class_t rclass)
13275 {
13276 unsigned from_size = GET_MODE_SIZE (from);
13277 unsigned to_size = GET_MODE_SIZE (to);
13278
13279 if (from_size != to_size)
13280 {
13281 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13282
13283 if (reg_classes_intersect_p (xclass, rclass))
13284 {
13285 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13286 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13287 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13288 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13289
13290 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13291 single register under VSX because the scalar part of the register
13292 is in the upper 64-bits, and not the lower 64-bits. Types like
13293 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
13294 IEEE floating point can't overlap, and neither can small
13295 values. */
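/* For instance, (subreg:DI (reg:KF)) of an IEEE 128-bit value held in a
VSX register is rejected: the DImode view would name the wrong half of
the register. */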
13296
13297 if (to_float128_vector_p && from_float128_vector_p)
13298 return true;
13299
13300 else if (to_float128_vector_p || from_float128_vector_p)
13301 return false;
13302
13303 /* TDmode in floating-mode registers must always go into a register
13304 pair with the most significant word in the even-numbered register
13305 to match ISA requirements. In little-endian mode, this does not
13306 match subreg numbering, so we cannot allow subregs. */
13307 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13308 return false;
13309
13310 /* Allow SD<->DD changes, since SDmode values are stored in
13311 the low half of the DDmode, just like target-independent
13312 code expects. We need to allow at least SD->DD since
13313 rs6000_secondary_memory_needed_mode asks for that change
13314 to be made for SD reloads. */
13315 if ((to == DDmode && from == SDmode)
13316 || (to == SDmode && from == DDmode))
13317 return true;
13318
13319 if (from_size < 8 || to_size < 8)
13320 return false;
13321
13322 if (from_size == 8 && (8 * to_nregs) != to_size)
13323 return false;
13324
13325 if (to_size == 8 && (8 * from_nregs) != from_size)
13326 return false;
13327
13328 return true;
13329 }
13330 else
13331 return true;
13332 }
13333
13334 /* Since the VSX register set includes traditional floating point registers
13335 and altivec registers, just check for the size being different instead of
13336 trying to check whether the modes are vector modes. Otherwise it won't
13337 allow, say, DF and DI to change classes. For types like TFmode and TDmode
13338 that take 2 64-bit registers, rather than a single 128-bit register, don't
13339 allow subregs of those types to other 128-bit types. */
13340 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13341 {
13342 unsigned num_regs = (from_size + 15) / 16;
13343 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13344 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13345 return false;
13346
13347 return (from_size == 8 || from_size == 16);
13348 }
13349
13350 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13351 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13352 return false;
13353
13354 return true;
13355 }
13356
13357 /* Debug version of rs6000_can_change_mode_class. */
13358 static bool
13359 rs6000_debug_can_change_mode_class (machine_mode from,
13360 machine_mode to,
13361 reg_class_t rclass)
13362 {
13363 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13364
13365 fprintf (stderr,
13366 "rs6000_can_change_mode_class, return %s, from = %s, "
13367 "to = %s, rclass = %s\n",
13368 ret ? "true" : "false",
13369 GET_MODE_NAME (from), GET_MODE_NAME (to),
13370 reg_class_names[rclass]);
13371
13372 return ret;
13373 }
13374 \f
13375 /* Return a string to do a move operation of 128 bits of data. */
13376
13377 const char *
13378 rs6000_output_move_128bit (rtx operands[])
13379 {
13380 rtx dest = operands[0];
13381 rtx src = operands[1];
13382 machine_mode mode = GET_MODE (dest);
13383 int dest_regno;
13384 int src_regno;
13385 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13386 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13387
13388 if (REG_P (dest))
13389 {
13390 dest_regno = REGNO (dest);
13391 dest_gpr_p = INT_REGNO_P (dest_regno);
13392 dest_fp_p = FP_REGNO_P (dest_regno);
13393 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13394 dest_vsx_p = dest_fp_p | dest_vmx_p;
13395 }
13396 else
13397 {
13398 dest_regno = -1;
13399 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13400 }
13401
13402 if (REG_P (src))
13403 {
13404 src_regno = REGNO (src);
13405 src_gpr_p = INT_REGNO_P (src_regno);
13406 src_fp_p = FP_REGNO_P (src_regno);
13407 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13408 src_vsx_p = src_fp_p | src_vmx_p;
13409 }
13410 else
13411 {
13412 src_regno = -1;
13413 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13414 }
13415
13416 /* Register moves. */
13417 if (dest_regno >= 0 && src_regno >= 0)
13418 {
13419 if (dest_gpr_p)
13420 {
13421 if (src_gpr_p)
13422 return "#";
13423
13424 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13425 return (WORDS_BIG_ENDIAN
13426 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13427 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13428
13429 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13430 return "#";
13431 }
13432
13433 else if (TARGET_VSX && dest_vsx_p)
13434 {
13435 if (src_vsx_p)
13436 return "xxlor %x0,%x1,%x1";
13437
13438 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13439 return (WORDS_BIG_ENDIAN
13440 ? "mtvsrdd %x0,%1,%L1"
13441 : "mtvsrdd %x0,%L1,%1");
13442
13443 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13444 return "#";
13445 }
13446
13447 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13448 return "vor %0,%1,%1";
13449
13450 else if (dest_fp_p && src_fp_p)
13451 return "#";
13452 }
13453
13454 /* Loads. */
13455 else if (dest_regno >= 0 && MEM_P (src))
13456 {
13457 if (dest_gpr_p)
13458 {
13459 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13460 return "lq %0,%1";
13461 else
13462 return "#";
13463 }
13464
13465 else if (TARGET_ALTIVEC && dest_vmx_p
13466 && altivec_indexed_or_indirect_operand (src, mode))
13467 return "lvx %0,%y1";
13468
13469 else if (TARGET_VSX && dest_vsx_p)
13470 {
13471 if (mode_supports_dq_form (mode)
13472 && quad_address_p (XEXP (src, 0), mode, true))
13473 return "lxv %x0,%1";
13474
13475 else if (TARGET_P9_VECTOR)
13476 return "lxvx %x0,%y1";
13477
13478 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13479 return "lxvw4x %x0,%y1";
13480
13481 else
13482 return "lxvd2x %x0,%y1";
13483 }
13484
13485 else if (TARGET_ALTIVEC && dest_vmx_p)
13486 return "lvx %0,%y1";
13487
13488 else if (dest_fp_p)
13489 return "#";
13490 }
13491
13492 /* Stores. */
13493 else if (src_regno >= 0 && MEM_P (dest))
13494 {
13495 if (src_gpr_p)
13496 {
13497 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13498 return "stq %1,%0";
13499 else
13500 return "#";
13501 }
13502
13503 else if (TARGET_ALTIVEC && src_vmx_p
13504 && altivec_indexed_or_indirect_operand (dest, mode))
13505 return "stvx %1,%y0";
13506
13507 else if (TARGET_VSX && src_vsx_p)
13508 {
13509 if (mode_supports_dq_form (mode)
13510 && quad_address_p (XEXP (dest, 0), mode, true))
13511 return "stxv %x1,%0";
13512
13513 else if (TARGET_P9_VECTOR)
13514 return "stxvx %x1,%y0";
13515
13516 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13517 return "stxvw4x %x1,%y0";
13518
13519 else
13520 return "stxvd2x %x1,%y0";
13521 }
13522
13523 else if (TARGET_ALTIVEC && src_vmx_p)
13524 return "stvx %1,%y0";
13525
13526 else if (src_fp_p)
13527 return "#";
13528 }
13529
13530 /* Constants. */
13531 else if (dest_regno >= 0
13532 && (CONST_INT_P (src)
13533 || CONST_WIDE_INT_P (src)
13534 || CONST_DOUBLE_P (src)
13535 || GET_CODE (src) == CONST_VECTOR))
13536 {
13537 if (dest_gpr_p)
13538 return "#";
13539
13540 else if ((dest_vmx_p && TARGET_ALTIVEC)
13541 || (dest_vsx_p && TARGET_VSX))
13542 return output_vec_const_move (operands);
13543 }
13544
13545 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13546 }
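/* For example, a V4SImode load whose address is not a quad d-form address
is emitted as "lxvx" on ISA 3.0 targets and "lxvw4x" on earlier VSX
targets, following the chain above. */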
13547
13548 /* Validate a 128-bit move. */
13549 bool
13550 rs6000_move_128bit_ok_p (rtx operands[])
13551 {
13552 machine_mode mode = GET_MODE (operands[0]);
13553 return (gpc_reg_operand (operands[0], mode)
13554 || gpc_reg_operand (operands[1], mode));
13555 }
13556
13557 /* Return true if a 128-bit move needs to be split. */
13558 bool
13559 rs6000_split_128bit_ok_p (rtx operands[])
13560 {
13561 if (!reload_completed)
13562 return false;
13563
13564 if (!gpr_or_gpr_p (operands[0], operands[1]))
13565 return false;
13566
13567 if (quad_load_store_p (operands[0], operands[1]))
13568 return false;
13569
13570 return true;
13571 }
13572
13573 \f
13574 /* Given a comparison operation, return the bit number in CCR to test. We
13575 know this is a valid comparison.
13576
13577 SCC_P is 1 if this is for an scc. That means that %D will have been
13578 used instead of %C, so the bits will be in different places.
13579
13580 Return -1 if OP isn't a valid comparison for some reason. */
13581
13582 int
13583 ccr_bit (rtx op, int scc_p)
13584 {
13585 enum rtx_code code = GET_CODE (op);
13586 machine_mode cc_mode;
13587 int cc_regnum;
13588 int base_bit;
13589 rtx reg;
13590
13591 if (!COMPARISON_P (op))
13592 return -1;
13593
13594 reg = XEXP (op, 0);
13595
13596 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13597 return -1;
13598
13599 cc_mode = GET_MODE (reg);
13600 cc_regnum = REGNO (reg);
13601 base_bit = 4 * (cc_regnum - CR0_REGNO);
13602
13603 validate_condition_mode (code, cc_mode);
13604
13605 /* When generating a sCOND operation, only positive conditions are
13606 allowed. */
13607 if (scc_p)
13608 switch (code)
13609 {
13610 case EQ:
13611 case GT:
13612 case LT:
13613 case UNORDERED:
13614 case GTU:
13615 case LTU:
13616 break;
13617 default:
13618 return -1;
13619 }
13620
13621 switch (code)
13622 {
13623 case NE:
13624 return scc_p ? base_bit + 3 : base_bit + 2;
13625 case EQ:
13626 return base_bit + 2;
13627 case GT: case GTU: case UNLE:
13628 return base_bit + 1;
13629 case LT: case LTU: case UNGE:
13630 return base_bit;
13631 case ORDERED: case UNORDERED:
13632 return base_bit + 3;
13633
13634 case GE: case GEU:
13635 /* If scc, we will have done a cror to put the bit in the
13636 unordered position. So test that bit. For integer, this is ! LT
13637 unless this is an scc insn. */
13638 return scc_p ? base_bit + 3 : base_bit;
13639
13640 case LE: case LEU:
13641 return scc_p ? base_bit + 3 : base_bit + 1;
13642
13643 default:
13644 return -1;
13645 }
13646 }
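/* Example: for a comparison computed into CR1 (one register past CR0),
base_bit is 4, so EQ tests bit 6 and GT tests bit 5. */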
13647 \f
13648 /* Return the GOT register. */
13649
13650 rtx
13651 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13652 {
13653 /* The second flow pass currently (June 1999) can't update
13654 regs_ever_live without disturbing other parts of the compiler, so
13655 update it here to make the prolog/epilogue code happy. */
13656 if (!can_create_pseudo_p ()
13657 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13658 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13659
13660 crtl->uses_pic_offset_table = 1;
13661
13662 return pic_offset_table_rtx;
13663 }
13664 \f
13665 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13666
13667 /* Write out a function code label. */
13668
13669 void
13670 rs6000_output_function_entry (FILE *file, const char *fname)
13671 {
13672 if (fname[0] != '.')
13673 {
13674 switch (DEFAULT_ABI)
13675 {
13676 default:
13677 gcc_unreachable ();
13678
13679 case ABI_AIX:
13680 if (DOT_SYMBOLS)
13681 putc ('.', file);
13682 else
13683 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13684 break;
13685
13686 case ABI_ELFv2:
13687 case ABI_V4:
13688 case ABI_DARWIN:
13689 break;
13690 }
13691 }
13692
13693 RS6000_OUTPUT_BASENAME (file, fname);
13694 }
13695
13696 /* Print an operand. Recognize special options, documented below. */
13697
13698 #if TARGET_ELF
13699 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13700 only introduced by the linker, when applying the sda21
13701 relocation. */
13702 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13703 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13704 #else
13705 #define SMALL_DATA_RELOC "sda21"
13706 #define SMALL_DATA_REG 0
13707 #endif
13708
13709 void
13710 print_operand (FILE *file, rtx x, int code)
13711 {
13712 int i;
13713 unsigned HOST_WIDE_INT uval;
13714
13715 switch (code)
13716 {
13717 /* %a is output_address. */
13718
13719 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13720 output_operand. */
13721
13722 case 'A':
13723 /* Write the MMA accumulator number associated with VSX register X. */
13724 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13725 output_operand_lossage ("invalid %%A value");
13726 else
13727 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13728 return;
13729
13730 case 'D':
13731 /* Like 'J' but get to the GT bit only. */
13732 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13733 {
13734 output_operand_lossage ("invalid %%D value");
13735 return;
13736 }
13737
13738 /* Bit 1 is the GT bit. */
13739 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13740
13741 /* Add one for shift count in rlinm for scc. */
13742 fprintf (file, "%d", i + 1);
13743 return;
13744
13745 case 'e':
13746 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13747 if (! INT_P (x))
13748 {
13749 output_operand_lossage ("invalid %%e value");
13750 return;
13751 }
13752
13753 uval = INTVAL (x);
13754 if ((uval & 0xffff) == 0 && uval != 0)
13755 putc ('s', file);
13756 return;
13757
13758 case 'E':
13759 /* X is a CR register. Print the number of the EQ bit of the CR. */
13760 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13761 output_operand_lossage ("invalid %%E value");
13762 else
13763 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13764 return;
13765
13766 case 'f':
13767 /* X is a CR register. Print the shift count needed to move it
13768 to the high-order four bits. */
13769 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13770 output_operand_lossage ("invalid %%f value");
13771 else
13772 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13773 return;
13774
13775 case 'F':
13776 /* Similar, but print the count for the rotate in the opposite
13777 direction. */
13778 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13779 output_operand_lossage ("invalid %%F value");
13780 else
13781 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13782 return;
13783
13784 case 'G':
13785 /* X is a constant integer. If it is negative, print "m",
13786 otherwise print "z". This is to make an aze or ame insn. */
13787 if (!CONST_INT_P (x))
13788 output_operand_lossage ("invalid %%G value");
13789 else if (INTVAL (x) >= 0)
13790 putc ('z', file);
13791 else
13792 putc ('m', file);
13793 return;
13794
13795 case 'h':
13796 /* If constant, output low-order five bits. Otherwise, write
13797 normally. */
13798 if (INT_P (x))
13799 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13800 else
13801 print_operand (file, x, 0);
13802 return;
13803
13804 case 'H':
13805 /* If constant, output low-order six bits. Otherwise, write
13806 normally. */
13807 if (INT_P (x))
13808 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13809 else
13810 print_operand (file, x, 0);
13811 return;
13812
13813 case 'I':
13814 /* Print `i' if this is a constant, else nothing. */
13815 if (INT_P (x))
13816 putc ('i', file);
13817 return;
13818
13819 case 'j':
13820 /* Write the bit number in CCR for jump. */
13821 i = ccr_bit (x, 0);
13822 if (i == -1)
13823 output_operand_lossage ("invalid %%j code");
13824 else
13825 fprintf (file, "%d", i);
13826 return;
13827
13828 case 'J':
13829 /* Similar, but add one for shift count in rlinm for scc and pass
13830 scc flag to `ccr_bit'. */
13831 i = ccr_bit (x, 1);
13832 if (i == -1)
13833 output_operand_lossage ("invalid %%J code");
13834 else
13835 /* If we want bit 31, write a shift count of zero, not 32. */
13836 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13837 return;
13838
13839 case 'k':
13840 /* X must be a constant. Write the 1's complement of the
13841 constant. */
13842 if (! INT_P (x))
13843 output_operand_lossage ("invalid %%k value");
13844 else
13845 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13846 return;
13847
13848 case 'K':
13849 /* X must be a symbolic constant on ELF. Write an
13850 expression suitable for an 'addi' that adds in the low 16
13851 bits of the MEM. */
13852 if (GET_CODE (x) == CONST)
13853 {
13854 if (GET_CODE (XEXP (x, 0)) != PLUS
13855 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13856 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13857 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13858 output_operand_lossage ("invalid %%K value");
13859 }
13860 print_operand_address (file, x);
13861 fputs ("@l", file);
13862 return;
13863
13864 /* %l is output_asm_label. */
13865
13866 case 'L':
13867 /* Write second word of DImode or DFmode reference. Works on register
13868 or non-indexed memory only. */
13869 if (REG_P (x))
13870 fputs (reg_names[REGNO (x) + 1], file);
13871 else if (MEM_P (x))
13872 {
13873 machine_mode mode = GET_MODE (x);
13874 /* Handle possible auto-increment. Since it is pre-increment and
13875 we have already done it, we can just use an offset of one word. */
13876 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13877 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13878 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13879 UNITS_PER_WORD));
13880 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13881 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13882 UNITS_PER_WORD));
13883 else
13884 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13885 UNITS_PER_WORD),
13886 0));
13887
13888 if (small_data_operand (x, GET_MODE (x)))
13889 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13890 reg_names[SMALL_DATA_REG]);
13891 }
13892 return;
13893
13894 case 'N': /* Unused */
13895 /* Write the number of elements in the vector times 4. */
13896 if (GET_CODE (x) != PARALLEL)
13897 output_operand_lossage ("invalid %%N value");
13898 else
13899 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13900 return;
13901
13902 case 'O': /* Unused */
13903 /* Similar, but subtract 1 first. */
13904 if (GET_CODE (x) != PARALLEL)
13905 output_operand_lossage ("invalid %%O value");
13906 else
13907 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13908 return;
13909
13910 case 'p':
13911 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13912 if (! INT_P (x)
13913 || INTVAL (x) < 0
13914 || (i = exact_log2 (INTVAL (x))) < 0)
13915 output_operand_lossage ("invalid %%p value");
13916 else
13917 fprintf (file, "%d", i);
13918 return;
13919
13920 case 'P':
13921 /* The operand must be an indirect memory reference. The result
13922 is the register name. */
13923 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
13924 || REGNO (XEXP (x, 0)) >= 32)
13925 output_operand_lossage ("invalid %%P value");
13926 else
13927 fputs (reg_names[REGNO (XEXP (x, 0))], file);
13928 return;
13929
13930 case 'q':
13931 /* This outputs the logical code corresponding to a boolean
13932 expression. The expression may have one or both operands
13933 negated (if one, only the first one). For condition register
13934 logical operations, it will also treat the negated
13935 CR codes as NOTs, but not handle NOTs of them. */
13936 {
13937 const char *const *t = 0;
13938 const char *s;
13939 enum rtx_code code = GET_CODE (x);
13940 static const char * const tbl[3][3] = {
13941 { "and", "andc", "nor" },
13942 { "or", "orc", "nand" },
13943 { "xor", "eqv", "xor" } };
13944
13945 if (code == AND)
13946 t = tbl[0];
13947 else if (code == IOR)
13948 t = tbl[1];
13949 else if (code == XOR)
13950 t = tbl[2];
13951 else
13952 output_operand_lossage ("invalid %%q value");
13953
13954 if (GET_CODE (XEXP (x, 0)) != NOT)
13955 s = t[0];
13956 else
13957 {
13958 if (GET_CODE (XEXP (x, 1)) == NOT)
13959 s = t[2];
13960 else
13961 s = t[1];
13962 }
13963
13964 fputs (s, file);
13965 }
13966 return;
13967
13968 case 'Q':
13969 if (! TARGET_MFCRF)
13970 return;
13971 fputc (',', file);
13972 /* FALLTHRU */
13973
13974 case 'R':
13975 /* X is a CR register. Print the mask for `mtcrf'. */
13976 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13977 output_operand_lossage ("invalid %%R value");
13978 else
13979 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
13980 return;
13981
13982 case 's':
13983 /* Low 5 bits of 32 - value */
13984 if (! INT_P (x))
13985 output_operand_lossage ("invalid %%s value");
13986 else
13987 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13988 return;
13989
13990 case 't':
13991 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13992 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13993 {
13994 output_operand_lossage ("invalid %%t value");
13995 return;
13996 }
13997
13998 /* Bit 3 is OV bit. */
13999 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
14000
14001 /* If we want bit 31, write a shift count of zero, not 32. */
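	  /* For example, cr1 gives bit 7 and prints 8; cr7 gives bit 31
	     and prints 0.  */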
14002 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14003 return;
14004
14005 case 'T':
14006 /* Print the symbolic name of a branch target register. */
14007 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14008 x = XVECEXP (x, 0, 0);
14009 if (!REG_P (x) || (REGNO (x) != LR_REGNO
14010 && REGNO (x) != CTR_REGNO))
14011 output_operand_lossage ("invalid %%T value");
14012 else if (REGNO (x) == LR_REGNO)
14013 fputs ("lr", file);
14014 else
14015 fputs ("ctr", file);
14016 return;
14017
14018 case 'u':
14019 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14020 for use in unsigned operand. */
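	     For example, 0x12340000 prints as 0x1234 (the non-zero high
	     half), while 0xabcd prints as 0xabcd.  */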
14021 if (! INT_P (x))
14022 {
14023 output_operand_lossage ("invalid %%u value");
14024 return;
14025 }
14026
14027 uval = INTVAL (x);
14028 if ((uval & 0xffff) == 0)
14029 uval >>= 16;
14030
14031 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
14032 return;
14033
14034 case 'v':
14035 /* High-order 16 bits of constant for use in signed operand. */
14036 if (! INT_P (x))
14037 output_operand_lossage ("invalid %%v value");
14038 else
14039 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
14040 (INTVAL (x) >> 16) & 0xffff);
14041 return;
14042
14043 case 'U':
14044 /* Print `u' if this has an auto-increment or auto-decrement. */
14045 if (MEM_P (x)
14046 && (GET_CODE (XEXP (x, 0)) == PRE_INC
14047 || GET_CODE (XEXP (x, 0)) == PRE_DEC
14048 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
14049 putc ('u', file);
14050 return;
14051
14052 case 'V':
14053 /* Print the trap code for this operand. */
14054 switch (GET_CODE (x))
14055 {
14056 case EQ:
14057 fputs ("eq", file); /* 4 */
14058 break;
14059 case NE:
14060 fputs ("ne", file); /* 24 */
14061 break;
14062 case LT:
14063 fputs ("lt", file); /* 16 */
14064 break;
14065 case LE:
14066 fputs ("le", file); /* 20 */
14067 break;
14068 case GT:
14069 fputs ("gt", file); /* 8 */
14070 break;
14071 case GE:
14072 fputs ("ge", file); /* 12 */
14073 break;
14074 case LTU:
14075 fputs ("llt", file); /* 2 */
14076 break;
14077 case LEU:
14078 fputs ("lle", file); /* 6 */
14079 break;
14080 case GTU:
14081 fputs ("lgt", file); /* 1 */
14082 break;
14083 case GEU:
14084 fputs ("lge", file); /* 5 */
14085 break;
14086 default:
14087 output_operand_lossage ("invalid %%V value");
14088 }
14089 break;
14090
14091 case 'w':
14092 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14093 normally. */
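	  /* For example, a constant 0xffff prints as -1, its low 16 bits
	     sign-extended.  */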
14094 if (INT_P (x))
14095 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
14096 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
14097 else
14098 print_operand (file, x, 0);
14099 return;
14100
14101 case 'x':
14102 /* X is a FPR or Altivec register used in a VSX context. */
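	  /* In the unified VSX register file, a floating-point register
	     fN maps to vsN and an Altivec register vN maps to vs(N+32),
	     which is the mapping computed below.  */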
14103 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14104 output_operand_lossage ("invalid %%x value");
14105 else
14106 {
14107 int reg = REGNO (x);
14108 int vsx_reg = (FP_REGNO_P (reg)
14109 ? reg - 32
14110 : reg - FIRST_ALTIVEC_REGNO + 32);
14111
14112 #ifdef TARGET_REGNAMES
14113 if (TARGET_REGNAMES)
14114 fprintf (file, "%%vs%d", vsx_reg);
14115 else
14116 #endif
14117 fprintf (file, "%d", vsx_reg);
14118 }
14119 return;
14120
14121 case 'X':
14122 if (MEM_P (x)
14123 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14124 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14125 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14126 putc ('x', file);
14127 return;
14128
14129 case 'Y':
14130 /* Like 'L', for third word of TImode/PTImode */
14131 if (REG_P (x))
14132 fputs (reg_names[REGNO (x) + 2], file);
14133 else if (MEM_P (x))
14134 {
14135 machine_mode mode = GET_MODE (x);
14136 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14137 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14138 output_address (mode, plus_constant (Pmode,
14139 XEXP (XEXP (x, 0), 0), 8));
14140 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14141 output_address (mode, plus_constant (Pmode,
14142 XEXP (XEXP (x, 0), 0), 8));
14143 else
14144 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14145 if (small_data_operand (x, GET_MODE (x)))
14146 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14147 reg_names[SMALL_DATA_REG]);
14148 }
14149 return;
14150
14151 case 'z':
14152 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14153 x = XVECEXP (x, 0, 1);
14154 /* X is a SYMBOL_REF. Write out the name preceded by a
14155 period and without any trailing data in brackets. Used for function
14156 names. If we are configured for System V (or the embedded ABI) on
14157 the PowerPC, do not emit the period, since those systems do not use
14158 TOCs and the like. */
14159 if (!SYMBOL_REF_P (x))
14160 {
14161 output_operand_lossage ("invalid %%z value");
14162 return;
14163 }
14164
14165 /* For macho, check to see if we need a stub. */
14166 if (TARGET_MACHO)
14167 {
14168 const char *name = XSTR (x, 0);
14169 #if TARGET_MACHO
14170 if (darwin_symbol_stubs
14171 && MACHOPIC_INDIRECT
14172 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14173 name = machopic_indirection_name (x, /*stub_p=*/true);
14174 #endif
14175 assemble_name (file, name);
14176 }
14177 else if (!DOT_SYMBOLS)
14178 assemble_name (file, XSTR (x, 0));
14179 else
14180 rs6000_output_function_entry (file, XSTR (x, 0));
14181 return;
14182
14183 case 'Z':
14184 /* Like 'L', for last word of TImode/PTImode. */
14185 if (REG_P (x))
14186 fputs (reg_names[REGNO (x) + 3], file);
14187 else if (MEM_P (x))
14188 {
14189 machine_mode mode = GET_MODE (x);
14190 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14191 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14192 output_address (mode, plus_constant (Pmode,
14193 XEXP (XEXP (x, 0), 0), 12));
14194 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14195 output_address (mode, plus_constant (Pmode,
14196 XEXP (XEXP (x, 0), 0), 12));
14197 else
14198 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14199 if (small_data_operand (x, GET_MODE (x)))
14200 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14201 reg_names[SMALL_DATA_REG]);
14202 }
14203 return;
14204
14205 /* Print AltiVec memory operand. */
14206 case 'y':
14207 {
14208 rtx tmp;
14209
14210 gcc_assert (MEM_P (x));
14211
14212 tmp = XEXP (x, 0);
14213
14214 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14215 && GET_CODE (tmp) == AND
14216 && CONST_INT_P (XEXP (tmp, 1))
14217 && INTVAL (XEXP (tmp, 1)) == -16)
14218 tmp = XEXP (tmp, 0);
14219 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14220 && GET_CODE (tmp) == PRE_MODIFY)
14221 tmp = XEXP (tmp, 1);
14222 if (REG_P (tmp))
14223 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14224 else
14225 {
14226 if (GET_CODE (tmp) != PLUS
14227 || !REG_P (XEXP (tmp, 0))
14228 || !REG_P (XEXP (tmp, 1)))
14229 {
14230 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14231 break;
14232 }
14233
14234 if (REGNO (XEXP (tmp, 0)) == 0)
14235 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14236 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14237 else
14238 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14239 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14240 }
14241 break;
14242 }
14243
14244 case 0:
14245 if (REG_P (x))
14246 fprintf (file, "%s", reg_names[REGNO (x)]);
14247 else if (MEM_P (x))
14248 {
14249 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14250 know the width from the mode. */
14251 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14252 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14253 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14254 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14255 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14256 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14257 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14258 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14259 else
14260 output_address (GET_MODE (x), XEXP (x, 0));
14261 }
14262 else if (toc_relative_expr_p (x, false,
14263 &tocrel_base_oac, &tocrel_offset_oac))
14264 /* This hack along with a corresponding hack in
14265 rs6000_output_addr_const_extra arranges to output addends
14266 where the assembler expects to find them. eg.
14267 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14268 without this hack would be output as "x@toc+4". We
14269 want "x+4@toc". */
14270 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14271 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14272 output_addr_const (file, XVECEXP (x, 0, 0));
14273 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14274 output_addr_const (file, XVECEXP (x, 0, 1));
14275 else
14276 output_addr_const (file, x);
14277 return;
14278
14279 case '&':
14280 if (const char *name = get_some_local_dynamic_name ())
14281 assemble_name (file, name);
14282 else
14283 output_operand_lossage ("'%%&' used without any "
14284 "local dynamic TLS references");
14285 return;
14286
14287 default:
14288 output_operand_lossage ("invalid %%xn code");
14289 }
14290 }
14291 \f
14292 /* Print the address of an operand. */
14293
14294 void
14295 print_operand_address (FILE *file, rtx x)
14296 {
14297 if (REG_P (x))
14298 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14299
14300 /* Is it a PC-relative address? */
14301 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14302 {
14303 HOST_WIDE_INT offset;
14304
14305 if (GET_CODE (x) == CONST)
14306 x = XEXP (x, 0);
14307
14308 if (GET_CODE (x) == PLUS)
14309 {
14310 offset = INTVAL (XEXP (x, 1));
14311 x = XEXP (x, 0);
14312 }
14313 else
14314 offset = 0;
14315
14316 output_addr_const (file, x);
14317
14318 if (offset)
14319 fprintf (file, "%+" PRId64, offset);
14320
14321 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14322 fprintf (file, "@got");
14323
14324 fprintf (file, "@pcrel");
14325 }
14326 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14327 || GET_CODE (x) == LABEL_REF)
14328 {
14329 output_addr_const (file, x);
14330 if (small_data_operand (x, GET_MODE (x)))
14331 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14332 reg_names[SMALL_DATA_REG]);
14333 else
14334 gcc_assert (!TARGET_TOC);
14335 }
14336 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14337 && REG_P (XEXP (x, 1)))
14338 {
14339 if (REGNO (XEXP (x, 0)) == 0)
14340 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14341 reg_names[ REGNO (XEXP (x, 0)) ]);
14342 else
14343 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14344 reg_names[ REGNO (XEXP (x, 1)) ]);
14345 }
14346 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14347 && CONST_INT_P (XEXP (x, 1)))
14348 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14349 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14350 #if TARGET_MACHO
14351 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14352 && CONSTANT_P (XEXP (x, 1)))
14353 {
14354 fprintf (file, "lo16(");
14355 output_addr_const (file, XEXP (x, 1));
14356 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14357 }
14358 #endif
14359 #if TARGET_ELF
14360 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14361 && CONSTANT_P (XEXP (x, 1)))
14362 {
14363 output_addr_const (file, XEXP (x, 1));
14364 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14365 }
14366 #endif
14367 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14368 {
14369 /* This hack along with a corresponding hack in
14370 rs6000_output_addr_const_extra arranges to output addends
14371 where the assembler expects to find them. eg.
14372 (lo_sum (reg 9)
14373 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14374 without this hack would be output as "x@toc+8@l(9)". We
14375 want "x+8@toc@l(9)". */
14376 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14377 if (GET_CODE (x) == LO_SUM)
14378 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14379 else
14380 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14381 }
14382 else
14383 output_addr_const (file, x);
14384 }
14385 \f
14386 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14387
14388 bool
14389 rs6000_output_addr_const_extra (FILE *file, rtx x)
14390 {
14391 if (GET_CODE (x) == UNSPEC)
14392 switch (XINT (x, 1))
14393 {
14394 case UNSPEC_TOCREL:
14395 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14396 && REG_P (XVECEXP (x, 0, 1))
14397 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14398 output_addr_const (file, XVECEXP (x, 0, 0));
14399 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14400 {
14401 if (INTVAL (tocrel_offset_oac) >= 0)
14402 fprintf (file, "+");
14403 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14404 }
14405 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14406 {
14407 putc ('-', file);
14408 assemble_name (file, toc_label_name);
14409 need_toc_init = 1;
14410 }
14411 else if (TARGET_ELF)
14412 fputs ("@toc", file);
14413 return true;
14414
14415 #if TARGET_MACHO
14416 case UNSPEC_MACHOPIC_OFFSET:
14417 output_addr_const (file, XVECEXP (x, 0, 0));
14418 putc ('-', file);
14419 machopic_output_function_base_name (file);
14420 return true;
14421 #endif
14422 }
14423 return false;
14424 }
14425 \f
14426 /* Target hook for assembling integer objects. The PowerPC version has
14427 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14428 is defined. It also needs to handle DI-mode objects on 64-bit
14429 targets. */
14430
14431 static bool
14432 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14433 {
14434 #ifdef RELOCATABLE_NEEDS_FIXUP
14435 /* Special handling for SI values. */
14436 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14437 {
14438 static int recurse = 0;
14439
14440 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14441 the .fixup section. Since the TOC section is already relocated, we
14442 don't need to mark it here. We used to skip the text section, but it
14443 should never be valid for relocated addresses to be placed in the text
14444 section. */
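      /* For an address constant X this emits roughly (with a generated
	 label such as .LCP0):
	    .LCP0:
		    .long (X)@fixup
		    .section ".fixup","aw"
		    .align 2
		    .long .LCP0
		    .previous  */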
14445 if (DEFAULT_ABI == ABI_V4
14446 && (TARGET_RELOCATABLE || flag_pic > 1)
14447 && in_section != toc_section
14448 && !recurse
14449 && !CONST_SCALAR_INT_P (x)
14450 && CONSTANT_P (x))
14451 {
14452 char buf[256];
14453
14454 recurse = 1;
14455 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14456 fixuplabelno++;
14457 ASM_OUTPUT_LABEL (asm_out_file, buf);
14458 fprintf (asm_out_file, "\t.long\t(");
14459 output_addr_const (asm_out_file, x);
14460 fprintf (asm_out_file, ")@fixup\n");
14461 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14462 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14463 fprintf (asm_out_file, "\t.long\t");
14464 assemble_name (asm_out_file, buf);
14465 fprintf (asm_out_file, "\n\t.previous\n");
14466 recurse = 0;
14467 return true;
14468 }
14469 /* Remove initial .'s to turn a -mcall-aixdesc function
14470 address into the address of the descriptor, not the function
14471 itself. */
14472 else if (SYMBOL_REF_P (x)
14473 && XSTR (x, 0)[0] == '.'
14474 && DEFAULT_ABI == ABI_AIX)
14475 {
14476 const char *name = XSTR (x, 0);
14477 while (*name == '.')
14478 name++;
14479
14480 fprintf (asm_out_file, "\t.long\t%s\n", name);
14481 return true;
14482 }
14483 }
14484 #endif /* RELOCATABLE_NEEDS_FIXUP */
14485 return default_assemble_integer (x, size, aligned_p);
14486 }
14487
14488 /* Return a template string for assembly to emit when making an
14489 external call. FUNOP is the call mem argument operand number. */
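/* For instance (illustrative, after %z expansion), a call to "foo" yields
   roughly "bl foo@notoc" when pc-relative, "bl foo" followed by a nop (the
   TOC-restore slot the linker may patch) for AIX/ELFv2, and "bl foo@plt"
   for V4 with -fPIC.  */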
14490
14491 static const char *
14492 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14493 {
14494 /* -Wformat-overflow workaround, without which gcc thinks that %u
14495 might produce 10 digits. */
14496 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14497
14498 char arg[12];
14499 arg[0] = 0;
14500 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14501 {
14502 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14503 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14504 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14505 sprintf (arg, "(%%&@tlsld)");
14506 }
14507
14508 /* The magic 32768 offset here corresponds to the offset of
14509 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14510 char z[11];
14511 sprintf (z, "%%z%u%s", funop,
14512 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14513 ? "+32768" : ""));
14514
14515 static char str[32]; /* 1 spare */
14516 if (rs6000_pcrel_p ())
14517 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14518 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14519 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14520 sibcall ? "" : "\n\tnop");
14521 else if (DEFAULT_ABI == ABI_V4)
14522 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14523 flag_pic ? "@plt" : "");
14524 #if TARGET_MACHO
14525 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14526 else if (DEFAULT_ABI == ABI_DARWIN)
14527 {
14528 /* The cookie is in operand func+2. */
14529 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14530 int cookie = INTVAL (operands[funop + 2]);
14531 if (cookie & CALL_LONG)
14532 {
14533 tree funname = get_identifier (XSTR (operands[funop], 0));
14534 tree labelname = get_prev_label (funname);
14535 gcc_checking_assert (labelname && !sibcall);
14536
14537 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14538 instruction will reach 'foo', otherwise link as 'bl L42'".
14539 "L42" should be a 'branch island', that will do a far jump to
14540 'foo'. Branch islands are generated in
14541 macho_branch_islands(). */
14542 sprintf (str, "jbsr %%z%u,%.10s", funop,
14543 IDENTIFIER_POINTER (labelname));
14544 }
14545 else
14546 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14547 after the call. */
14548 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14549 }
14550 #endif
14551 else
14552 gcc_unreachable ();
14553 return str;
14554 }
14555
14556 const char *
14557 rs6000_call_template (rtx *operands, unsigned int funop)
14558 {
14559 return rs6000_call_template_1 (operands, funop, false);
14560 }
14561
14562 const char *
14563 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14564 {
14565 return rs6000_call_template_1 (operands, funop, true);
14566 }
14567
14568 /* As above, for indirect calls. */
14569
14570 static const char *
14571 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14572 bool sibcall)
14573 {
14574 /* -Wformat-overflow workaround, without which gcc thinks that %u
14575 might produce 10 digits. Note that -Wformat-overflow will not
14576 currently warn here for str[], so do not rely on a warning to
14577 ensure str[] is correctly sized. */
14578 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14579
14580 /* Currently, funop is either 0 or 1. The maximum string is always
14581 a !speculate 64-bit __tls_get_addr call.
14582
14583 ABI_ELFv2, pcrel:
14584 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14585 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14586 . 9 crset 2\n\t
14587 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14588 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14589 . 8 beq%T1l-
14590 .---
14591 .142
14592
14593 ABI_AIX:
14594 . 9 ld 2,%3\n\t
14595 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14596 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14597 . 9 crset 2\n\t
14598 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14599 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14600 . 10 beq%T1l-\n\t
14601 . 10 ld 2,%4(1)
14602 .---
14603 .151
14604
14605 ABI_ELFv2:
14606 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14607 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14608 . 9 crset 2\n\t
14609 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14610 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14611 . 10 beq%T1l-\n\t
14612 . 10 ld 2,%3(1)
14613 .---
14614 .142
14615
14616 ABI_V4:
14617 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14618 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14619 . 9 crset 2\n\t
14620 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14621 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14622 . 8 beq%T1l-
14623 .---
14624 .141 */
14625 static char str[160]; /* 8 spare */
14626 char *s = str;
14627 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14628
14629 if (DEFAULT_ABI == ABI_AIX)
14630 s += sprintf (s,
14631 "l%s 2,%%%u\n\t",
14632 ptrload, funop + 3);
14633
14634 /* We don't need the extra code to stop indirect call speculation if
14635 calling via LR. */
14636 bool speculate = (TARGET_MACHO
14637 || rs6000_speculate_indirect_jumps
14638 || (REG_P (operands[funop])
14639 && REGNO (operands[funop]) == LR_REGNO));
14640
14641 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14642 {
14643 const char *rel64 = TARGET_64BIT ? "64" : "";
14644 char tls[29];
14645 tls[0] = 0;
14646 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14647 {
14648 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14649 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14650 rel64, funop + 1);
14651 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14652 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14653 rel64);
14654 }
14655
14656 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14657 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14658 && flag_pic == 2 ? "+32768" : "");
14659 if (!speculate)
14660 {
14661 s += sprintf (s,
14662 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14663 tls, rel64, notoc, funop, addend);
14664 s += sprintf (s, "crset 2\n\t");
14665 }
14666 s += sprintf (s,
14667 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14668 tls, rel64, notoc, funop, addend);
14669 }
14670 else if (!speculate)
14671 s += sprintf (s, "crset 2\n\t");
14672
14673 if (rs6000_pcrel_p ())
14674 {
14675 if (speculate)
14676 sprintf (s, "b%%T%ul", funop);
14677 else
14678 sprintf (s, "beq%%T%ul-", funop);
14679 }
14680 else if (DEFAULT_ABI == ABI_AIX)
14681 {
14682 if (speculate)
14683 sprintf (s,
14684 "b%%T%ul\n\t"
14685 "l%s 2,%%%u(1)",
14686 funop, ptrload, funop + 4);
14687 else
14688 sprintf (s,
14689 "beq%%T%ul-\n\t"
14690 "l%s 2,%%%u(1)",
14691 funop, ptrload, funop + 4);
14692 }
14693 else if (DEFAULT_ABI == ABI_ELFv2)
14694 {
14695 if (speculate)
14696 sprintf (s,
14697 "b%%T%ul\n\t"
14698 "l%s 2,%%%u(1)",
14699 funop, ptrload, funop + 3);
14700 else
14701 sprintf (s,
14702 "beq%%T%ul-\n\t"
14703 "l%s 2,%%%u(1)",
14704 funop, ptrload, funop + 3);
14705 }
14706 else
14707 {
14708 if (speculate)
14709 sprintf (s,
14710 "b%%T%u%s",
14711 funop, sibcall ? "" : "l");
14712 else
14713 sprintf (s,
14714 "beq%%T%u%s-%s",
14715 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14716 }
14717 return str;
14718 }
14719
14720 const char *
14721 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14722 {
14723 return rs6000_indirect_call_template_1 (operands, funop, false);
14724 }
14725
14726 const char *
14727 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14728 {
14729 return rs6000_indirect_call_template_1 (operands, funop, true);
14730 }
14731
14732 #if HAVE_AS_PLTSEQ
14733 /* Output indirect call insns. WHICH identifies the type of sequence. */
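/* For instance (illustrative), the PLT16_HA/PLT16_LO pair emits an
   addis/load of the PLT entry tagged with R_PPC64_PLT16_HA and
   R_PPC64_PLT16_LO_DS relocations, and the mtctr is tagged with
   R_PPC64_PLTSEQ, letting the linker recognize the whole sequence and
   relax it to a direct call where possible.  */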
14734 const char *
14735 rs6000_pltseq_template (rtx *operands, int which)
14736 {
14737 const char *rel64 = TARGET_64BIT ? "64" : "";
14738 char tls[30];
14739 tls[0] = 0;
14740 if (GET_CODE (operands[3]) == UNSPEC)
14741 {
14742 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14743 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14744 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14745 off, rel64);
14746 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14747 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14748 off, rel64);
14749 }
14750
14751 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14752 static char str[96]; /* 10 spare */
14753 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14754 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14755 && flag_pic == 2 ? "+32768" : "");
14756 switch (which)
14757 {
14758 case RS6000_PLTSEQ_TOCSAVE:
14759 sprintf (str,
14760 "st%s\n\t"
14761 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14762 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14763 tls, rel64);
14764 break;
14765 case RS6000_PLTSEQ_PLT16_HA:
14766 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14767 sprintf (str,
14768 "lis %%0,0\n\t"
14769 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14770 tls, off, rel64);
14771 else
14772 sprintf (str,
14773 "addis %%0,%%1,0\n\t"
14774 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14775 tls, off, rel64, addend);
14776 break;
14777 case RS6000_PLTSEQ_PLT16_LO:
14778 sprintf (str,
14779 "l%s %%0,0(%%1)\n\t"
14780 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14781 TARGET_64BIT ? "d" : "wz",
14782 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14783 break;
14784 case RS6000_PLTSEQ_MTCTR:
14785 sprintf (str,
14786 "mtctr %%1\n\t"
14787 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14788 tls, rel64, addend);
14789 break;
14790 case RS6000_PLTSEQ_PLT_PCREL34:
14791 sprintf (str,
14792 "pl%s %%0,0(0),1\n\t"
14793 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14794 TARGET_64BIT ? "d" : "wz",
14795 tls, rel64);
14796 break;
14797 default:
14798 gcc_unreachable ();
14799 }
14800 return str;
14801 }
14802 #endif
14803 \f
14804 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14805 /* Emit an assembler directive to set symbol visibility for DECL to
14806 VISIBILITY_TYPE. */
14807
14808 static void
14809 rs6000_assemble_visibility (tree decl, int vis)
14810 {
14811 if (TARGET_XCOFF)
14812 return;
14813
14814 /* Functions need to have their entry point symbol visibility set as
14815 well as their descriptor symbol visibility. */
14816 if (DEFAULT_ABI == ABI_AIX
14817 && DOT_SYMBOLS
14818 && TREE_CODE (decl) == FUNCTION_DECL)
14819 {
14820 static const char * const visibility_types[] = {
14821 NULL, "protected", "hidden", "internal"
14822 };
14823
14824 const char *name, *type;
14825
14826 name = ((* targetm.strip_name_encoding)
14827 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14828 type = visibility_types[vis];
14829
14830 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14831 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14832 }
14833 else
14834 default_assemble_visibility (decl, vis);
14835 }
14836 #endif
14837 \f
14838 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
14839 entry. If RECORD_P is true and the target supports named sections,
14840 the location of the NOPs will be recorded in a special object section
14841 called "__patchable_function_entries". This routine may be called
14842 twice per function to put NOPs before and after the function
14843 entry. */
14844
14845 void
14846 rs6000_print_patchable_function_entry (FILE *file,
14847 unsigned HOST_WIDE_INT patch_area_size,
14848 bool record_p)
14849 {
14850 unsigned int flags = SECTION_WRITE | SECTION_RELRO;
14851 /* When the .opd section is emitted, default_print_patchable_function_entry_1
14852 emits the function symbol into the .opd section while the patchable
14853 area is emitted into the function section.
14854 Don't use SECTION_LINK_ORDER in that case. */
14855 if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
14856 && HAVE_GAS_SECTION_LINK_ORDER)
14857 flags |= SECTION_LINK_ORDER;
14858 default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
14859 flags);
14860 }
14861 \f
14862 enum rtx_code
14863 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14864 {
14865 /* Reversal of FP compares needs care -- an ordered compare
14866 becomes an unordered compare and vice versa. */
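     For example, under CCFPmode GE becomes UNLT rather than LT, since a
     NaN operand makes both GE and LT false; with -ffinite-math-only the
     plain reversal (GE -> LT) is used for the ordered codes.  */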
14867 if (mode == CCFPmode
14868 && (!flag_finite_math_only
14869 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14870 || code == UNEQ || code == LTGT))
14871 return reverse_condition_maybe_unordered (code);
14872 else
14873 return reverse_condition (code);
14874 }
14875
14876 /* Generate a compare for CODE. Return a brand-new rtx that
14877 represents the result of the compare. */
14878
14879 static rtx
14880 rs6000_generate_compare (rtx cmp, machine_mode mode)
14881 {
14882 machine_mode comp_mode;
14883 rtx compare_result;
14884 enum rtx_code code = GET_CODE (cmp);
14885 rtx op0 = XEXP (cmp, 0);
14886 rtx op1 = XEXP (cmp, 1);
14887
14888 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14889 comp_mode = CCmode;
14890 else if (FLOAT_MODE_P (mode))
14891 comp_mode = CCFPmode;
14892 else if (code == GTU || code == LTU
14893 || code == GEU || code == LEU)
14894 comp_mode = CCUNSmode;
14895 else if ((code == EQ || code == NE)
14896 && unsigned_reg_p (op0)
14897 && (unsigned_reg_p (op1)
14898 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
14899 /* These are unsigned values, perhaps there will be a later
14900 ordering compare that can be shared with this one. */
14901 comp_mode = CCUNSmode;
14902 else
14903 comp_mode = CCmode;
14904
14905 /* If we have an unsigned compare, make sure we don't have a signed value as
14906 an immediate. */
14907 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14908 && INTVAL (op1) < 0)
14909 {
14910 op0 = copy_rtx_if_shared (op0);
14911 op1 = force_reg (GET_MODE (op0), op1);
14912 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14913 }
14914
14915 /* First, the compare. */
14916 compare_result = gen_reg_rtx (comp_mode);
14917
14918 /* IEEE 128-bit support in VSX registers when we do not have hardware
14919 support. */
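  /* For example (illustrative, libcall names per libgcc), a KFmode UNGT
     is lowered by first calling __unordkf2 to detect NaN operands, then
     __gekf2 for the ordered test, with the final result compared against
     zero.  */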
14920 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14921 {
14922 rtx libfunc = NULL_RTX;
14923 bool check_nan = false;
14924 rtx dest;
14925
14926 switch (code)
14927 {
14928 case EQ:
14929 case NE:
14930 libfunc = optab_libfunc (eq_optab, mode);
14931 break;
14932
14933 case GT:
14934 case GE:
14935 libfunc = optab_libfunc (ge_optab, mode);
14936 break;
14937
14938 case LT:
14939 case LE:
14940 libfunc = optab_libfunc (le_optab, mode);
14941 break;
14942
14943 case UNORDERED:
14944 case ORDERED:
14945 libfunc = optab_libfunc (unord_optab, mode);
14946 code = (code == UNORDERED) ? NE : EQ;
14947 break;
14948
14949 case UNGE:
14950 case UNGT:
14951 check_nan = true;
14952 libfunc = optab_libfunc (ge_optab, mode);
14953 code = (code == UNGE) ? GE : GT;
14954 break;
14955
14956 case UNLE:
14957 case UNLT:
14958 check_nan = true;
14959 libfunc = optab_libfunc (le_optab, mode);
14960 code = (code == UNLE) ? LE : LT;
14961 break;
14962
14963 case UNEQ:
14964 case LTGT:
14965 check_nan = true;
14966 libfunc = optab_libfunc (eq_optab, mode);
14967 code = (code == UNEQ) ? EQ : NE;
14968 break;
14969
14970 default:
14971 gcc_unreachable ();
14972 }
14973
14974 gcc_assert (libfunc);
14975
14976 if (!check_nan)
14977 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14978 SImode, op0, mode, op1, mode);
14979
14980 /* The library signals an exception for signalling NaNs, so we need to
14981 handle isgreater, etc. by first checking isordered. */
14982 else
14983 {
14984 rtx ne_rtx, normal_dest, unord_dest;
14985 rtx unord_func = optab_libfunc (unord_optab, mode);
14986 rtx join_label = gen_label_rtx ();
14987 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14988 rtx unord_cmp = gen_reg_rtx (comp_mode);
14989
14990
14991 /* Test for either value being a NaN. */
14992 gcc_assert (unord_func);
14993 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14994 SImode, op0, mode, op1, mode);
14995
14996 /* If either value is a NaN, set the result to 1 and jump to the join
14997 label. */
14998 dest = gen_reg_rtx (SImode);
14999 emit_move_insn (dest, const1_rtx);
15000 emit_insn (gen_rtx_SET (unord_cmp,
15001 gen_rtx_COMPARE (comp_mode, unord_dest,
15002 const0_rtx)));
15003
15004 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
15005 emit_jump_insn (gen_rtx_SET (pc_rtx,
15006 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
15007 join_ref,
15008 pc_rtx)));
15009
15010 /* Do the normal comparison, knowing that the values are not
15011 NaNs. */
15012 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15013 SImode, op0, mode, op1, mode);
15014
15015 emit_insn (gen_cstoresi4 (dest,
15016 gen_rtx_fmt_ee (code, SImode, normal_dest,
15017 const0_rtx),
15018 normal_dest, const0_rtx));
15019
15020 /* Join the NaN and non-NaN paths. Compare dest against 0. */
15021 emit_label (join_label);
15022 code = NE;
15023 }
15024
15025 emit_insn (gen_rtx_SET (compare_result,
15026 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
15027 }
15028
15029 else
15030 {
15031 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15032 CLOBBERs to match cmptf_internal2 pattern. */
15033 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
15034 && FLOAT128_IBM_P (GET_MODE (op0))
15035 && TARGET_HARD_FLOAT)
15036 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15037 gen_rtvec (10,
15038 gen_rtx_SET (compare_result,
15039 gen_rtx_COMPARE (comp_mode, op0, op1)),
15040 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15041 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15042 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15043 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15044 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15045 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15046 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15047 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15048 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15049 else if (GET_CODE (op1) == UNSPEC
15050 && XINT (op1, 1) == UNSPEC_SP_TEST)
15051 {
15052 rtx op1b = XVECEXP (op1, 0, 0);
15053 comp_mode = CCEQmode;
15054 compare_result = gen_reg_rtx (CCEQmode);
15055 if (TARGET_64BIT)
15056 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15057 else
15058 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15059 }
15060 else
15061 emit_insn (gen_rtx_SET (compare_result,
15062 gen_rtx_COMPARE (comp_mode, op0, op1)));
15063 }
15064
15065 validate_condition_mode (code, GET_MODE (compare_result));
15066
15067 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15068 }
15069
15070 \f
15071 /* Return the diagnostic message string if the binary operation OP is
15072 not permitted on TYPE1 and TYPE2, NULL otherwise. */
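/* For example, without -mfloat128-convert an expression mixing a __float128
   (IEEE 128-bit) value with a __ibm128 (double-double) value is rejected,
   since the two 128-bit formats are incompatible.  */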
15073
15074 static const char*
15075 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15076 const_tree type1,
15077 const_tree type2)
15078 {
15079 machine_mode mode1 = TYPE_MODE (type1);
15080 machine_mode mode2 = TYPE_MODE (type2);
15081
15082 /* For complex modes, use the inner type. */
15083 if (COMPLEX_MODE_P (mode1))
15084 mode1 = GET_MODE_INNER (mode1);
15085
15086 if (COMPLEX_MODE_P (mode2))
15087 mode2 = GET_MODE_INNER (mode2);
15088
15089 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15090 double to intermix unless -mfloat128-convert. */
15091 if (mode1 == mode2)
15092 return NULL;
15093
15094 if (!TARGET_FLOAT128_CVT)
15095 {
15096 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15097 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15098 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15099 "point types");
15100 }
15101
15102 return NULL;
15103 }
15104
15105 \f
15106 /* Expand floating point conversion to/from __float128 and __ibm128. */
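/* For instance (illustrative), extending DFmode to KFmode uses the
   gen_extenddfkf2_hw pattern when TARGET_FLOAT128_HW, and otherwise falls
   back to the sext_optab libcall (__extenddfkf2 in libgcc).  */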
15107
15108 void
15109 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15110 {
15111 machine_mode dest_mode = GET_MODE (dest);
15112 machine_mode src_mode = GET_MODE (src);
15113 convert_optab cvt = unknown_optab;
15114 bool do_move = false;
15115 rtx libfunc = NULL_RTX;
15116 rtx dest2;
15117 typedef rtx (*rtx_2func_t) (rtx, rtx);
15118 rtx_2func_t hw_convert = (rtx_2func_t)0;
15119 size_t kf_or_tf;
15120
15121 struct hw_conv_t {
15122 rtx_2func_t from_df;
15123 rtx_2func_t from_sf;
15124 rtx_2func_t from_si_sign;
15125 rtx_2func_t from_si_uns;
15126 rtx_2func_t from_di_sign;
15127 rtx_2func_t from_di_uns;
15128 rtx_2func_t to_df;
15129 rtx_2func_t to_sf;
15130 rtx_2func_t to_si_sign;
15131 rtx_2func_t to_si_uns;
15132 rtx_2func_t to_di_sign;
15133 rtx_2func_t to_di_uns;
15134 } hw_conversions[2] = {
15135 /* conversions to/from KFmode */
15136 {
15137 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15138 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15139 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15140 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15141 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15142 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15143 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15144 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15145 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15146 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15147 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15148 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15149 },
15150
15151 /* conversions to/from TFmode */
15152 {
15153 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15154 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15155 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15156 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15157 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15158 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15159 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15160 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15161 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15162 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15163 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15164 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15165 },
15166 };
15167
15168 if (dest_mode == src_mode)
15169 gcc_unreachable ();
15170
15171 /* Eliminate memory operations. */
15172 if (MEM_P (src))
15173 src = force_reg (src_mode, src);
15174
15175 if (MEM_P (dest))
15176 {
15177 rtx tmp = gen_reg_rtx (dest_mode);
15178 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15179 rs6000_emit_move (dest, tmp, dest_mode);
15180 return;
15181 }
15182
15183 /* Convert to IEEE 128-bit floating point. */
15184 if (FLOAT128_IEEE_P (dest_mode))
15185 {
15186 if (dest_mode == KFmode)
15187 kf_or_tf = 0;
15188 else if (dest_mode == TFmode)
15189 kf_or_tf = 1;
15190 else
15191 gcc_unreachable ();
15192
15193 switch (src_mode)
15194 {
15195 case E_DFmode:
15196 cvt = sext_optab;
15197 hw_convert = hw_conversions[kf_or_tf].from_df;
15198 break;
15199
15200 case E_SFmode:
15201 cvt = sext_optab;
15202 hw_convert = hw_conversions[kf_or_tf].from_sf;
15203 break;
15204
15205 case E_KFmode:
15206 case E_IFmode:
15207 case E_TFmode:
15208 if (FLOAT128_IBM_P (src_mode))
15209 cvt = sext_optab;
15210 else
15211 do_move = true;
15212 break;
15213
15214 case E_SImode:
15215 if (unsigned_p)
15216 {
15217 cvt = ufloat_optab;
15218 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15219 }
15220 else
15221 {
15222 cvt = sfloat_optab;
15223 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15224 }
15225 break;
15226
15227 case E_DImode:
15228 if (unsigned_p)
15229 {
15230 cvt = ufloat_optab;
15231 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15232 }
15233 else
15234 {
15235 cvt = sfloat_optab;
15236 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15237 }
15238 break;
15239
15240 default:
15241 gcc_unreachable ();
15242 }
15243 }
15244
15245 /* Convert from IEEE 128-bit floating point. */
15246 else if (FLOAT128_IEEE_P (src_mode))
15247 {
15248 if (src_mode == KFmode)
15249 kf_or_tf = 0;
15250 else if (src_mode == TFmode)
15251 kf_or_tf = 1;
15252 else
15253 gcc_unreachable ();
15254
15255 switch (dest_mode)
15256 {
15257 case E_DFmode:
15258 cvt = trunc_optab;
15259 hw_convert = hw_conversions[kf_or_tf].to_df;
15260 break;
15261
15262 case E_SFmode:
15263 cvt = trunc_optab;
15264 hw_convert = hw_conversions[kf_or_tf].to_sf;
15265 break;
15266
15267 case E_KFmode:
15268 case E_IFmode:
15269 case E_TFmode:
15270 if (FLOAT128_IBM_P (dest_mode))
15271 cvt = trunc_optab;
15272 else
15273 do_move = true;
15274 break;
15275
15276 case E_SImode:
15277 if (unsigned_p)
15278 {
15279 cvt = ufix_optab;
15280 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15281 }
15282 else
15283 {
15284 cvt = sfix_optab;
15285 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15286 }
15287 break;
15288
15289 case E_DImode:
15290 if (unsigned_p)
15291 {
15292 cvt = ufix_optab;
15293 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15294 }
15295 else
15296 {
15297 cvt = sfix_optab;
15298 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15299 }
15300 break;
15301
15302 default:
15303 gcc_unreachable ();
15304 }
15305 }
15306
15307 /* Both IBM format. */
15308 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15309 do_move = true;
15310
15311 else
15312 gcc_unreachable ();
15313
15314 /* Handle conversion between TFmode/KFmode/IFmode. */
15315 if (do_move)
15316 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15317
15318 /* Handle conversion if we have hardware support. */
15319 else if (TARGET_FLOAT128_HW && hw_convert)
15320 emit_insn ((hw_convert) (dest, src));
15321
15322 /* Call an external function to do the conversion. */
15323 else if (cvt != unknown_optab)
15324 {
15325 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15326 gcc_assert (libfunc != NULL_RTX);
15327
15328 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15329 src, src_mode);
15330
15331 gcc_assert (dest2 != NULL_RTX);
15332 if (!rtx_equal_p (dest, dest2))
15333 emit_move_insn (dest, dest2);
15334 }
15335
15336 else
15337 gcc_unreachable ();
15338
15339 return;
15340 }
15341
15342 \f
15343 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15344 can be used as that dest register. Return the dest register. */
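/* For example, testing op1 == 17 emits scratch = op1 ^ 17 (17 is a
   logical_operand, so this is a single xori), after which the equality
   test reduces to scratch == 0; constants that are not logical_operands
   are subtracted instead.  */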
15345
15346 rtx
15347 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15348 {
15349 if (op2 == const0_rtx)
15350 return op1;
15351
15352 if (GET_CODE (scratch) == SCRATCH)
15353 scratch = gen_reg_rtx (mode);
15354
15355 if (logical_operand (op2, mode))
15356 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15357 else
15358 emit_insn (gen_rtx_SET (scratch,
15359 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15360
15361 return scratch;
15362 }
15363
15364 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15365 requires this. The result is mode MODE. */
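/* For example, LE is computed as LT | EQ and LTGT as LT | GT with a single
   cror of the two CR bits; UNEQ uses EQ | UNORDERED.  */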
15366 rtx
15367 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15368 {
15369 rtx cond[2];
15370 int n = 0;
15371 if (code == LTGT || code == LE || code == UNLT)
15372 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15373 if (code == LTGT || code == GE || code == UNGT)
15374 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15375 if (code == LE || code == GE || code == UNEQ)
15376 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15377 if (code == UNLT || code == UNGT || code == UNEQ)
15378 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15379
15380 gcc_assert (n == 2);
15381
15382 rtx cc = gen_reg_rtx (CCEQmode);
15383 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15384 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15385
15386 return cc;
15387 }
15388
15389 void
15390 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15391 {
15392 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15393 rtx_code cond_code = GET_CODE (condition_rtx);
15394
15395 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15396 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15397 ;
15398 else if (cond_code == NE
15399 || cond_code == GE || cond_code == LE
15400 || cond_code == GEU || cond_code == LEU
15401 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15402 {
15403 rtx not_result = gen_reg_rtx (CCEQmode);
15404 rtx not_op, rev_cond_rtx;
15405 machine_mode cc_mode;
15406
15407 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15408
15409 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15410 SImode, XEXP (condition_rtx, 0), const0_rtx);
15411 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15412 emit_insn (gen_rtx_SET (not_result, not_op));
15413 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15414 }
15415
15416 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15417 if (op_mode == VOIDmode)
15418 op_mode = GET_MODE (XEXP (operands[1], 1));
15419
15420 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15421 {
15422 PUT_MODE (condition_rtx, DImode);
15423 convert_move (operands[0], condition_rtx, 0);
15424 }
15425 else
15426 {
15427 PUT_MODE (condition_rtx, SImode);
15428 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15429 }
15430 }
15431
15432 /* Emit a conditional branch: test the comparison in operands[0] and branch to the label in operands[3]. */
15433
15434 void
15435 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15436 {
15437 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15438 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15439 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15440 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15441 }
15442
15443 /* Return the string to output a conditional branch to LABEL, which is
15444 the operand template of the label, or NULL if the branch is really a
15445 conditional return.
15446
15447 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15448 condition code register and its mode specifies what kind of
15449 comparison we made.
15450
15451 REVERSED is nonzero if we should reverse the sense of the comparison.
15452
15453 INSN is the insn. */
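/* For instance (illustrative), an EQ comparison in cr0 against label "L5"
   yields "beq 0,L5", and when the target is out of range of a conditional
   branch the sense is reversed to give "bne 0,$+8" followed by "b L5".  */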
15454
15455 char *
15456 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15457 {
15458 static char string[64];
15459 enum rtx_code code = GET_CODE (op);
15460 rtx cc_reg = XEXP (op, 0);
15461 machine_mode mode = GET_MODE (cc_reg);
15462 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15463 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15464 int really_reversed = reversed ^ need_longbranch;
15465 char *s = string;
15466 const char *ccode;
15467 const char *pred;
15468 rtx note;
15469
15470 validate_condition_mode (code, mode);
15471
15472 /* Work out which way this really branches. We could use
15473 reverse_condition_maybe_unordered here always but this
15474 makes the resulting assembler clearer. */
15475 if (really_reversed)
15476 {
15477 /* Reversal of FP compares needs care -- an ordered compare
15478 becomes an unordered compare and vice versa. */
15479 if (mode == CCFPmode)
15480 code = reverse_condition_maybe_unordered (code);
15481 else
15482 code = reverse_condition (code);
15483 }
15484
15485 switch (code)
15486 {
15487 /* Not all of these are actually distinct opcodes, but
15488 we distinguish them for clarity of the resulting assembler. */
15489 case NE: case LTGT:
15490 ccode = "ne"; break;
15491 case EQ: case UNEQ:
15492 ccode = "eq"; break;
15493 case GE: case GEU:
15494 ccode = "ge"; break;
15495 case GT: case GTU: case UNGT:
15496 ccode = "gt"; break;
15497 case LE: case LEU:
15498 ccode = "le"; break;
15499 case LT: case LTU: case UNLT:
15500 ccode = "lt"; break;
15501 case UNORDERED: ccode = "un"; break;
15502 case ORDERED: ccode = "nu"; break;
15503 case UNGE: ccode = "nl"; break;
15504 case UNLE: ccode = "ng"; break;
15505 default:
15506 gcc_unreachable ();
15507 }
15508
15509 /* Maybe we have a guess as to how likely the branch is. */
15510 pred = "";
15511 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15512 if (note != NULL_RTX)
15513 {
15514 /* PROB is the difference from 50%. */
15515 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15516 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15517
15518 /* Only hint for highly probable/improbable branches on newer cpus when
15519 we have real profile data, as static prediction overrides processor
15520 dynamic prediction. For older cpus we may as well always hint, but
15521 assume not taken for branches that are very close to 50% as a
15522 mispredicted taken branch is more expensive than a
15523 mispredicted not-taken branch. */
15524 if (rs6000_always_hint
15525 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15526 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15527 && br_prob_note_reliable_p (note)))
15528 {
15529 if (abs (prob) > REG_BR_PROB_BASE / 20
15530 && ((prob > 0) ^ need_longbranch))
15531 pred = "+";
15532 else
15533 pred = "-";
15534 }
15535 }
15536
15537 if (label == NULL)
15538 s += sprintf (s, "b%slr%s ", ccode, pred);
15539 else
15540 s += sprintf (s, "b%s%s ", ccode, pred);
15541
15542 /* We need to escape any '%' characters in the reg_names string.
15543 Assume they'd only be the first character.... */
15544 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15545 *s++ = '%';
15546 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15547
15548 if (label != NULL)
15549 {
15550 /* If the branch distance was too far, we may have to use an
15551 unconditional branch to go the distance. */
15552 if (need_longbranch)
15553 s += sprintf (s, ",$+8\n\tb %s", label);
15554 else
15555 s += sprintf (s, ",%s", label);
15556 }
15557
15558 return string;
15559 }
15560
15561 /* Return insn for VSX or Altivec comparisons. */
15562
15563 static rtx
15564 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15565 {
15566 rtx mask;
15567 machine_mode mode = GET_MODE (op0);
15568
15569 switch (code)
15570 {
15571 default:
15572 break;
15573
15574 case GE:
15575 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15576 return NULL_RTX;
15577 /* FALLTHRU */
15578
15579 case EQ:
15580 case GT:
15581 case GTU:
15582 case ORDERED:
15583 case UNORDERED:
15584 case UNEQ:
15585 case LTGT:
15586 mask = gen_reg_rtx (mode);
15587 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15588 return mask;
15589 }
15590
15591 return NULL_RTX;
15592 }
15593
15594 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15595 DMODE is the expected destination mode. This function is recursive. */
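/* For example, integer LE has no single instruction, so it is built below
   as (LT | EQ); NE is built by inverting EQ with a one's-complement.  */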
15596
15597 static rtx
15598 rs6000_emit_vector_compare (enum rtx_code rcode,
15599 rtx op0, rtx op1,
15600 machine_mode dmode)
15601 {
15602 rtx mask;
15603 bool swap_operands = false;
15604 bool try_again = false;
15605
15606 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15607 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15608
15609 /* See if the comparison works as is. */
15610 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15611 if (mask)
15612 return mask;
15613
15614 switch (rcode)
15615 {
15616 case LT:
15617 rcode = GT;
15618 swap_operands = true;
15619 try_again = true;
15620 break;
15621 case LTU:
15622 rcode = GTU;
15623 swap_operands = true;
15624 try_again = true;
15625 break;
15626 case NE:
15627 case UNLE:
15628 case UNLT:
15629 case UNGE:
15630 case UNGT:
15631 /* Invert condition and try again.
15632 e.g., A != B becomes ~(A==B). */
15633 {
15634 enum rtx_code rev_code;
15635 enum insn_code nor_code;
15636 rtx mask2;
15637
15638 rev_code = reverse_condition_maybe_unordered (rcode);
15639 if (rev_code == UNKNOWN)
15640 return NULL_RTX;
15641
15642 nor_code = optab_handler (one_cmpl_optab, dmode);
15643 if (nor_code == CODE_FOR_nothing)
15644 return NULL_RTX;
15645
15646 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15647 if (!mask2)
15648 return NULL_RTX;
15649
15650 mask = gen_reg_rtx (dmode);
15651 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15652 return mask;
15653 }
15654 break;
15655 case GE:
15656 case GEU:
15657 case LE:
15658 case LEU:
15659 /* Try GT/GTU/LT/LTU OR EQ */
15660 {
15661 rtx c_rtx, eq_rtx;
15662 enum insn_code ior_code;
15663 enum rtx_code new_code;
15664
15665 switch (rcode)
15666 {
15667 case GE:
15668 new_code = GT;
15669 break;
15670
15671 case GEU:
15672 new_code = GTU;
15673 break;
15674
15675 case LE:
15676 new_code = LT;
15677 break;
15678
15679 case LEU:
15680 new_code = LTU;
15681 break;
15682
15683 default:
15684 gcc_unreachable ();
15685 }
15686
15687 ior_code = optab_handler (ior_optab, dmode);
15688 if (ior_code == CODE_FOR_nothing)
15689 return NULL_RTX;
15690
15691 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15692 if (!c_rtx)
15693 return NULL_RTX;
15694
15695 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15696 if (!eq_rtx)
15697 return NULL_RTX;
15698
15699 mask = gen_reg_rtx (dmode);
15700 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15701 return mask;
15702 }
15703 break;
15704 default:
15705 return NULL_RTX;
15706 }
15707
15708 if (try_again)
15709 {
15710 if (swap_operands)
15711 std::swap (op0, op1);
15712
15713 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15714 if (mask)
15715 return mask;
15716 }
15717
15718 /* You only get two chances. */
15719 return NULL_RTX;
15720 }
15721
15722 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15723 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15724 operands for the relation operation COND. */
15725
15726 int
15727 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15728 rtx cond, rtx cc_op0, rtx cc_op1)
15729 {
15730 machine_mode dest_mode = GET_MODE (dest);
15731 machine_mode mask_mode = GET_MODE (cc_op0);
15732 enum rtx_code rcode = GET_CODE (cond);
15733 rtx mask;
15734 bool invert_move = false;
15735
15736 if (VECTOR_UNIT_NONE_P (dest_mode))
15737 return 0;
15738
15739 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15740 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15741
15742 switch (rcode)
15743 {
15744 /* Swap operands if we can, and fall back to doing the operation as
15745 specified, and doing a NOR to invert the test. */
15746 case NE:
15747 case UNLE:
15748 case UNLT:
15749 case UNGE:
15750 case UNGT:
15751 /* Invert condition and try again.
15752 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15753 invert_move = true;
15754 rcode = reverse_condition_maybe_unordered (rcode);
15755 if (rcode == UNKNOWN)
15756 return 0;
15757 break;
15758
15759 case GE:
15760 case LE:
15761 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15762 {
15763 /* Invert condition to avoid compound test. */
15764 invert_move = true;
15765 rcode = reverse_condition (rcode);
15766 }
15767 break;
15768
15769 case GTU:
15770 case GEU:
15771 case LTU:
15772 case LEU:
15773
15774 /* Invert condition to avoid compound test if necessary. */
15775 if (rcode == GEU || rcode == LEU)
15776 {
15777 invert_move = true;
15778 rcode = reverse_condition (rcode);
15779 }
15780 break;
15781
15782 default:
15783 break;
15784 }
15785
15786 /* Get the vector mask for the given relational operations. */
15787 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15788
15789 if (!mask)
15790 return 0;
15791
15792 if (mask_mode != dest_mode)
15793 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
15794
15795 if (invert_move)
15796 std::swap (op_true, op_false);
15797
15798 /* The comparison mask is -1/0 per element, so optimize selects whose arms are the constant vectors -1 and 0. */
15799 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15800 && (GET_CODE (op_true) == CONST_VECTOR
15801 || GET_CODE (op_false) == CONST_VECTOR))
15802 {
15803 rtx constant_0 = CONST0_RTX (dest_mode);
15804 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15805
15806 if (op_true == constant_m1 && op_false == constant_0)
15807 {
15808 emit_move_insn (dest, mask);
15809 return 1;
15810 }
15811
15812 else if (op_true == constant_0 && op_false == constant_m1)
15813 {
15814 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15815 return 1;
15816 }
15817
15818 /* If we can't use the vector comparison directly, perhaps we can use
15819 the mask for the true or false fields, instead of loading up a
15820 constant. */
15821 if (op_true == constant_m1)
15822 op_true = mask;
15823
15824 if (op_false == constant_0)
15825 op_false = mask;
15826 }
15827
15828 if (!REG_P (op_true) && !SUBREG_P (op_true))
15829 op_true = force_reg (dest_mode, op_true);
15830
15831 if (!REG_P (op_false) && !SUBREG_P (op_false))
15832 op_false = force_reg (dest_mode, op_false);
15833
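  /* Form dest = (op_true & mask) | (op_false & ~mask), the vsel/xxsel
     shape the backend's vector select patterns are expected to match.  */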
15834 rtx tmp = gen_rtx_IOR (dest_mode,
15835 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
15836 op_false),
15837 gen_rtx_AND (dest_mode, mask, op_true));
15838 emit_insn (gen_rtx_SET (dest, tmp));
15839 return 1;
15840 }
15841
15842 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to compute a
15843 maximum or minimum with "C" semantics.
15844
15845 Unless you use -ffast-math, you can't use these instructions to replace
15846 conditions that implicitly reverse the condition because the comparison
15847 might generate a NaN or signed zer0.
15848
15849 I.e. the following can be replaced all of the time
15850 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15851 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15852 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15853 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15854
15855 The following can be replaced only if -ffast-math is used:
15856 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15857 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15858 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15859 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15860
15861 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15862 nonzero/true, FALSE_COND if it is zero/false.
15863
15864 Return false if we can't generate the appropriate minimum or maximum, and
15865 true if we can did the minimum or maximum. */
15866
15867 static bool
15868 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15869 {
15870 enum rtx_code code = GET_CODE (op);
15871 rtx op0 = XEXP (op, 0);
15872 rtx op1 = XEXP (op, 1);
15873 machine_mode compare_mode = GET_MODE (op0);
15874 machine_mode result_mode = GET_MODE (dest);
15875
15876 if (result_mode != compare_mode)
15877 return false;
15878
15879 /* As the comments above this function note, the checks below expect
15880 only GE/GT/LE/LT; for the reversible equivalents UNLT/UNLE/UNGT/UNGE
15881 we do the reversion first so that the following checks need to
15882 support fewer cases, like:
15883
15884 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
15885 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
15886 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
15887 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
15888
15889 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
15890 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
15891 have to check for fast-math or the like. */
15892 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
15893 {
15894 code = reverse_condition_maybe_unordered (code);
15895 std::swap (true_cond, false_cond);
15896 }
15897
15898 bool max_p;
15899 if (code == GE || code == GT)
15900 max_p = true;
15901 else if (code == LE || code == LT)
15902 max_p = false;
15903 else
15904 return false;
15905
15906 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15907 ;
15908
15909 /* Only when NaNs and signed-zeros are not in effect, smax could be
15910 used for `op0 < op1 ? op1 : op0`, and smin could be used for
15911 `op0 > op1 ? op1 : op0`. */
15912 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15913 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15914 max_p = !max_p;
15915
15916 else
15917 return false;
15918
15919 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15920 return true;
15921 }
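
/* For reference, a minimal sketch of the "C" semantics used above, written
   as illustrative C (xsmincdp behaves like this expression, selecting op2
   whenever the comparison is false):

     static double min_c (double a, double b) { return (a < b) ? a : b; }

   Because a NaN makes the comparison false and selects b, the straight
   forms listed above are always safe, while the reversed forms are only
   safe under -ffast-math.  */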
15922
15923 /* Possibly emit a floating point conditional move by generating a
15924 compare-and-set-mask instruction and an XXSEL select instruction.
15925
15926 Move TRUE_COND to DEST if OP applied to the operands of the last
15927 comparison is nonzero/true, FALSE_COND if it is zero/false.
15928
15929 Return false if the operation cannot be generated, and true if we could
15930 generate the instruction. */
15931
15932 static bool
15933 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15934 {
15935 enum rtx_code code = GET_CODE (op);
15936 rtx op0 = XEXP (op, 0);
15937 rtx op1 = XEXP (op, 1);
15938 machine_mode compare_mode = GET_MODE (op0);
15939 machine_mode result_mode = GET_MODE (dest);
15940 rtx compare_rtx;
15941 rtx cmove_rtx;
15942 rtx clobber_rtx;
15943
15944 if (!can_create_pseudo_p ())
15945 return false;
15946
15947 /* We allow the comparison to be either SFmode/DFmode and the true/false
15948 condition to be either SFmode/DFmode. I.e. we allow:
15949
15950 float a, b;
15951 double c, d, r;
15952
15953 r = (a == b) ? c : d;
15954
15955 and:
15956
15957 double a, b;
15958 float c, d, r;
15959
15960 r = (a == b) ? c : d;
15961
15962 but we don't allow intermixing the IEEE 128-bit floating point types with
15963 the 32/64-bit scalar types. */
15964
15965 if (!(compare_mode == result_mode
15966 || (compare_mode == SFmode && result_mode == DFmode)
15967 || (compare_mode == DFmode && result_mode == SFmode)))
15968 return false;
15969
15970 switch (code)
15971 {
15972 case EQ:
15973 case GE:
15974 case GT:
15975 break;
15976
15977 case NE:
15978 case LT:
15979 case LE:
15980 code = swap_condition (code);
15981 std::swap (op0, op1);
15982 break;
15983
15984 default:
15985 return false;
15986 }
15987
15988 /* Generate: [(parallel [(set (dest)
15989 (if_then_else (op (cmp1) (cmp2))
15990 (true)
15991 (false)))
15992 (clobber (scratch))])]. */
15993
15994 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15995 cmove_rtx = gen_rtx_SET (dest,
15996 gen_rtx_IF_THEN_ELSE (result_mode,
15997 compare_rtx,
15998 true_cond,
15999 false_cond));
16000
16001 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
16002 emit_insn (gen_rtx_PARALLEL (VOIDmode,
16003 gen_rtvec (2, cmove_rtx, clobber_rtx)));
16004
16005 return true;
16006 }
16007
16008 /* Helper function to return true if the target has a compare-and-set-mask
16009 instruction that can be used with XXSEL to implement a conditional
16010 move. Such a target is also assumed to support the "C" minimum and
16011 maximum instructions. */
16012
16013 static bool
16014 have_compare_and_set_mask (machine_mode mode)
16015 {
16016 switch (mode)
16017 {
16018 case E_SFmode:
16019 case E_DFmode:
16020 return TARGET_P9_MINMAX;
16021
16022 case E_KFmode:
16023 case E_TFmode:
16024 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16025
16026 default:
16027 break;
16028 }
16029
16030 return false;
16031 }
16032
16033 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
16034 operands of the last comparison is nonzero/true, FALSE_COND if it
16035 is zero/false. Return false if the hardware has no such operation. */
16036
16037 bool
16038 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16039 {
16040 enum rtx_code code = GET_CODE (op);
16041 rtx op0 = XEXP (op, 0);
16042 rtx op1 = XEXP (op, 1);
16043 machine_mode compare_mode = GET_MODE (op0);
16044 machine_mode result_mode = GET_MODE (dest);
16045 rtx temp;
16046 bool is_against_zero;
16047
16048 /* These modes should always match. */
16049 if (GET_MODE (op1) != compare_mode
16050 /* In the isel case however, we can use a compare immediate, so
16051 op1 may be a small constant. */
16052 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16053 return false;
16054 if (GET_MODE (true_cond) != result_mode)
16055 return false;
16056 if (GET_MODE (false_cond) != result_mode)
16057 return false;
16058
16059 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16060 instructions. */
16061 if (have_compare_and_set_mask (compare_mode)
16062 && have_compare_and_set_mask (result_mode))
16063 {
16064 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16065 return true;
16066
16067 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16068 return true;
16069 }
16070
16071 /* Don't allow using floating point comparisons for integer results for
16072 now. */
16073 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16074 return false;
16075
16076 /* First, work out if the hardware can do this at all, or
16077 if it's too slow.... */
16078 if (!FLOAT_MODE_P (compare_mode))
16079 {
16080 if (TARGET_ISEL)
16081 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16082 return false;
16083 }
16084
16085 is_against_zero = op1 == CONST0_RTX (compare_mode);
16086
16087 /* A floating-point subtract might overflow, underflow, or produce
16088 an inexact result, thus changing the floating-point flags, so it
16089 can't be generated if we care about that. It's safe if one side
16090 of the construct is zero, since then no subtract will be
16091 generated. */
16092 if (SCALAR_FLOAT_MODE_P (compare_mode)
16093 && flag_trapping_math && ! is_against_zero)
16094 return false;
16095
16096 /* Eliminate half of the comparisons by switching operands; this
16097 makes the remaining code simpler. */
16098 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16099 || code == LTGT || code == LT || code == UNLE)
16100 {
16101 code = reverse_condition_maybe_unordered (code);
16102 temp = true_cond;
16103 true_cond = false_cond;
16104 false_cond = temp;
16105 }
16106
16107 /* UNEQ and LTGT take four instructions for a comparison with zero,
16108 so it'll probably be faster to use a branch here too. */
16109 if (code == UNEQ && HONOR_NANS (compare_mode))
16110 return false;
16111
16112 /* We're going to try to implement comparisons by performing
16113 a subtract, then comparing against zero. Unfortunately,
16114 Inf - Inf is NaN which is not zero, and so if we don't
16115 know that the operand is finite and the comparison
16116 would treat EQ differently from UNORDERED, we can't do it. */
16117 if (HONOR_INFINITIES (compare_mode)
16118 && code != GT && code != UNGE
16119 && (!CONST_DOUBLE_P (op1)
16120 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16121 /* Constructs of the form (a OP b ? a : b) are safe. */
16122 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16123 || (! rtx_equal_p (op0, true_cond)
16124 && ! rtx_equal_p (op1, true_cond))))
16125 return false;
16126
16127 /* At this point we know we can use fsel. */
16128
16129 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16130 is no fsel instruction. */
16131 if (compare_mode != SFmode && compare_mode != DFmode)
16132 return false;
16133
16134 /* Reduce the comparison to a comparison against zero. */
16135 if (! is_against_zero)
16136 {
16137 temp = gen_reg_rtx (compare_mode);
16138 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16139 op0 = temp;
16140 op1 = CONST0_RTX (compare_mode);
16141 }
16142
16143 /* If we don't care about NaNs we can reduce some of the comparisons
16144 down to faster ones. */
16145 if (! HONOR_NANS (compare_mode))
16146 switch (code)
16147 {
16148 case GT:
16149 code = LE;
16150 temp = true_cond;
16151 true_cond = false_cond;
16152 false_cond = temp;
16153 break;
16154 case UNGE:
16155 code = GE;
16156 break;
16157 case UNEQ:
16158 code = EQ;
16159 break;
16160 default:
16161 break;
16162 }
16163
16164 /* Now, reduce everything down to a GE. */
16165 switch (code)
16166 {
16167 case GE:
16168 break;
16169
16170 case LE:
16171 temp = gen_reg_rtx (compare_mode);
16172 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16173 op0 = temp;
16174 break;
16175
16176 case ORDERED:
16177 temp = gen_reg_rtx (compare_mode);
16178 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16179 op0 = temp;
16180 break;
16181
16182 case EQ:
16183 temp = gen_reg_rtx (compare_mode);
16184 emit_insn (gen_rtx_SET (temp,
16185 gen_rtx_NEG (compare_mode,
16186 gen_rtx_ABS (compare_mode, op0))));
16187 op0 = temp;
16188 break;
16189
16190 case UNGE:
16191 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16192 temp = gen_reg_rtx (result_mode);
16193 emit_insn (gen_rtx_SET (temp,
16194 gen_rtx_IF_THEN_ELSE (result_mode,
16195 gen_rtx_GE (VOIDmode,
16196 op0, op1),
16197 true_cond, false_cond)));
16198 false_cond = true_cond;
16199 true_cond = temp;
16200
16201 temp = gen_reg_rtx (compare_mode);
16202 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16203 op0 = temp;
16204 break;
16205
16206 case GT:
16207 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16208 temp = gen_reg_rtx (result_mode);
16209 emit_insn (gen_rtx_SET (temp,
16210 gen_rtx_IF_THEN_ELSE (result_mode,
16211 gen_rtx_GE (VOIDmode,
16212 op0, op1),
16213 true_cond, false_cond)));
16214 true_cond = false_cond;
16215 false_cond = temp;
16216
16217 temp = gen_reg_rtx (compare_mode);
16218 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16219 op0 = temp;
16220 break;
16221
16222 default:
16223 gcc_unreachable ();
16224 }
16225
16226 emit_insn (gen_rtx_SET (dest,
16227 gen_rtx_IF_THEN_ELSE (result_mode,
16228 gen_rtx_GE (VOIDmode,
16229 op0, op1),
16230 true_cond, false_cond)));
16231 return true;
16232 }
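
/* For reference, a worked sketch of the reduce-to-GE algebra above
   (illustrative, matching the NEG/ABS rewrites in the switch):

     a <= 0    <->   -a >= 0
     ORDERED   <->  |a| >= 0     (|NaN| >= 0 tests false under fsel)
     a == 0    <->  -|a| >= 0

   UNGE and GT cannot be expressed with one GE test, so the code above
   chains two if_then_else expressions, testing first a >= 0 and then
   -a >= 0.  */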
16233
16234 /* Same as above, but for ints (isel). */
16235
16236 bool
16237 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16238 {
16239 rtx condition_rtx, cr;
16240 machine_mode mode = GET_MODE (dest);
16241 enum rtx_code cond_code;
16242 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16243 bool signedp;
16244
16245 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16246 return false;
16247
16248 /* PR104335: We now need to expect CC-mode "comparisons"
16249 coming from ifcvt. The following code expects proper
16250 comparisons, so it is better to bail out here. */
16251 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16252 return false;
16253
16254 /* We still have to do the compare, because isel doesn't do a
16255 compare, it just looks at the CRx bits set by a previous compare
16256 instruction. */
16257 condition_rtx = rs6000_generate_compare (op, mode);
16258 cond_code = GET_CODE (condition_rtx);
16259 cr = XEXP (condition_rtx, 0);
16260 signedp = GET_MODE (cr) == CCmode;
16261
16262 isel_func = (mode == SImode
16263 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
16264 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
16265
16266 switch (cond_code)
16267 {
16268 case LT: case GT: case LTU: case GTU: case EQ:
16269 /* isel handles these directly. */
16270 break;
16271
16272 default:
16273 /* We need to swap the sense of the comparison. */
16274 {
16275 std::swap (false_cond, true_cond);
16276 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16277 }
16278 break;
16279 }
16280
16281 false_cond = force_reg (mode, false_cond);
16282 if (true_cond != const0_rtx)
16283 true_cond = force_reg (mode, true_cond);
16284
16285 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16286
16287 return true;
16288 }
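
/* For reference, a minimal sketch of what the isel expansion above
   produces for a fragment like

     int r = (a < b) ? x : y;

   conceptually (illustrative assembly, register names made up):

     cmpw  cr0,ra,rb
     isel  rr,rx,ry,lt    # rr = CR0.LT ? rx : ry

   isel itself never compares; it only selects on a CR bit written by a
   preceding compare, which is why the compare is emitted first.  */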
16289
16290 void
16291 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16292 {
16293 machine_mode mode = GET_MODE (op0);
16294 enum rtx_code c;
16295 rtx target;
16296
16297 /* VSX/altivec have direct min/max insns. */
16298 if ((code == SMAX || code == SMIN)
16299 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16300 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16301 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16302 {
16303 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16304 return;
16305 }
16306
16307 if (code == SMAX || code == SMIN)
16308 c = GE;
16309 else
16310 c = GEU;
16311
16312 if (code == SMAX || code == UMAX)
16313 target = emit_conditional_move (dest, { c, op0, op1, mode },
16314 op0, op1, mode, 0);
16315 else
16316 target = emit_conditional_move (dest, { c, op0, op1, mode },
16317 op1, op0, mode, 0);
16318 gcc_assert (target);
16319 if (target != dest)
16320 emit_move_insn (dest, target);
16321 }
16322
16323 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16324 COND is true. Mark the jump as unlikely to be taken. */
16325
16326 static void
16327 emit_unlikely_jump (rtx cond, rtx label)
16328 {
16329 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16330 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16331 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16332 }
16333
16334 /* A subroutine of the atomic operation splitters. Emit a load-locked
16335 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16336 the zero_extend operation. */
16337
16338 static void
16339 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16340 {
16341 rtx (*fn) (rtx, rtx) = NULL;
16342
16343 switch (mode)
16344 {
16345 case E_QImode:
16346 fn = gen_load_lockedqi;
16347 break;
16348 case E_HImode:
16349 fn = gen_load_lockedhi;
16350 break;
16351 case E_SImode:
16352 if (GET_MODE (mem) == QImode)
16353 fn = gen_load_lockedqi_si;
16354 else if (GET_MODE (mem) == HImode)
16355 fn = gen_load_lockedhi_si;
16356 else
16357 fn = gen_load_lockedsi;
16358 break;
16359 case E_DImode:
16360 fn = gen_load_lockeddi;
16361 break;
16362 case E_TImode:
16363 fn = gen_load_lockedti;
16364 break;
16365 default:
16366 gcc_unreachable ();
16367 }
16368 emit_insn (fn (reg, mem));
16369 }
16370
16371 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16372 instruction in MODE. */
16373
16374 static void
16375 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16376 {
16377 rtx (*fn) (rtx, rtx, rtx) = NULL;
16378
16379 switch (mode)
16380 {
16381 case E_QImode:
16382 fn = gen_store_conditionalqi;
16383 break;
16384 case E_HImode:
16385 fn = gen_store_conditionalhi;
16386 break;
16387 case E_SImode:
16388 fn = gen_store_conditionalsi;
16389 break;
16390 case E_DImode:
16391 fn = gen_store_conditionaldi;
16392 break;
16393 case E_TImode:
16394 fn = gen_store_conditionalti;
16395 break;
16396 default:
16397 gcc_unreachable ();
16398 }
16399
16400 /* Emit sync before stwcx. to address PPC405 Erratum. */
16401 if (PPC405_ERRATUM77)
16402 emit_insn (gen_hwsync ());
16403
16404 emit_insn (fn (res, mem, val));
16405 }
16406
16407 /* Expand barriers before and after a load_locked/store_cond sequence. */
16408
16409 static rtx
16410 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16411 {
16412 rtx addr = XEXP (mem, 0);
16413
16414 if (!legitimate_indirect_address_p (addr, reload_completed)
16415 && !legitimate_indexed_address_p (addr, reload_completed))
16416 {
16417 addr = force_reg (Pmode, addr);
16418 mem = replace_equiv_address_nv (mem, addr);
16419 }
16420
16421 switch (model)
16422 {
16423 case MEMMODEL_RELAXED:
16424 case MEMMODEL_CONSUME:
16425 case MEMMODEL_ACQUIRE:
16426 break;
16427 case MEMMODEL_RELEASE:
16428 case MEMMODEL_ACQ_REL:
16429 emit_insn (gen_lwsync ());
16430 break;
16431 case MEMMODEL_SEQ_CST:
16432 emit_insn (gen_hwsync ());
16433 break;
16434 default:
16435 gcc_unreachable ();
16436 }
16437 return mem;
16438 }
16439
16440 static void
16441 rs6000_post_atomic_barrier (enum memmodel model)
16442 {
16443 switch (model)
16444 {
16445 case MEMMODEL_RELAXED:
16446 case MEMMODEL_CONSUME:
16447 case MEMMODEL_RELEASE:
16448 break;
16449 case MEMMODEL_ACQUIRE:
16450 case MEMMODEL_ACQ_REL:
16451 case MEMMODEL_SEQ_CST:
16452 emit_insn (gen_isync ());
16453 break;
16454 default:
16455 gcc_unreachable ();
16456 }
16457 }
16458
16459 /* A subroutine of the various atomic expanders. For sub-word operations,
16460 we must adjust things to operate on SImode. Given the original MEM,
16461 return a new aligned memory. Also build and return the quantities by
16462 which to shift and mask. */
16463
16464 static rtx
16465 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16466 {
16467 rtx addr, align, shift, mask, mem;
16468 HOST_WIDE_INT shift_mask;
16469 machine_mode mode = GET_MODE (orig_mem);
16470
16471 /* For smaller modes, we have to implement this via SImode. */
16472 shift_mask = (mode == QImode ? 0x18 : 0x10);
16473
16474 addr = XEXP (orig_mem, 0);
16475 addr = force_reg (GET_MODE (addr), addr);
16476
16477 /* Aligned memory containing subword. Generate a new memory. We
16478 do not want any of the existing MEM_ATTR data, as we're now
16479 accessing memory outside the original object. */
16480 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16481 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16482 mem = gen_rtx_MEM (SImode, align);
16483 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16484 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16485 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16486
16487 /* Shift amount for subword relative to aligned word. */
16488 shift = gen_reg_rtx (SImode);
16489 addr = gen_lowpart (SImode, addr);
16490 rtx tmp = gen_reg_rtx (SImode);
16491 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16492 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16493 if (BYTES_BIG_ENDIAN)
16494 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16495 shift, 1, OPTAB_LIB_WIDEN);
16496 *pshift = shift;
16497
16498 /* Mask for insertion. */
16499 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16500 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16501 *pmask = mask;
16502
16503 return mem;
16504 }
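
/* Worked example of the arithmetic above (illustrative): for a QImode
   access at address 0x1003, the aligned word is at 0x1000 (addr & -4)
   and

     shift = ((0x1003 << 3) & 0x18) = 24   little-endian: bits 24-31
     shift = 24 ^ 0x18             =  0    big-endian: byte 3 is the LSB

   and the mask is then 0xff << shift, covering exactly that byte of the
   SImode word.  */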
16505
16506 /* A subroutine of the various atomic expanders. For sub-word operands,
16507 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
16508
16509 static rtx
16510 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16511 {
16512 rtx x;
16513
16514 x = gen_reg_rtx (SImode);
16515 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16516 gen_rtx_NOT (SImode, mask),
16517 oldval)));
16518
16519 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16520
16521 return x;
16522 }
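
/* In formula form (illustrative), the combination above is

     result = (oldval & ~mask) | newval;

   where NEWVAL is assumed to have already been shifted into position
   with no bits set outside MASK, as the callers arrange.  */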
16523
16524 /* A subroutine of the various atomic expanders. For sub-word operands,
16525 extract WIDE to NARROW via SHIFT. */
16526
16527 static void
16528 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16529 {
16530 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16531 wide, 1, OPTAB_LIB_WIDEN);
16532 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16533 }
16534
16535 /* Expand an atomic compare and swap operation. */
16536
16537 void
16538 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16539 {
16540 rtx boolval, retval, mem, oldval, newval, cond;
16541 rtx label1, label2, x, mask, shift;
16542 machine_mode mode, orig_mode;
16543 enum memmodel mod_s, mod_f;
16544 bool is_weak;
16545
16546 boolval = operands[0];
16547 retval = operands[1];
16548 mem = operands[2];
16549 oldval = operands[3];
16550 newval = operands[4];
16551 is_weak = (INTVAL (operands[5]) != 0);
16552 mod_s = memmodel_base (INTVAL (operands[6]));
16553 mod_f = memmodel_base (INTVAL (operands[7]));
16554 orig_mode = mode = GET_MODE (mem);
16555
16556 mask = shift = NULL_RTX;
16557 if (mode == QImode || mode == HImode)
16558 {
16559 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16560 lwarx plus shift/mask operations. With power8, we need to do the
16561 comparison in SImode, but the store is still done in QI/HImode. */
16562 oldval = convert_modes (SImode, mode, oldval, 1);
16563
16564 if (!TARGET_SYNC_HI_QI)
16565 {
16566 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16567
16568 /* Shift and mask OLDVAL into position within the word. */
16569 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16570 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16571
16572 /* Shift and mask NEWVAL into position within the word. */
16573 newval = convert_modes (SImode, mode, newval, 1);
16574 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16575 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16576 }
16577
16578 /* Prepare to adjust the return value. */
16579 retval = gen_reg_rtx (SImode);
16580 mode = SImode;
16581 }
16582 else if (reg_overlap_mentioned_p (retval, oldval))
16583 oldval = copy_to_reg (oldval);
16584
16585 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16586 oldval = copy_to_mode_reg (mode, oldval);
16587
16588 if (reg_overlap_mentioned_p (retval, newval))
16589 newval = copy_to_reg (newval);
16590
16591 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16592
16593 label1 = NULL_RTX;
16594 if (!is_weak)
16595 {
16596 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16597 emit_label (XEXP (label1, 0));
16598 }
16599 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16600
16601 emit_load_locked (mode, retval, mem);
16602
16603 x = retval;
16604 if (mask)
16605 x = expand_simple_binop (SImode, AND, retval, mask,
16606 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16607
16608 cond = gen_reg_rtx (CCmode);
16609 /* If we have TImode, synthesize a comparison. */
16610 if (mode != TImode)
16611 x = gen_rtx_COMPARE (CCmode, x, oldval);
16612 else
16613 {
16614 rtx xor1_result = gen_reg_rtx (DImode);
16615 rtx xor2_result = gen_reg_rtx (DImode);
16616 rtx or_result = gen_reg_rtx (DImode);
16617 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16618 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16619 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16620 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16621
16622 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16623 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16624 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16625 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16626 }
16627
16628 emit_insn (gen_rtx_SET (cond, x));
16629
16630 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16631 emit_unlikely_jump (x, label2);
16632
16633 x = newval;
16634 if (mask)
16635 x = rs6000_mask_atomic_subword (retval, newval, mask);
16636
16637 emit_store_conditional (orig_mode, cond, mem, x);
16638
16639 if (!is_weak)
16640 {
16641 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16642 emit_unlikely_jump (x, label1);
16643 }
16644
16645 if (!is_mm_relaxed (mod_f))
16646 emit_label (XEXP (label2, 0));
16647
16648 rs6000_post_atomic_barrier (mod_s);
16649
16650 if (is_mm_relaxed (mod_f))
16651 emit_label (XEXP (label2, 0));
16652
16653 if (shift)
16654 rs6000_finish_atomic_subword (operands[1], retval, shift);
16655 else if (mode != GET_MODE (operands[1]))
16656 convert_move (operands[1], retval, 1);
16657
16658 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16659 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16660 emit_insn (gen_rtx_SET (boolval, x));
16661 }
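
/* For reference, a sketch (illustrative pseudo-assembly) of the strong
   SImode loop this expansion produces, barriers omitted:

     1: lwarx   r,0,mem        # load word and set reservation
        cmpw    cr0,r,oldval
        bne-    2f             # mismatch: fail
        stwcx.  newval,0,mem   # store iff reservation still held
        bne-    1b             # reservation lost: retry
     2:

   A weak compare-and-swap omits the backward branch, and CR0.EQ is left
   holding the success flag for BOOLVAL.  */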
16662
16663 /* Expand an atomic exchange operation. */
16664
16665 void
16666 rs6000_expand_atomic_exchange (rtx operands[])
16667 {
16668 rtx retval, mem, val, cond;
16669 machine_mode mode;
16670 enum memmodel model;
16671 rtx label, x, mask, shift;
16672
16673 retval = operands[0];
16674 mem = operands[1];
16675 val = operands[2];
16676 model = memmodel_base (INTVAL (operands[3]));
16677 mode = GET_MODE (mem);
16678
16679 mask = shift = NULL_RTX;
16680 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16681 {
16682 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16683
16684 /* Shift and mask VAL into position within the word. */
16685 val = convert_modes (SImode, mode, val, 1);
16686 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16687 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16688
16689 /* Prepare to adjust the return value. */
16690 retval = gen_reg_rtx (SImode);
16691 mode = SImode;
16692 }
16693
16694 mem = rs6000_pre_atomic_barrier (mem, model);
16695
16696 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16697 emit_label (XEXP (label, 0));
16698
16699 emit_load_locked (mode, retval, mem);
16700
16701 x = val;
16702 if (mask)
16703 x = rs6000_mask_atomic_subword (retval, val, mask);
16704
16705 cond = gen_reg_rtx (CCmode);
16706 emit_store_conditional (mode, cond, mem, x);
16707
16708 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16709 emit_unlikely_jump (x, label);
16710
16711 rs6000_post_atomic_barrier (model);
16712
16713 if (shift)
16714 rs6000_finish_atomic_subword (operands[0], retval, shift);
16715 }
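
/* For reference (illustrative pseudo-assembly), the exchange loop is the
   same reservation loop as above minus the compare:

     1: lwarx   retval,0,mem
        stwcx.  val,0,mem
        bne-    1b

   so RETVAL ends up holding the previous memory contents.  */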
16716
16717 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16718 to perform. MEM is the memory on which to operate. VAL is the second
16719 operand of the binary operator. BEFORE and AFTER are optional locations to
16720 return the value of MEM either before or after the operation. MODEL_RTX
16721 is a CONST_INT containing the memory model to use. */
16722
16723 void
16724 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16725 rtx orig_before, rtx orig_after, rtx model_rtx)
16726 {
16727 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16728 machine_mode mode = GET_MODE (mem);
16729 machine_mode store_mode = mode;
16730 rtx label, x, cond, mask, shift;
16731 rtx before = orig_before, after = orig_after;
16732
16733 mask = shift = NULL_RTX;
16734 /* On power8, we want to use SImode for the operation. On previous systems,
16735 do the operation on the containing aligned word and shift/mask to get the
16736 proper byte or halfword. */
16737 if (mode == QImode || mode == HImode)
16738 {
16739 if (TARGET_SYNC_HI_QI)
16740 {
16741 val = convert_modes (SImode, mode, val, 1);
16742
16743 /* Prepare to adjust the return value. */
16744 before = gen_reg_rtx (SImode);
16745 if (after)
16746 after = gen_reg_rtx (SImode);
16747 mode = SImode;
16748 }
16749 else
16750 {
16751 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16752
16753 /* Shift and mask VAL into position within the word. */
16754 val = convert_modes (SImode, mode, val, 1);
16755 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16756 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16757
16758 switch (code)
16759 {
16760 case IOR:
16761 case XOR:
16762 /* We've already zero-extended VAL. That is sufficient to
16763 make certain that it does not affect other bits. */
16764 mask = NULL;
16765 break;
16766
16767 case AND:
16768 /* If we make certain that all of the other bits in VAL are
16769 set, that will be sufficient to not affect other bits. */
16770 x = gen_rtx_NOT (SImode, mask);
16771 x = gen_rtx_IOR (SImode, x, val);
16772 emit_insn (gen_rtx_SET (val, x));
16773 mask = NULL;
16774 break;
16775
16776 case NOT:
16777 case PLUS:
16778 case MINUS:
16779 /* These will all affect bits outside the field and need
16780 adjustment via MASK within the loop. */
16781 break;
16782
16783 default:
16784 gcc_unreachable ();
16785 }
16786
16787 /* Prepare to adjust the return value. */
16788 before = gen_reg_rtx (SImode);
16789 if (after)
16790 after = gen_reg_rtx (SImode);
16791 store_mode = mode = SImode;
16792 }
16793 }
16794
16795 mem = rs6000_pre_atomic_barrier (mem, model);
16796
16797 label = gen_label_rtx ();
16798 emit_label (label);
16799 label = gen_rtx_LABEL_REF (VOIDmode, label);
16800
16801 if (before == NULL_RTX)
16802 before = gen_reg_rtx (mode);
16803
16804 emit_load_locked (mode, before, mem);
16805
16806 if (code == NOT)
16807 {
16808 x = expand_simple_binop (mode, AND, before, val,
16809 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16810 after = expand_simple_unop (mode, NOT, x, after, 1);
16811 }
16812 else
16813 {
16814 after = expand_simple_binop (mode, code, before, val,
16815 after, 1, OPTAB_LIB_WIDEN);
16816 }
16817
16818 x = after;
16819 if (mask)
16820 {
16821 x = expand_simple_binop (SImode, AND, after, mask,
16822 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16823 x = rs6000_mask_atomic_subword (before, x, mask);
16824 }
16825 else if (store_mode != mode)
16826 x = convert_modes (store_mode, mode, x, 1);
16827
16828 cond = gen_reg_rtx (CCmode);
16829 emit_store_conditional (store_mode, cond, mem, x);
16830
16831 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16832 emit_unlikely_jump (x, label);
16833
16834 rs6000_post_atomic_barrier (model);
16835
16836 if (shift)
16837 {
16838 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16839 then do the calculations in a SImode register. */
16840 if (orig_before)
16841 rs6000_finish_atomic_subword (orig_before, before, shift);
16842 if (orig_after)
16843 rs6000_finish_atomic_subword (orig_after, after, shift);
16844 }
16845 else if (store_mode != mode)
16846 {
16847 /* QImode/HImode on machines with lbarx/lharx where we do the native
16848 operation and then do the calculations in a SImode register. */
16849 if (orig_before)
16850 convert_move (orig_before, before, 1);
16851 if (orig_after)
16852 convert_move (orig_after, after, 1);
16853 }
16854 else if (orig_after && after != orig_after)
16855 emit_move_insn (orig_after, after);
16856 }
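
/* A note on the sub-word AND trick above (illustrative): VAL is
   zero-extended, so for IOR/XOR the bits outside the field are already
   zero and harmless.  AND, however, would clear the neighbouring bytes,
   so the code first sets every bit outside the field:

     val |= ~mask;   // AND now leaves the other bytes intact.

   PLUS, MINUS and NOT can still carry or flip bits across the field
   boundary, so those keep MASK and re-merge the result through
   rs6000_mask_atomic_subword inside the loop.  */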
16857
16858 static GTY(()) alias_set_type TOC_alias_set = -1;
16859
16860 alias_set_type
16861 get_TOC_alias_set (void)
16862 {
16863 if (TOC_alias_set == -1)
16864 TOC_alias_set = new_alias_set ();
16865 return TOC_alias_set;
16866 }
16867
16868 /* The mode the ABI uses for a word. This is not the same as word_mode
16869 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16870
16871 static scalar_int_mode
16872 rs6000_abi_word_mode (void)
16873 {
16874 return TARGET_32BIT ? SImode : DImode;
16875 }
16876
16877 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16878 static char *
16879 rs6000_offload_options (void)
16880 {
16881 if (TARGET_64BIT)
16882 return xstrdup ("-foffload-abi=lp64");
16883 else
16884 return xstrdup ("-foffload-abi=ilp32");
16885 }
16886
16887 \f
16888 /* A quick summary of the various types of 'constant-pool tables'
16889 under PowerPC:
16890
16891 Target Flags Name One table per
16892 AIX (none) AIX TOC object file
16893 AIX -mfull-toc AIX TOC object file
16894 AIX -mminimal-toc AIX minimal TOC translation unit
16895 SVR4/EABI (none) SVR4 SDATA object file
16896 SVR4/EABI -fpic SVR4 pic object file
16897 SVR4/EABI -fPIC SVR4 PIC translation unit
16898 SVR4/EABI -mrelocatable EABI TOC function
16899 SVR4/EABI -maix AIX TOC object file
16900 SVR4/EABI -maix -mminimal-toc
16901 AIX minimal TOC translation unit
16902
16903 Name Reg. Set by entries contains:
16904 made by addrs? fp? sum?
16905
16906 AIX TOC 2 crt0 as Y option option
16907 AIX minimal TOC 30 prolog gcc Y Y option
16908 SVR4 SDATA 13 crt0 gcc N Y N
16909 SVR4 pic 30 prolog ld Y not yet N
16910 SVR4 PIC 30 prolog gcc Y option option
16911 EABI TOC 30 prolog gcc Y option option
16912
16913 */
16914
16915 /* Hash functions for the hash table. */
16916
16917 static unsigned
16918 rs6000_hash_constant (rtx k)
16919 {
16920 enum rtx_code code = GET_CODE (k);
16921 machine_mode mode = GET_MODE (k);
16922 unsigned result = (code << 3) ^ mode;
16923 const char *format;
16924 int flen, fidx;
16925
16926 format = GET_RTX_FORMAT (code);
16927 flen = strlen (format);
16928 fidx = 0;
16929
16930 switch (code)
16931 {
16932 case LABEL_REF:
16933 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16934
16935 case CONST_WIDE_INT:
16936 {
16937 int i;
16938 flen = CONST_WIDE_INT_NUNITS (k);
16939 for (i = 0; i < flen; i++)
16940 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16941 return result;
16942 }
16943
16944 case CONST_DOUBLE:
16945 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16946
16947 case CODE_LABEL:
16948 fidx = 3;
16949 break;
16950
16951 default:
16952 break;
16953 }
16954
16955 for (; fidx < flen; fidx++)
16956 switch (format[fidx])
16957 {
16958 case 's':
16959 {
16960 unsigned i, len;
16961 const char *str = XSTR (k, fidx);
16962 len = strlen (str);
16963 result = result * 613 + len;
16964 for (i = 0; i < len; i++)
16965 result = result * 613 + (unsigned) str[i];
16966 break;
16967 }
16968 case 'u':
16969 case 'e':
16970 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16971 break;
16972 case 'i':
16973 case 'n':
16974 result = result * 613 + (unsigned) XINT (k, fidx);
16975 break;
16976 case 'w':
16977 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16978 result = result * 613 + (unsigned) XWINT (k, fidx);
16979 else
16980 {
16981 size_t i;
16982 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16983 result = result * 613 + (unsigned) (XWINT (k, fidx)
16984 >> CHAR_BIT * i);
16985 }
16986 break;
16987 case '0':
16988 break;
16989 default:
16990 gcc_unreachable ();
16991 }
16992
16993 return result;
16994 }
16995
16996 hashval_t
16997 toc_hasher::hash (toc_hash_struct *thc)
16998 {
16999 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
17000 }
17001
17002 /* Compare H1 and H2 for equivalence. */
17003
17004 bool
17005 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
17006 {
17007 rtx r1 = h1->key;
17008 rtx r2 = h2->key;
17009
17010 if (h1->key_mode != h2->key_mode)
17011 return 0;
17012
17013 return rtx_equal_p (r1, r2);
17014 }
17015
17016 /* These are the names given by the C++ front-end to vtables and
17017 vtable-like objects. Ideally, this logic should not be here;
17018 instead, there should be some programmatic way of inquiring as
17019 to whether or not an object is a vtable. */
17020
17021 #define VTABLE_NAME_P(NAME) \
17022 (startswith (name, "_vt.") \
17023 || startswith (name, "_ZTV") \
17024 || startswith (name, "_ZTT") \
17025 || startswith (name, "_ZTI") \
17026 || startswith (name, "_ZTC"))
17027
17028 #ifdef NO_DOLLAR_IN_LABEL
17029 /* Return a GGC-allocated character string translating dollar signs in
17030 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17031
17032 const char *
17033 rs6000_xcoff_strip_dollar (const char *name)
17034 {
17035 char *strip, *p;
17036 const char *q;
17037 size_t len;
17038
17039 q = (const char *) strchr (name, '$');
17040
17041 if (q == 0 || q == name)
17042 return name;
17043
17044 len = strlen (name);
17045 strip = XALLOCAVEC (char, len + 1);
17046 strcpy (strip, name);
17047 p = strip + (q - name);
17048 while (p)
17049 {
17050 *p = '_';
17051 p = strchr (p + 1, '$');
17052 }
17053
17054 return ggc_alloc_string (strip, len);
17055 }
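
/* Example (illustrative): "f$g$h" becomes "f_g_h".  A name without a
   '$', or one whose first character is '$' (q == name), is returned
   unchanged.  */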
17056 #endif
17057
17058 void
17059 rs6000_output_symbol_ref (FILE *file, rtx x)
17060 {
17061 const char *name = XSTR (x, 0);
17062
17063 /* Currently C++ toc references to vtables can be emitted before it
17064 is decided whether the vtable is public or private. If this is
17065 the case, then the linker will eventually complain that there is
17066 a reference to an unknown section. Thus, for vtables only,
17067 we emit the TOC reference to reference the identifier and not the
17068 symbol. */
17069 if (VTABLE_NAME_P (name))
17070 {
17071 RS6000_OUTPUT_BASENAME (file, name);
17072 }
17073 else
17074 assemble_name (file, name);
17075 }
17076
17077 /* Output a TOC entry. We derive the entry name from what is being
17078 written. */
17079
17080 void
17081 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17082 {
17083 char buf[256];
17084 const char *name = buf;
17085 rtx base = x;
17086 HOST_WIDE_INT offset = 0;
17087
17088 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17089
17090 /* When the linker won't eliminate them, don't output duplicate
17091 TOC entries (this happens on AIX if there is any kind of TOC,
17092 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17093 CODE_LABELs. */
17094 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17095 {
17096 struct toc_hash_struct *h;
17097
17098 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17099 time because GGC is not initialized at that point. */
17100 if (toc_hash_table == NULL)
17101 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17102
17103 h = ggc_alloc<toc_hash_struct> ();
17104 h->key = x;
17105 h->key_mode = mode;
17106 h->labelno = labelno;
17107
17108 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17109 if (*found == NULL)
17110 *found = h;
17111 else /* This is indeed a duplicate.
17112 Set this label equal to that label. */
17113 {
17114 fputs ("\t.set ", file);
17115 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17116 fprintf (file, "%d,", labelno);
17117 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17118 fprintf (file, "%d\n", ((*found)->labelno));
17119
17120 #ifdef HAVE_AS_TLS
17121 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17122 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17123 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17124 {
17125 fputs ("\t.set ", file);
17126 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17127 fprintf (file, "%d,", labelno);
17128 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17129 fprintf (file, "%d\n", ((*found)->labelno));
17130 }
17131 #endif
17132 return;
17133 }
17134 }
17135
17136 /* If we're going to put a double constant in the TOC, make sure it's
17137 aligned properly when strict alignment is on. */
17138 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17139 && STRICT_ALIGNMENT
17140 && GET_MODE_BITSIZE (mode) >= 64
17141 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
17142 ASM_OUTPUT_ALIGN (file, 3);
17144
17145 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17146
17147 /* Handle FP constants specially. Note that if we have a minimal
17148 TOC, things we put here aren't actually in the TOC, so we can allow
17149 FP constants. */
17150 if (CONST_DOUBLE_P (x)
17151 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17152 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17153 {
17154 long k[4];
17155
17156 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17157 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17158 else
17159 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17160
17161 if (TARGET_64BIT)
17162 {
17163 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17164 fputs (DOUBLE_INT_ASM_OP, file);
17165 else
17166 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17167 k[0] & 0xffffffff, k[1] & 0xffffffff,
17168 k[2] & 0xffffffff, k[3] & 0xffffffff);
17169 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17170 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17171 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17172 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17173 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17174 return;
17175 }
17176 else
17177 {
17178 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17179 fputs ("\t.long ", file);
17180 else
17181 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17182 k[0] & 0xffffffff, k[1] & 0xffffffff,
17183 k[2] & 0xffffffff, k[3] & 0xffffffff);
17184 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17185 k[0] & 0xffffffff, k[1] & 0xffffffff,
17186 k[2] & 0xffffffff, k[3] & 0xffffffff);
17187 return;
17188 }
17189 }
17190 else if (CONST_DOUBLE_P (x)
17191 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17192 {
17193 long k[2];
17194
17195 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17196 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17197 else
17198 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17199
17200 if (TARGET_64BIT)
17201 {
17202 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17203 fputs (DOUBLE_INT_ASM_OP, file);
17204 else
17205 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17206 k[0] & 0xffffffff, k[1] & 0xffffffff);
17207 fprintf (file, "0x%lx%08lx\n",
17208 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17209 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17210 return;
17211 }
17212 else
17213 {
17214 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17215 fputs ("\t.long ", file);
17216 else
17217 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17218 k[0] & 0xffffffff, k[1] & 0xffffffff);
17219 fprintf (file, "0x%lx,0x%lx\n",
17220 k[0] & 0xffffffff, k[1] & 0xffffffff);
17221 return;
17222 }
17223 }
17224 else if (CONST_DOUBLE_P (x)
17225 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17226 {
17227 long l;
17228
17229 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17230 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17231 else
17232 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17233
17234 if (TARGET_64BIT)
17235 {
17236 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17237 fputs (DOUBLE_INT_ASM_OP, file);
17238 else
17239 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17240 if (WORDS_BIG_ENDIAN)
17241 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17242 else
17243 fprintf (file, "0x%lx\n", l & 0xffffffff);
17244 return;
17245 }
17246 else
17247 {
17248 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17249 fputs ("\t.long ", file);
17250 else
17251 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17252 fprintf (file, "0x%lx\n", l & 0xffffffff);
17253 return;
17254 }
17255 }
17256 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17257 {
17258 unsigned HOST_WIDE_INT low;
17259 HOST_WIDE_INT high;
17260
17261 low = INTVAL (x) & 0xffffffff;
17262 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17263
17264 /* TOC entries are always Pmode-sized, so when big-endian
17265 smaller integer constants in the TOC need to be padded.
17266 (This is still a win over putting the constants in
17267 a separate constant pool, because then we'd have
17268 to have both a TOC entry _and_ the actual constant.)
17269
17270 For a 32-bit target, CONST_INT values are loaded and shifted
17271 entirely within `low' and can be stored in one TOC entry. */
17272
17273 /* It would be easy to make this work, but it doesn't now. */
17274 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17275
17276 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17277 {
17278 low |= high << 32;
17279 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17280 high = (HOST_WIDE_INT) low >> 32;
17281 low &= 0xffffffff;
17282 }
17283
17284 if (TARGET_64BIT)
17285 {
17286 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17287 fputs (DOUBLE_INT_ASM_OP, file);
17288 else
17289 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17290 (long) high & 0xffffffff, (long) low & 0xffffffff);
17291 fprintf (file, "0x%lx%08lx\n",
17292 (long) high & 0xffffffff, (long) low & 0xffffffff);
17293 return;
17294 }
17295 else
17296 {
17297 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17298 {
17299 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17300 fputs ("\t.long ", file);
17301 else
17302 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17303 (long) high & 0xffffffff, (long) low & 0xffffffff);
17304 fprintf (file, "0x%lx,0x%lx\n",
17305 (long) high & 0xffffffff, (long) low & 0xffffffff);
17306 }
17307 else
17308 {
17309 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17310 fputs ("\t.long ", file);
17311 else
17312 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17313 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17314 }
17315 return;
17316 }
17317 }
17318
17319 if (GET_CODE (x) == CONST)
17320 {
17321 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17322 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17323
17324 base = XEXP (XEXP (x, 0), 0);
17325 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17326 }
17327
17328 switch (GET_CODE (base))
17329 {
17330 case SYMBOL_REF:
17331 name = XSTR (base, 0);
17332 break;
17333
17334 case LABEL_REF:
17335 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17336 CODE_LABEL_NUMBER (XEXP (base, 0)));
17337 break;
17338
17339 case CODE_LABEL:
17340 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17341 break;
17342
17343 default:
17344 gcc_unreachable ();
17345 }
17346
17347 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17348 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17349 else
17350 {
17351 fputs ("\t.tc ", file);
17352 RS6000_OUTPUT_BASENAME (file, name);
17353
17354 if (offset < 0)
17355 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17356 else if (offset)
17357 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17358
17359 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17360 after other TOC symbols, reducing overflow of small TOC access
17361 to [TC] symbols. */
17362 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17363 ? "[TE]," : "[TC],", file);
17364 }
17365
17366 /* Currently C++ toc references to vtables can be emitted before it
17367 is decided whether the vtable is public or private. If this is
17368 the case, then the linker will eventually complain that there is
17369 a TOC reference to an unknown section. Thus, for vtables only,
17370 we emit the TOC reference to reference the symbol and not the
17371 section. */
17372 if (VTABLE_NAME_P (name))
17373 {
17374 RS6000_OUTPUT_BASENAME (file, name);
17375 if (offset < 0)
17376 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17377 else if (offset > 0)
17378 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17379 }
17380 else
17381 output_addr_const (file, x);
17382
17383 #if HAVE_AS_TLS
17384 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17385 {
17386 switch (SYMBOL_REF_TLS_MODEL (base))
17387 {
17388 case 0:
17389 break;
17390 case TLS_MODEL_LOCAL_EXEC:
17391 fputs ("@le", file);
17392 break;
17393 case TLS_MODEL_INITIAL_EXEC:
17394 fputs ("@ie", file);
17395 break;
17396 /* Use global-dynamic for local-dynamic. */
17397 case TLS_MODEL_GLOBAL_DYNAMIC:
17398 case TLS_MODEL_LOCAL_DYNAMIC:
17399 putc ('\n', file);
17400 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17401 fputs ("\t.tc .", file);
17402 RS6000_OUTPUT_BASENAME (file, name);
17403 fputs ("[TC],", file);
17404 output_addr_const (file, x);
17405 fputs ("@m", file);
17406 break;
17407 default:
17408 gcc_unreachable ();
17409 }
17410 }
17411 #endif
17412
17413 putc ('\n', file);
17414 }
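
/* Example (illustrative): for the DFmode constant 1.0, whose image is
   0x3ff0000000000000, a 64-bit AIX-style TOC gets an entry like

	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   while ELF and -mminimal-toc targets print just the value after
   DOUBLE_INT_ASM_OP, following the LC.. internal label emitted above.  */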
17415 \f
17416 /* Output an assembler pseudo-op to write an ASCII string of N characters
17417 starting at P to FILE.
17418
17419 On the RS/6000, we have to do this using the .byte operation and
17420 write out special characters outside the quoted string.
17421 Also, the assembler is broken; very long strings are truncated,
17422 so we must artificially break them up early. */
17423
17424 void
17425 output_ascii (FILE *file, const char *p, int n)
17426 {
17427 char c;
17428 int i, count_string;
17429 const char *for_string = "\t.byte \"";
17430 const char *for_decimal = "\t.byte ";
17431 const char *to_close = NULL;
17432
17433 count_string = 0;
17434 for (i = 0; i < n; i++)
17435 {
17436 c = *p++;
17437 if (c >= ' ' && c < 0177)
17438 {
17439 if (for_string)
17440 fputs (for_string, file);
17441 putc (c, file);
17442
17443 /* Write two quotes to get one. */
17444 if (c == '"')
17445 {
17446 putc (c, file);
17447 ++count_string;
17448 }
17449
17450 for_string = NULL;
17451 for_decimal = "\"\n\t.byte ";
17452 to_close = "\"\n";
17453 ++count_string;
17454
17455 if (count_string >= 512)
17456 {
17457 fputs (to_close, file);
17458
17459 for_string = "\t.byte \"";
17460 for_decimal = "\t.byte ";
17461 to_close = NULL;
17462 count_string = 0;
17463 }
17464 }
17465 else
17466 {
17467 if (for_decimal)
17468 fputs (for_decimal, file);
17469 fprintf (file, "%d", c);
17470
17471 for_string = "\n\t.byte \"";
17472 for_decimal = ", ";
17473 to_close = "\n";
17474 count_string = 0;
17475 }
17476 }
17477
17478 /* Now close the string if we have written one. Then end the line. */
17479 if (to_close)
17480 fputs (to_close, file);
17481 }
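
/* Example (illustrative): for the three bytes "Hi\n" the loop above
   writes

	.byte "Hi"
	.byte 10

   printable runs are grouped into quoted .byte directives (closed after
   512 characters to stay within assembler limits); everything else is
   emitted in decimal.  */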
17482 \f
17483 /* Generate a unique section name for FILENAME for a section type
17484 represented by SECTION_DESC. Output goes into BUF.
17485
17486 SECTION_DESC can be any string, as long as it is different for each
17487 possible section type.
17488
17489 We name the section in the same manner as xlc. The name begins with an
17490 underscore followed by the filename (after stripping any leading directory
17491 names) with the last period replaced by the string SECTION_DESC. If
17492 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17493 the name. */
17494
17495 void
17496 rs6000_gen_section_name (char **buf, const char *filename,
17497 const char *section_desc)
17498 {
17499 const char *q, *after_last_slash, *last_period = 0;
17500 char *p;
17501 int len;
17502
17503 after_last_slash = filename;
17504 for (q = filename; *q; q++)
17505 {
17506 if (*q == '/')
17507 after_last_slash = q + 1;
17508 else if (*q == '.')
17509 last_period = q;
17510 }
17511
17512 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17513 *buf = (char *) xmalloc (len);
17514
17515 p = *buf;
17516 *p++ = '_';
17517
17518 for (q = after_last_slash; *q; q++)
17519 {
17520 if (q == last_period)
17521 {
17522 strcpy (p, section_desc);
17523 p += strlen (section_desc);
17524 break;
17525 }
17526
17527 else if (ISALNUM (*q))
17528 *p++ = *q;
17529 }
17530
17531 if (last_period == 0)
17532 strcpy (p, section_desc);
17533 else
17534 *p = '\0';
17535 }
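
/* Example (illustrative): FILENAME "src/my.file.c" with SECTION_DESC
   "XX" yields "_myfileXX": the directory is stripped, non-alphanumeric
   characters are dropped, and SECTION_DESC replaces the last period.  */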
17536 \f
17537 /* Emit profile function. */
17538
17539 void
17540 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17541 {
17542 /* Non-standard profiling for kernels, which just saves LR then calls
17543 _mcount without worrying about arg saves. The idea is to change
17544 the function prologue as little as possible as it isn't easy to
17545 account for arg save/restore code added just for _mcount. */
17546 if (TARGET_PROFILE_KERNEL)
17547 return;
17548
17549 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17550 {
17551 #ifndef NO_PROFILE_COUNTERS
17552 # define NO_PROFILE_COUNTERS 0
17553 #endif
17554 if (NO_PROFILE_COUNTERS)
17555 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17556 LCT_NORMAL, VOIDmode);
17557 else
17558 {
17559 char buf[30];
17560 const char *label_name;
17561 rtx fun;
17562
17563 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17564 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17565 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17566
17567 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17568 LCT_NORMAL, VOIDmode, fun, Pmode);
17569 }
17570 }
17571 else if (DEFAULT_ABI == ABI_DARWIN)
17572 {
17573 const char *mcount_name = RS6000_MCOUNT;
17574 int caller_addr_regno = LR_REGNO;
17575
17576 /* Be conservative and always set this, at least for now. */
17577 crtl->uses_pic_offset_table = 1;
17578
17579 #if TARGET_MACHO
17580 /* For PIC code, set up a stub and collect the caller's address
17581 from r0, which is where the prologue puts it. */
17582 if (MACHOPIC_INDIRECT
17583 && crtl->uses_pic_offset_table)
17584 caller_addr_regno = 0;
17585 #endif
17586 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17587 LCT_NORMAL, VOIDmode,
17588 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17589 }
17590 }
17591
17592 /* Write function profiler code. */
17593
17594 void
17595 output_function_profiler (FILE *file, int labelno)
17596 {
17597 char buf[100];
17598
17599 switch (DEFAULT_ABI)
17600 {
17601 default:
17602 gcc_unreachable ();
17603
17604 case ABI_V4:
17605 if (!TARGET_32BIT)
17606 {
17607 warning (0, "no profiling of 64-bit code for this ABI");
17608 return;
17609 }
17610 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17611 fprintf (file, "\tmflr %s\n", reg_names[0]);
17612 if (NO_PROFILE_COUNTERS)
17613 {
17614 asm_fprintf (file, "\tstw %s,4(%s)\n",
17615 reg_names[0], reg_names[1]);
17616 }
17617 else if (TARGET_SECURE_PLT && flag_pic)
17618 {
17619 if (TARGET_LINK_STACK)
17620 {
17621 char name[32];
17622 get_ppc476_thunk_name (name);
17623 asm_fprintf (file, "\tbl %s\n", name);
17624 }
17625 else
17626 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17627 asm_fprintf (file, "\tstw %s,4(%s)\n",
17628 reg_names[0], reg_names[1]);
17629 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17630 asm_fprintf (file, "\taddis %s,%s,",
17631 reg_names[12], reg_names[12]);
17632 assemble_name (file, buf);
17633 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17634 assemble_name (file, buf);
17635 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17636 }
17637 else if (flag_pic == 1)
17638 {
17639 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17640 asm_fprintf (file, "\tstw %s,4(%s)\n",
17641 reg_names[0], reg_names[1]);
17642 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17643 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17644 assemble_name (file, buf);
17645 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17646 }
17647 else if (flag_pic > 1)
17648 {
17649 asm_fprintf (file, "\tstw %s,4(%s)\n",
17650 reg_names[0], reg_names[1]);
17651 /* Now, we need to get the address of the label. */
17652 if (TARGET_LINK_STACK)
17653 {
17654 char name[32];
17655 get_ppc476_thunk_name (name);
17656 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17657 assemble_name (file, buf);
17658 fputs ("-.\n1:", file);
17659 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17660 asm_fprintf (file, "\taddi %s,%s,4\n",
17661 reg_names[11], reg_names[11]);
17662 }
17663 else
17664 {
17665 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17666 assemble_name (file, buf);
17667 fputs ("-.\n1:", file);
17668 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17669 }
17670 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17671 reg_names[0], reg_names[11]);
17672 asm_fprintf (file, "\tadd %s,%s,%s\n",
17673 reg_names[0], reg_names[0], reg_names[11]);
17674 }
17675 else
17676 {
17677 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17678 assemble_name (file, buf);
17679 fputs ("@ha\n", file);
17680 asm_fprintf (file, "\tstw %s,4(%s)\n",
17681 reg_names[0], reg_names[1]);
17682 asm_fprintf (file, "\tla %s,", reg_names[0]);
17683 assemble_name (file, buf);
17684 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17685 }
17686
17687 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17688 fprintf (file, "\tbl %s%s\n",
17689 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17690 break;
17691
17692 case ABI_AIX:
17693 case ABI_ELFv2:
17694 case ABI_DARWIN:
17695 /* Don't do anything, done in output_profile_hook (). */
17696 break;
17697 }
17698 }
17699
17700 \f
17701
17702 /* The following variable holds the last issued insn.  */
17703
17704 static rtx_insn *last_scheduled_insn;
17705
17706 /* The following variable helps to balance issuing of load and
17707 store instructions.  */
17708
17709 static int load_store_pendulum;
17710
17711 /* The following variable helps pair divide insns during scheduling. */
17712 static int divide_cnt;
17713 /* The following variable helps pair and alternate vector and vector load
17714 insns during scheduling. */
17715 static int vec_pairing;
17716
17717
17718 /* Power4 load update and store update instructions are cracked into a
17719 load or store and an integer insn which are executed in the same cycle.
17720 Branches have their own dispatch slot which does not count against the
17721 GCC issue rate, but it changes the program flow so there are no other
17722 instructions to issue in this cycle. */
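/* As a rough illustration of the accounting below: with MORE == 4
   slots left, an ordinary insn leaves 3, a cracked insn (two internal
   operations) leaves 2, and a microcoded insn closes the dispatch
   group, leaving 0.  */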
17723
17724 static int
17725 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17726 {
17727 last_scheduled_insn = insn;
17728 if (GET_CODE (PATTERN (insn)) == USE
17729 || GET_CODE (PATTERN (insn)) == CLOBBER)
17730 {
17731 cached_can_issue_more = more;
17732 return cached_can_issue_more;
17733 }
17734
17735 if (insn_terminates_group_p (insn, current_group))
17736 {
17737 cached_can_issue_more = 0;
17738 return cached_can_issue_more;
17739 }
17740
17741 /* If the insn has no reservation but we reach here anyway, leave the issue count unchanged. */
17742 if (recog_memoized (insn) < 0)
17743 return more;
17744
17745 if (rs6000_sched_groups)
17746 {
17747 if (is_microcoded_insn (insn))
17748 cached_can_issue_more = 0;
17749 else if (is_cracked_insn (insn))
17750 cached_can_issue_more = more > 2 ? more - 2 : 0;
17751 else
17752 cached_can_issue_more = more - 1;
17753
17754 return cached_can_issue_more;
17755 }
17756
17757 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17758 return 0;
17759
17760 cached_can_issue_more = more - 1;
17761 return cached_can_issue_more;
17762 }
17763
17764 static int
17765 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17766 {
17767 int r = rs6000_variable_issue_1 (insn, more);
17768 if (verbose)
17769 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17770 return r;
17771 }
17772
17773 /* Adjust the cost of a scheduling dependency. Return the new cost of
17774 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17775
17776 static int
17777 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17778 unsigned int)
17779 {
17780 enum attr_type attr_type;
17781
17782 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17783 return cost;
17784
17785 switch (dep_type)
17786 {
17787 case REG_DEP_TRUE:
17788 {
17789 /* Data dependency; DEP_INSN writes a register that INSN reads
17790 some cycles later. */
17791
17792 /* Separate a load from a narrower, dependent store. */
17793 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17794 || rs6000_tune == PROCESSOR_POWER10)
17795 && GET_CODE (PATTERN (insn)) == SET
17796 && GET_CODE (PATTERN (dep_insn)) == SET
17797 && MEM_P (XEXP (PATTERN (insn), 1))
17798 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17799 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17800 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17801 return cost + 14;
17802
17803 attr_type = get_attr_type (insn);
17804
17805 switch (attr_type)
17806 {
17807 case TYPE_JMPREG:
17808 /* Tell the first scheduling pass about the latency between
17809 a mtctr and bctr (and mtlr and br/blr). The first
17810 scheduling pass will not know about this latency since
17811 the mtctr instruction, which has the latency associated
17812 to it, will be generated by reload. */
17813 return 4;
17814 case TYPE_BRANCH:
17815 /* Leave some extra cycles between a compare and its
17816 dependent branch, to inhibit expensive mispredicts. */
17817 if ((rs6000_tune == PROCESSOR_PPC603
17818 || rs6000_tune == PROCESSOR_PPC604
17819 || rs6000_tune == PROCESSOR_PPC604e
17820 || rs6000_tune == PROCESSOR_PPC620
17821 || rs6000_tune == PROCESSOR_PPC630
17822 || rs6000_tune == PROCESSOR_PPC750
17823 || rs6000_tune == PROCESSOR_PPC7400
17824 || rs6000_tune == PROCESSOR_PPC7450
17825 || rs6000_tune == PROCESSOR_PPCE5500
17826 || rs6000_tune == PROCESSOR_PPCE6500
17827 || rs6000_tune == PROCESSOR_POWER4
17828 || rs6000_tune == PROCESSOR_POWER5
17829 || rs6000_tune == PROCESSOR_POWER7
17830 || rs6000_tune == PROCESSOR_POWER8
17831 || rs6000_tune == PROCESSOR_POWER9
17832 || rs6000_tune == PROCESSOR_POWER10
17833 || rs6000_tune == PROCESSOR_CELL)
17834 && recog_memoized (dep_insn)
17835 && (INSN_CODE (dep_insn) >= 0))
17836
17837 switch (get_attr_type (dep_insn))
17838 {
17839 case TYPE_CMP:
17840 case TYPE_FPCOMPARE:
17841 case TYPE_CR_LOGICAL:
17842 return cost + 2;
17843 case TYPE_EXTS:
17844 case TYPE_MUL:
17845 if (get_attr_dot (dep_insn) == DOT_YES)
17846 return cost + 2;
17847 else
17848 break;
17849 case TYPE_SHIFT:
17850 if (get_attr_dot (dep_insn) == DOT_YES
17851 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17852 return cost + 2;
17853 else
17854 break;
17855 default:
17856 break;
17857 }
17858 break;
17859
17860 case TYPE_STORE:
17861 case TYPE_FPSTORE:
17862 if ((rs6000_tune == PROCESSOR_POWER6)
17863 && recog_memoized (dep_insn)
17864 && (INSN_CODE (dep_insn) >= 0))
17865 {
17866
17867 if (GET_CODE (PATTERN (insn)) != SET)
17868 /* If this happens, we have to extend this to schedule
17869 optimally. Return default for now. */
17870 return cost;
17871
17872 /* Adjust the cost for the case where the value written
17873 by a fixed point operation is used as the address
17874 gen value on a store. */
17875 switch (get_attr_type (dep_insn))
17876 {
17877 case TYPE_LOAD:
17878 case TYPE_CNTLZ:
17879 {
17880 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17881 return get_attr_sign_extend (dep_insn)
17882 == SIGN_EXTEND_YES ? 6 : 4;
17883 break;
17884 }
17885 case TYPE_SHIFT:
17886 {
17887 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17888 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17889 6 : 3;
17890 break;
17891 }
17892 case TYPE_INTEGER:
17893 case TYPE_ADD:
17894 case TYPE_LOGICAL:
17895 case TYPE_EXTS:
17896 case TYPE_INSERT:
17897 {
17898 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17899 return 3;
17900 break;
17901 }
17902 case TYPE_STORE:
17903 case TYPE_FPLOAD:
17904 case TYPE_FPSTORE:
17905 {
17906 if (get_attr_update (dep_insn) == UPDATE_YES
17907 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17908 return 3;
17909 break;
17910 }
17911 case TYPE_MUL:
17912 {
17913 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17914 return 17;
17915 break;
17916 }
17917 case TYPE_DIV:
17918 {
17919 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17920 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17921 break;
17922 }
17923 default:
17924 break;
17925 }
17926 }
17927 break;
17928
17929 case TYPE_LOAD:
17930 if ((rs6000_tune == PROCESSOR_POWER6)
17931 && recog_memoized (dep_insn)
17932 && (INSN_CODE (dep_insn) >= 0))
17933 {
17934
17935 /* Adjust the cost for the case where the value written
17936 by a fixed point instruction is used within the address
17937 gen portion of a subsequent load(u)(x) */
17938 switch (get_attr_type (dep_insn))
17939 {
17940 case TYPE_LOAD:
17941 case TYPE_CNTLZ:
17942 {
17943 if (set_to_load_agen (dep_insn, insn))
17944 return get_attr_sign_extend (dep_insn)
17945 == SIGN_EXTEND_YES ? 6 : 4;
17946 break;
17947 }
17948 case TYPE_SHIFT:
17949 {
17950 if (set_to_load_agen (dep_insn, insn))
17951 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17952 6 : 3;
17953 break;
17954 }
17955 case TYPE_INTEGER:
17956 case TYPE_ADD:
17957 case TYPE_LOGICAL:
17958 case TYPE_EXTS:
17959 case TYPE_INSERT:
17960 {
17961 if (set_to_load_agen (dep_insn, insn))
17962 return 3;
17963 break;
17964 }
17965 case TYPE_STORE:
17966 case TYPE_FPLOAD:
17967 case TYPE_FPSTORE:
17968 {
17969 if (get_attr_update (dep_insn) == UPDATE_YES
17970 && set_to_load_agen (dep_insn, insn))
17971 return 3;
17972 break;
17973 }
17974 case TYPE_MUL:
17975 {
17976 if (set_to_load_agen (dep_insn, insn))
17977 return 17;
17978 break;
17979 }
17980 case TYPE_DIV:
17981 {
17982 if (set_to_load_agen (dep_insn, insn))
17983 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17984 break;
17985 }
17986 default:
17987 break;
17988 }
17989 }
17990 break;
17991
17992 default:
17993 break;
17994 }
17995
17996 /* Fall out to return default cost. */
17997 }
17998 break;
17999
18000 case REG_DEP_OUTPUT:
18001 /* Output dependency; DEP_INSN writes a register that INSN writes some
18002 cycles later. */
18003 if ((rs6000_tune == PROCESSOR_POWER6)
18004 && recog_memoized (dep_insn)
18005 && (INSN_CODE (dep_insn) >= 0))
18006 {
18007 attr_type = get_attr_type (insn);
18008
18009 switch (attr_type)
18010 {
18011 case TYPE_FP:
18012 case TYPE_FPSIMPLE:
18013 if (get_attr_type (dep_insn) == TYPE_FP
18014 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
18015 return 1;
18016 break;
18017 default:
18018 break;
18019 }
18020 }
18021 /* Fall through, no cost for output dependency. */
18022 /* FALLTHRU */
18023
18024 case REG_DEP_ANTI:
18025 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18026 cycles later. */
18027 return 0;
18028
18029 default:
18030 gcc_unreachable ();
18031 }
18032
18033 return cost;
18034 }
18035
18036 /* Debug version of rs6000_adjust_cost. */
18037
18038 static int
18039 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18040 int cost, unsigned int dw)
18041 {
18042 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18043
18044 if (ret != cost)
18045 {
18046 const char *dep;
18047
18048 switch (dep_type)
18049 {
18050 default: dep = "unknown dependency"; break;
18051 case REG_DEP_TRUE: dep = "data dependency"; break;
18052 case REG_DEP_OUTPUT: dep = "output dependency"; break;
18053 case REG_DEP_ANTI: dep = "anti dependency"; break;
18054 }
18055
18056 fprintf (stderr,
18057 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18058 "%s, insn:\n", ret, cost, dep);
18059
18060 debug_rtx (insn);
18061 }
18062
18063 return ret;
18064 }
18065
18066 /* Return true if INSN is microcoded, false otherwise. */
18068
18069 static bool
18070 is_microcoded_insn (rtx_insn *insn)
18071 {
18072 if (!insn || !NONDEBUG_INSN_P (insn)
18073 || GET_CODE (PATTERN (insn)) == USE
18074 || GET_CODE (PATTERN (insn)) == CLOBBER)
18075 return false;
18076
18077 if (rs6000_tune == PROCESSOR_CELL)
18078 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18079
18080 if (rs6000_sched_groups
18081 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18082 {
18083 enum attr_type type = get_attr_type (insn);
18084 if ((type == TYPE_LOAD
18085 && get_attr_update (insn) == UPDATE_YES
18086 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18087 || ((type == TYPE_LOAD || type == TYPE_STORE)
18088 && get_attr_update (insn) == UPDATE_YES
18089 && get_attr_indexed (insn) == INDEXED_YES)
18090 || type == TYPE_MFCR)
18091 return true;
18092 }
18093
18094 return false;
18095 }
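/* For example, on Power4/Power5 this classifies update-indexed memory
   forms such as lwzux or stwux, algebraic loads with update such as
   lhau, and mfcr as microcoded; on the Cell the answer comes directly
   from the "cell_micro" insn attribute.  */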
18096
18097 /* The function returns true if INSN is cracked into 2 instructions
18098 by the processor (and therefore occupies 2 issue slots). */
18099
18100 static bool
18101 is_cracked_insn (rtx_insn *insn)
18102 {
18103 if (!insn || !NONDEBUG_INSN_P (insn)
18104 || GET_CODE (PATTERN (insn)) == USE
18105 || GET_CODE (PATTERN (insn)) == CLOBBER)
18106 return false;
18107
18108 if (rs6000_sched_groups
18109 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18110 {
18111 enum attr_type type = get_attr_type (insn);
18112 if ((type == TYPE_LOAD
18113 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18114 && get_attr_update (insn) == UPDATE_NO)
18115 || (type == TYPE_LOAD
18116 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18117 && get_attr_update (insn) == UPDATE_YES
18118 && get_attr_indexed (insn) == INDEXED_NO)
18119 || (type == TYPE_STORE
18120 && get_attr_update (insn) == UPDATE_YES
18121 && get_attr_indexed (insn) == INDEXED_NO)
18122 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18123 && get_attr_update (insn) == UPDATE_YES)
18124 || (type == TYPE_CR_LOGICAL
18125 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18126 || (type == TYPE_EXTS
18127 && get_attr_dot (insn) == DOT_YES)
18128 || (type == TYPE_SHIFT
18129 && get_attr_dot (insn) == DOT_YES
18130 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18131 || (type == TYPE_MUL
18132 && get_attr_dot (insn) == DOT_YES)
18133 || type == TYPE_DIV
18134 || (type == TYPE_INSERT
18135 && get_attr_size (insn) == SIZE_32))
18136 return true;
18137 }
18138
18139 return false;
18140 }
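/* Typical examples on Power4/Power5 (illustrative; the condition above
   is authoritative): lha (algebraic load), lwzu and stwu (non-indexed
   update forms), lfdu, recording forms such as extsw., and divide
   insns, all of which the dispatcher splits into two operations.  */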
18141
18142 /* The function returns true if INSN can be issued only from
18143 the branch slot. */
18144
18145 static bool
18146 is_branch_slot_insn (rtx_insn *insn)
18147 {
18148 if (!insn || !NONDEBUG_INSN_P (insn)
18149 || GET_CODE (PATTERN (insn)) == USE
18150 || GET_CODE (PATTERN (insn)) == CLOBBER)
18151 return false;
18152
18153 if (rs6000_sched_groups)
18154 {
18155 enum attr_type type = get_attr_type (insn);
18156 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18157 return true;
18158 return false;
18159 }
18160
18161 return false;
18162 }
18163
18164 /* Return true if OUT_INSN sets a value that is used in the address
18165 generation computation of IN_INSN. */
18166 static bool
18167 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18168 {
18169 rtx out_set, in_set;
18170
18171 /* For performance reasons, only handle the simple case where
18172 both loads are a single_set. */
18173 out_set = single_set (out_insn);
18174 if (out_set)
18175 {
18176 in_set = single_set (in_insn);
18177 if (in_set)
18178 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18179 }
18180
18181 return false;
18182 }
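/* E.g. if OUT_INSN is "addi 9,9,64" and IN_INSN is "lwz 3,0(9)", the
   addi result feeds the load's address generation, so true is
   returned (both patterns being simple single_set insns).  */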
18183
18184 /* Try to determine base/offset/size parts of the given MEM.
18185 Return true if successful, false if any of the values could not
18186 be determined.
18187
18188 This function only looks for REG or REG+CONST address forms.
18189 REG+REG address form will return false. */
18190
18191 static bool
18192 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18193 HOST_WIDE_INT *size)
18194 {
18195 rtx addr_rtx;
18196 if (MEM_SIZE_KNOWN_P (mem))
18197 *size = MEM_SIZE (mem);
18198 else
18199 return false;
18200
18201 addr_rtx = (XEXP (mem, 0));
18202 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18203 addr_rtx = XEXP (addr_rtx, 1);
18204
18205 *offset = 0;
18206 while (GET_CODE (addr_rtx) == PLUS
18207 && CONST_INT_P (XEXP (addr_rtx, 1)))
18208 {
18209 *offset += INTVAL (XEXP (addr_rtx, 1));
18210 addr_rtx = XEXP (addr_rtx, 0);
18211 }
18212 if (!REG_P (addr_rtx))
18213 return false;
18214
18215 *base = addr_rtx;
18216 return true;
18217 }
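/* Illustrative RTL (hypothetical registers): for
     (mem:DI (plus (plus (reg 9) (const_int 32)) (const_int 8)))
   with a known size this sets *BASE = r9, *OFFSET = 40, *SIZE = 8 and
   returns true, while a REG+REG address such as
     (mem:DI (plus (reg 9) (reg 10)))
   yields false.  */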
18218
18219 /* If the target storage locations of arguments MEM1 and MEM2 are
18220 adjacent, then return the argument that has the lower address.
18221 Otherwise, return NULL_RTX. */
18222
18223 static rtx
18224 adjacent_mem_locations (rtx mem1, rtx mem2)
18225 {
18226 rtx reg1, reg2;
18227 HOST_WIDE_INT off1, size1, off2, size2;
18228
18229 if (MEM_P (mem1)
18230 && MEM_P (mem2)
18231 && get_memref_parts (mem1, &reg1, &off1, &size1)
18232 && get_memref_parts (mem2, &reg2, &off2, &size2)
18233 && REGNO (reg1) == REGNO (reg2))
18234 {
18235 if (off1 + size1 == off2)
18236 return mem1;
18237 else if (off2 + size2 == off1)
18238 return mem2;
18239 }
18240
18241 return NULL_RTX;
18242 }
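/* For example, an 8-byte MEM1 at 16(r1) and an 8-byte MEM2 at 24(r1)
   are adjacent and MEM1 (the lower address) is returned; if MEM2 were
   at 32(r1) instead, NULL_RTX would be returned.  */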
18243
18244 /* This function returns true if it can be determined that the two MEM
18245 locations overlap by at least 1 byte based on base reg/offset/size. */
18246
18247 static bool
18248 mem_locations_overlap (rtx mem1, rtx mem2)
18249 {
18250 rtx reg1, reg2;
18251 HOST_WIDE_INT off1, size1, off2, size2;
18252
18253 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18254 && get_memref_parts (mem2, &reg2, &off2, &size2))
18255 return ((REGNO (reg1) == REGNO (reg2))
18256 && (((off1 <= off2) && (off1 + size1 > off2))
18257 || ((off2 <= off1) && (off2 + size2 > off1))));
18258
18259 return false;
18260 }
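/* E.g. an 8-byte access at 0(r9) overlaps a 4-byte access at 4(r9)
   (0 <= 4 and 0 + 8 > 4), whereas accesses at 0(r9) and 8(r9) with
   sizes 8 and 4 do not.  */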
18261
18262 /* A C statement (sans semicolon) to update the integer scheduling
18263 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18264 INSN earlier, reduce the priority to execute INSN later. Do not
18265 define this macro if you do not need to adjust the scheduling
18266 priorities of insns. */
18267
18268 static int
18269 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18270 {
18271 rtx load_mem, str_mem;
18272 /* On machines (like the 750) which have asymmetric integer units,
18273 where one integer unit can do multiply and divides and the other
18274 can't, reduce the priority of multiply/divide so it is scheduled
18275 before other integer operations. */
18276
18277 #if 0
18278 if (! INSN_P (insn))
18279 return priority;
18280
18281 if (GET_CODE (PATTERN (insn)) == USE)
18282 return priority;
18283
18284 switch (rs6000_tune) {
18285 case PROCESSOR_PPC750:
18286 switch (get_attr_type (insn))
18287 {
18288 default:
18289 break;
18290
18291 case TYPE_MUL:
18292 case TYPE_DIV:
18293 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18294 priority, priority);
18295 if (priority >= 0 && priority < 0x01000000)
18296 priority >>= 3;
18297 break;
18298 }
18299 }
18300 #endif
18301
18302 if (insn_must_be_first_in_group (insn)
18303 && reload_completed
18304 && current_sched_info->sched_max_insns_priority
18305 && rs6000_sched_restricted_insns_priority)
18306 {
18307
18308 /* Prioritize insns that can be dispatched only in the first
18309 dispatch slot. */
18310 if (rs6000_sched_restricted_insns_priority == 1)
18311 /* Attach highest priority to insn. This means that in
18312 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18313 precede 'priority' (critical path) considerations. */
18314 return current_sched_info->sched_max_insns_priority;
18315 else if (rs6000_sched_restricted_insns_priority == 2)
18316 /* Increase priority of insn by a minimal amount. This means that in
18317 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18318 considerations precede dispatch-slot restriction considerations. */
18319 return (priority + 1);
18320 }
18321
18322 if (rs6000_tune == PROCESSOR_POWER6
18323 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18324 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18325 /* Attach highest priority to insn if the scheduler has just issued two
18326 stores and this instruction is a load, or two loads and this instruction
18327 is a store. Power6 wants loads and stores scheduled alternately
18328 when possible.  */
18329 return current_sched_info->sched_max_insns_priority;
18330
18331 return priority;
18332 }
18333
18334 /* Return true if the instruction is nonpipelined on the Cell. */
18335 static bool
18336 is_nonpipeline_insn (rtx_insn *insn)
18337 {
18338 enum attr_type type;
18339 if (!insn || !NONDEBUG_INSN_P (insn)
18340 || GET_CODE (PATTERN (insn)) == USE
18341 || GET_CODE (PATTERN (insn)) == CLOBBER)
18342 return false;
18343
18344 type = get_attr_type (insn);
18345 if (type == TYPE_MUL
18346 || type == TYPE_DIV
18347 || type == TYPE_SDIV
18348 || type == TYPE_DDIV
18349 || type == TYPE_SSQRT
18350 || type == TYPE_DSQRT
18351 || type == TYPE_MFCR
18352 || type == TYPE_MFCRF
18353 || type == TYPE_MFJMPR)
18354 {
18355 return true;
18356 }
18357 return false;
18358 }
18359
18360
18361 /* Return how many instructions the machine can issue per cycle. */
18362
18363 static int
18364 rs6000_issue_rate (void)
18365 {
18366 /* Unless scheduling for register pressure, use issue rate of 1 for
18367 first scheduling pass to decrease degradation. */
18368 if (!reload_completed && !flag_sched_pressure)
18369 return 1;
18370
18371 switch (rs6000_tune) {
18372 case PROCESSOR_RS64A:
18373 case PROCESSOR_PPC601: /* ? */
18374 case PROCESSOR_PPC7450:
18375 return 3;
18376 case PROCESSOR_PPC440:
18377 case PROCESSOR_PPC603:
18378 case PROCESSOR_PPC750:
18379 case PROCESSOR_PPC7400:
18380 case PROCESSOR_PPC8540:
18381 case PROCESSOR_PPC8548:
18382 case PROCESSOR_CELL:
18383 case PROCESSOR_PPCE300C2:
18384 case PROCESSOR_PPCE300C3:
18385 case PROCESSOR_PPCE500MC:
18386 case PROCESSOR_PPCE500MC64:
18387 case PROCESSOR_PPCE5500:
18388 case PROCESSOR_PPCE6500:
18389 case PROCESSOR_TITAN:
18390 return 2;
18391 case PROCESSOR_PPC476:
18392 case PROCESSOR_PPC604:
18393 case PROCESSOR_PPC604e:
18394 case PROCESSOR_PPC620:
18395 case PROCESSOR_PPC630:
18396 return 4;
18397 case PROCESSOR_POWER4:
18398 case PROCESSOR_POWER5:
18399 case PROCESSOR_POWER6:
18400 case PROCESSOR_POWER7:
18401 return 5;
18402 case PROCESSOR_POWER8:
18403 return 7;
18404 case PROCESSOR_POWER9:
18405 return 6;
18406 case PROCESSOR_POWER10:
18407 return 8;
18408 default:
18409 return 1;
18410 }
18411 }
18412
18413 /* Return how many instructions to look ahead for better insn
18414 scheduling. */
18415
18416 static int
18417 rs6000_use_sched_lookahead (void)
18418 {
18419 switch (rs6000_tune)
18420 {
18421 case PROCESSOR_PPC8540:
18422 case PROCESSOR_PPC8548:
18423 return 4;
18424
18425 case PROCESSOR_CELL:
18426 return (reload_completed ? 8 : 0);
18427
18428 default:
18429 return 0;
18430 }
18431 }
18432
18433 /* We are choosing an insn from the ready queue. Return zero if INSN can be
18434 chosen. */
18435 static int
18436 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18437 {
18438 if (ready_index == 0)
18439 return 0;
18440
18441 if (rs6000_tune != PROCESSOR_CELL)
18442 return 0;
18443
18444 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18445
18446 if (!reload_completed
18447 || is_nonpipeline_insn (insn)
18448 || is_microcoded_insn (insn))
18449 return 1;
18450
18451 return 0;
18452 }
18453
18454 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18455 and return true. */
18456
18457 static bool
18458 find_mem_ref (rtx pat, rtx *mem_ref)
18459 {
18460 const char * fmt;
18461 int i, j;
18462
18463 /* stack_tie does not produce any real memory traffic. */
18464 if (tie_operand (pat, VOIDmode))
18465 return false;
18466
18467 if (MEM_P (pat))
18468 {
18469 *mem_ref = pat;
18470 return true;
18471 }
18472
18473 /* Recursively process the pattern. */
18474 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18475
18476 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18477 {
18478 if (fmt[i] == 'e')
18479 {
18480 if (find_mem_ref (XEXP (pat, i), mem_ref))
18481 return true;
18482 }
18483 else if (fmt[i] == 'E')
18484 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18485 {
18486 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18487 return true;
18488 }
18489 }
18490
18491 return false;
18492 }
18493
18494 /* Determine if PAT is a PATTERN of a load insn. */
18495
18496 static bool
18497 is_load_insn1 (rtx pat, rtx *load_mem)
18498 {
18499 if (!pat)
18500 return false;
18501
18502 if (GET_CODE (pat) == SET)
18503 {
18504 if (REG_P (SET_DEST (pat)))
18505 return find_mem_ref (SET_SRC (pat), load_mem);
18506 else
18507 return false;
18508 }
18509
18510 if (GET_CODE (pat) == PARALLEL)
18511 {
18512 int i;
18513
18514 for (i = 0; i < XVECLEN (pat, 0); i++)
18515 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18516 return true;
18517 }
18518
18519 return false;
18520 }
18521
18522 /* Determine if INSN loads from memory. */
18523
18524 static bool
18525 is_load_insn (rtx insn, rtx *load_mem)
18526 {
18527 if (!insn || !INSN_P (insn))
18528 return false;
18529
18530 if (CALL_P (insn))
18531 return false;
18532
18533 return is_load_insn1 (PATTERN (insn), load_mem);
18534 }
18535
18536 /* Determine if PAT is a PATTERN of a store insn. */
18537
18538 static bool
18539 is_store_insn1 (rtx pat, rtx *str_mem)
18540 {
18541 if (!pat)
18542 return false;
18543
18544 if (GET_CODE (pat) == SET)
18545 {
18546 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18547 return find_mem_ref (SET_DEST (pat), str_mem);
18548 else
18549 return false;
18550 }
18551
18552 if (GET_CODE (pat) == PARALLEL)
18553 {
18554 int i;
18555
18556 for (i = 0; i < XVECLEN (pat, 0); i++)
18557 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18558 return true;
18559 }
18560
18561 return false;
18562 }
18563
18564 /* Determine if INSN stores to memory. */
18565
18566 static bool
18567 is_store_insn (rtx insn, rtx *str_mem)
18568 {
18569 if (!insn || !INSN_P (insn))
18570 return false;
18571
18572 return is_store_insn1 (PATTERN (insn), str_mem);
18573 }
18574
18575 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18576
18577 static bool
18578 is_power9_pairable_vec_type (enum attr_type type)
18579 {
18580 switch (type)
18581 {
18582 case TYPE_VECSIMPLE:
18583 case TYPE_VECCOMPLEX:
18584 case TYPE_VECDIV:
18585 case TYPE_VECCMP:
18586 case TYPE_VECPERM:
18587 case TYPE_VECFLOAT:
18588 case TYPE_VECFDIV:
18589 case TYPE_VECDOUBLE:
18590 return true;
18591 default:
18592 break;
18593 }
18594 return false;
18595 }
18596
18597 /* Returns whether the dependence between INSN and NEXT is considered
18598 costly by the given target. */
18599
18600 static bool
18601 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18602 {
18603 rtx insn;
18604 rtx next;
18605 rtx load_mem, str_mem;
18606
18607 /* If the flag is not enabled - no dependence is considered costly;
18608 allow all dependent insns in the same group.
18609 This is the most aggressive option. */
18610 if (rs6000_sched_costly_dep == no_dep_costly)
18611 return false;
18612
18613 /* If the flag is set to 1 - a dependence is always considered costly;
18614 do not allow dependent instructions in the same group.
18615 This is the most conservative option. */
18616 if (rs6000_sched_costly_dep == all_deps_costly)
18617 return true;
18618
18619 insn = DEP_PRO (dep);
18620 next = DEP_CON (dep);
18621
18622 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18623 && is_load_insn (next, &load_mem)
18624 && is_store_insn (insn, &str_mem))
18625 /* Prevent load after store in the same group. */
18626 return true;
18627
18628 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18629 && is_load_insn (next, &load_mem)
18630 && is_store_insn (insn, &str_mem)
18631 && DEP_TYPE (dep) == REG_DEP_TRUE
18632 && mem_locations_overlap(str_mem, load_mem))
18633 /* Prevent load after store in the same group if it is a true
18634 dependence. */
18635 return true;
18636
18637 /* The flag is set to X; dependences with latency >= X are considered costly,
18638 and will not be scheduled in the same group. */
18639 if (rs6000_sched_costly_dep <= max_dep_latency
18640 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18641 return true;
18642
18643 return false;
18644 }
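/* Sketch of the effect of -msched-costly-dep= (value names as in the
   option documentation): "store_to_load" keeps any store and a
   dependent load out of the same group, "true_store_to_load" does so
   only when the accesses provably overlap, and a numeric value N
   splits only dependences whose remaining latency (cost - distance)
   is at least N.  */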
18645
18646 /* Return the next insn after INSN that is found before TAIL is reached,
18647 skipping any "non-active" insns - insns that will not actually occupy
18648 an issue slot. Return NULL_RTX if such an insn is not found. */
18649
18650 static rtx_insn *
18651 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18652 {
18653 if (insn == NULL_RTX || insn == tail)
18654 return NULL;
18655
18656 while (1)
18657 {
18658 insn = NEXT_INSN (insn);
18659 if (insn == NULL_RTX || insn == tail)
18660 return NULL;
18661
18662 if (CALL_P (insn)
18663 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18664 || (NONJUMP_INSN_P (insn)
18665 && GET_CODE (PATTERN (insn)) != USE
18666 && GET_CODE (PATTERN (insn)) != CLOBBER
18667 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18668 break;
18669 }
18670 return insn;
18671 }
18672
18673 /* Move instruction at POS to the end of the READY list. */
18674
18675 static void
18676 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18677 {
18678 rtx_insn *tmp;
18679 int i;
18680
18681 tmp = ready[pos];
18682 for (i = pos; i < lastpos; i++)
18683 ready[i] = ready[i + 1];
18684 ready[lastpos] = tmp;
18685 }
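/* Illustration: with READY = {A, B, C, D}, where index LASTPOS == 3 is
   picked first by the scheduler, move_to_end_of_ready (ready, 1, 3)
   rotates the list to {A, C, D, B}, making B the next insn chosen.  */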
18686
18687 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18688
18689 static int
18690 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18691 {
18692 /* For Power6, we need to handle some special cases to try and keep the
18693 store queue from overflowing and triggering expensive flushes.
18694
18695 This code monitors how load and store instructions are being issued
18696 and skews the ready list one way or the other to increase the likelihood
18697 that a desired instruction is issued at the proper time.
18698
18699 A couple of things are done. First, we maintain a "load_store_pendulum"
18700 to track the current state of load/store issue.
18701
18702 - If the pendulum is at zero, then no loads or stores have been
18703 issued in the current cycle so we do nothing.
18704
18705 - If the pendulum is 1, then a single load has been issued in this
18706 cycle and we attempt to locate another load in the ready list to
18707 issue with it.
18708
18709 - If the pendulum is -2, then two stores have already been
18710 issued in this cycle, so we increase the priority of the first load
18711 in the ready list to increase its likelihood of being chosen first
18712 in the next cycle.
18713
18714 - If the pendulum is -1, then a single store has been issued in this
18715 cycle and we attempt to locate another store in the ready list to
18716 issue with it, preferring a store to an adjacent memory location to
18717 facilitate store pairing in the store queue.
18718
18719 - If the pendulum is 2, then two loads have already been
18720 issued in this cycle, so we increase the priority of the first store
18721 in the ready list to increase its likelihood of being chosen first
18722 in the next cycle.
18723
18724 - If the pendulum < -2 or > 2, then do nothing.
18725
18726 Note: This code covers the most common scenarios. There exist
18727 non-load/store instructions which make use of the LSU and which
18728 would need to be accounted for to strictly model the behavior
18729 of the machine. Those instructions are currently unaccounted
18730 for to help minimize compile time overhead of this code.
18731 */
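   /* Worked example (informal): issuing two stores in one cycle moves
      the pendulum 0 -> -1 -> -2; at -2 the first load on the ready
      list gets a priority bump so the next cycle is likely to start
      with a load, and the pendulum is then pushed past -2 so that
      only one load is boosted.  */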
18732 int pos;
18733 rtx load_mem, str_mem;
18734
18735 if (is_store_insn (last_scheduled_insn, &str_mem))
18736 /* Issuing a store, swing the load_store_pendulum to the left */
18737 load_store_pendulum--;
18738 else if (is_load_insn (last_scheduled_insn, &load_mem))
18739 /* Issuing a load, swing the load_store_pendulum to the right */
18740 load_store_pendulum++;
18741 else
18742 return cached_can_issue_more;
18743
18744 /* If the pendulum is balanced, or there is only one instruction on
18745 the ready list, then all is well, so return. */
18746 if ((load_store_pendulum == 0) || (lastpos <= 0))
18747 return cached_can_issue_more;
18748
18749 if (load_store_pendulum == 1)
18750 {
18751 /* A load has been issued in this cycle. Scan the ready list
18752 for another load to issue with it */
18753 pos = lastpos;
18754
18755 while (pos >= 0)
18756 {
18757 if (is_load_insn (ready[pos], &load_mem))
18758 {
18759 /* Found a load. Move it to the head of the ready list,
18760 and adjust its priority so that it is more likely to
18761 stay there */
18762 move_to_end_of_ready (ready, pos, lastpos);
18763
18764 if (!sel_sched_p ()
18765 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18766 INSN_PRIORITY (ready[lastpos])++;
18767 break;
18768 }
18769 pos--;
18770 }
18771 }
18772 else if (load_store_pendulum == -2)
18773 {
18774 /* Two stores have been issued in this cycle. Increase the
18775 priority of the first load in the ready list to favor it for
18776 issuing in the next cycle. */
18777 pos = lastpos;
18778
18779 while (pos >= 0)
18780 {
18781 if (is_load_insn (ready[pos], &load_mem)
18782 && !sel_sched_p ()
18783 && INSN_PRIORITY_KNOWN (ready[pos]))
18784 {
18785 INSN_PRIORITY (ready[pos])++;
18786
18787 /* Adjust the pendulum to account for the fact that a load
18788 was found and increased in priority. This is to prevent
18789 increasing the priority of multiple loads */
18790 load_store_pendulum--;
18791
18792 break;
18793 }
18794 pos--;
18795 }
18796 }
18797 else if (load_store_pendulum == -1)
18798 {
18799 /* A store has been issued in this cycle. Scan the ready list for
18800 another store to issue with it, preferring a store to an adjacent
18801 memory location */
18802 int first_store_pos = -1;
18803
18804 pos = lastpos;
18805
18806 while (pos >= 0)
18807 {
18808 if (is_store_insn (ready[pos], &str_mem))
18809 {
18810 rtx str_mem2;
18811 /* Maintain the index of the first store found on the
18812 list */
18813 if (first_store_pos == -1)
18814 first_store_pos = pos;
18815
18816 if (is_store_insn (last_scheduled_insn, &str_mem2)
18817 && adjacent_mem_locations (str_mem, str_mem2))
18818 {
18819 /* Found an adjacent store. Move it to the head of the
18820 ready list, and adjust its priority so that it is
18821 more likely to stay there */
18822 move_to_end_of_ready (ready, pos, lastpos);
18823
18824 if (!sel_sched_p ()
18825 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18826 INSN_PRIORITY (ready[lastpos])++;
18827
18828 first_store_pos = -1;
18829
18830 break;
18831 }
18832 }
18833 pos--;
18834 }
18835
18836 if (first_store_pos >= 0)
18837 {
18838 /* An adjacent store wasn't found, but a non-adjacent store was,
18839 so move the non-adjacent store to the front of the ready
18840 list, and adjust its priority so that it is more likely to
18841 stay there. */
18842 move_to_end_of_ready (ready, first_store_pos, lastpos);
18843 if (!sel_sched_p ()
18844 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18845 INSN_PRIORITY (ready[lastpos])++;
18846 }
18847 }
18848 else if (load_store_pendulum == 2)
18849 {
18850 /* Two loads have been issued in this cycle. Increase the priority
18851 of the first store in the ready list to favor it for issuing in
18852 the next cycle. */
18853 pos = lastpos;
18854
18855 while (pos >= 0)
18856 {
18857 if (is_store_insn (ready[pos], &str_mem)
18858 && !sel_sched_p ()
18859 && INSN_PRIORITY_KNOWN (ready[pos]))
18860 {
18861 INSN_PRIORITY (ready[pos])++;
18862
18863 /* Adjust the pendulum to account for the fact that a store
18864 was found and increased in priority. This is to prevent
18865 increasing the priority of multiple stores */
18866 load_store_pendulum++;
18867
18868 break;
18869 }
18870 pos--;
18871 }
18872 }
18873
18874 return cached_can_issue_more;
18875 }
18876
18877 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18878
18879 static int
18880 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18881 {
18882 int pos;
18883 enum attr_type type, type2;
18884
18885 type = get_attr_type (last_scheduled_insn);
18886
18887 /* Try to issue fixed point divides back-to-back in pairs so they will be
18888 routed to separate execution units and execute in parallel. */
18889 if (type == TYPE_DIV && divide_cnt == 0)
18890 {
18891 /* First divide has been scheduled. */
18892 divide_cnt = 1;
18893
18894 /* Scan the ready list looking for another divide, if found move it
18895 to the end of the list so it is chosen next. */
18896 pos = lastpos;
18897 while (pos >= 0)
18898 {
18899 if (recog_memoized (ready[pos]) >= 0
18900 && get_attr_type (ready[pos]) == TYPE_DIV)
18901 {
18902 move_to_end_of_ready (ready, pos, lastpos);
18903 break;
18904 }
18905 pos--;
18906 }
18907 }
18908 else
18909 {
18910 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18911 divide_cnt = 0;
18912
18913 /* The best dispatch throughput for vector and vector load insns can be
18914 achieved by interleaving a vector and vector load such that they'll
18915 dispatch to the same superslice. If this pairing cannot be achieved
18916 then it is best to pair vector insns together and vector load insns
18917 together.
18918
18919 To aid in this pairing, vec_pairing maintains the current state with
18920 the following values:
18921
18922 0 : Initial state, no vecload/vector pairing has been started.
18923
18924 1 : A vecload or vector insn has been issued and a candidate for
18925 pairing has been found and moved to the end of the ready
18926 list. */
18927 if (type == TYPE_VECLOAD)
18928 {
18929 /* Issued a vecload. */
18930 if (vec_pairing == 0)
18931 {
18932 int vecload_pos = -1;
18933 /* We issued a single vecload, look for a vector insn to pair it
18934 with. If one isn't found, try to pair another vecload. */
18935 pos = lastpos;
18936 while (pos >= 0)
18937 {
18938 if (recog_memoized (ready[pos]) >= 0)
18939 {
18940 type2 = get_attr_type (ready[pos]);
18941 if (is_power9_pairable_vec_type (type2))
18942 {
18943 /* Found a vector insn to pair with, move it to the
18944 end of the ready list so it is scheduled next. */
18945 move_to_end_of_ready (ready, pos, lastpos);
18946 vec_pairing = 1;
18947 return cached_can_issue_more;
18948 }
18949 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18950 /* Remember position of first vecload seen. */
18951 vecload_pos = pos;
18952 }
18953 pos--;
18954 }
18955 if (vecload_pos >= 0)
18956 {
18957 /* Didn't find a vector to pair with but did find a vecload,
18958 move it to the end of the ready list. */
18959 move_to_end_of_ready (ready, vecload_pos, lastpos);
18960 vec_pairing = 1;
18961 return cached_can_issue_more;
18962 }
18963 }
18964 }
18965 else if (is_power9_pairable_vec_type (type))
18966 {
18967 /* Issued a vector operation. */
18968 if (vec_pairing == 0)
18969 {
18970 int vec_pos = -1;
18971 /* We issued a single vector insn, look for a vecload to pair it
18972 with. If one isn't found, try to pair another vector. */
18973 pos = lastpos;
18974 while (pos >= 0)
18975 {
18976 if (recog_memoized (ready[pos]) >= 0)
18977 {
18978 type2 = get_attr_type (ready[pos]);
18979 if (type2 == TYPE_VECLOAD)
18980 {
18981 /* Found a vecload insn to pair with, move it to the
18982 end of the ready list so it is scheduled next. */
18983 move_to_end_of_ready (ready, pos, lastpos);
18984 vec_pairing = 1;
18985 return cached_can_issue_more;
18986 }
18987 else if (is_power9_pairable_vec_type (type2)
18988 && vec_pos == -1)
18989 /* Remember position of first vector insn seen. */
18990 vec_pos = pos;
18991 }
18992 pos--;
18993 }
18994 if (vec_pos >= 0)
18995 {
18996 /* Didn't find a vecload to pair with but did find a vector
18997 insn, move it to the end of the ready list. */
18998 move_to_end_of_ready (ready, vec_pos, lastpos);
18999 vec_pairing = 1;
19000 return cached_can_issue_more;
19001 }
19002 }
19003 }
19004
19005 /* We've either finished a vec/vecload pair, couldn't find an insn to
19006 continue the current pair, or the last insn had nothing to do with
19007 pairing. In any case, reset the state. */
19008 vec_pairing = 0;
19009 }
19010
19011 return cached_can_issue_more;
19012 }
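/* Informal example of the interleaving this aims for: after a vecload
   issues, a pairable vector-ALU insn found on the ready list is moved
   to the end so the pair dispatches to the same superslice, giving an
   order like vecload, vecperm, vecload, vecsimple instead of the two
   vecloads back to back followed by the two vector ops.  */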
19013
19014 /* Determine if INSN is a store to memory that can be fused with a similar
19015 adjacent store. */
19016
19017 static bool
19018 is_fusable_store (rtx_insn *insn, rtx *str_mem)
19019 {
19020 /* Insn must be a non-prefixed base+disp form store. */
19021 if (is_store_insn (insn, str_mem)
19022 && get_attr_prefixed (insn) == PREFIXED_NO
19023 && get_attr_update (insn) == UPDATE_NO
19024 && get_attr_indexed (insn) == INDEXED_NO)
19025 {
19026 /* Further restrictions by mode and size. */
19027 if (!MEM_SIZE_KNOWN_P (*str_mem))
19028 return false;
19029
19030 machine_mode mode = GET_MODE (*str_mem);
19031 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
19032
19033 if (INTEGRAL_MODE_P (mode))
19034 /* Must be word or dword size. */
19035 return (size == 4 || size == 8);
19036 else if (FLOAT_MODE_P (mode))
19037 /* Must be dword size. */
19038 return (size == 8);
19039 }
19040
19041 return false;
19042 }
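/* Illustrative candidates under these rules (hypothetical operands):
	stw 10,0(9)
	stw 11,4(9)
   are word-size base+displacement stores and may fuse; update forms
   (stwu), indexed forms (stwx) and prefixed stores are rejected.  */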
19043
19044 /* Do Power10 specific reordering of the ready list. */
19045
19046 static int
19047 power10_sched_reorder (rtx_insn **ready, int lastpos)
19048 {
19049 rtx mem1;
19050
19051 /* Do store fusion during sched2 only. */
19052 if (!reload_completed)
19053 return cached_can_issue_more;
19054
19055 /* If the prior insn finished off a store fusion pair then simply
19056 reset the counter and return, nothing more to do. */
19057 if (load_store_pendulum != 0)
19058 {
19059 load_store_pendulum = 0;
19060 return cached_can_issue_more;
19061 }
19062
19063 /* Try to pair certain store insns to adjacent memory locations
19064 so that the hardware will fuse them to a single operation. */
19065 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19066 {
19067
19068 /* A fusable store was just scheduled. Scan the ready list for another
19069 store that it can fuse with. */
19070 int pos = lastpos;
19071 while (pos >= 0)
19072 {
19073 rtx mem2;
19074 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19075 must be ascending only. */
19076 if (is_fusable_store (ready[pos], &mem2)
19077 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19078 && adjacent_mem_locations (mem1, mem2))
19079 || (FLOAT_MODE_P (GET_MODE (mem1))
19080 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19081 {
19082 /* Found a fusable store. Move it to the end of the ready list
19083 so it is scheduled next. */
19084 move_to_end_of_ready (ready, pos, lastpos);
19085
19086 load_store_pendulum = -1;
19087 break;
19088 }
19089 pos--;
19090 }
19091 }
19092
19093 return cached_can_issue_more;
19094 }
19095
19096 /* We are about to begin issuing insns for this clock cycle. */
19097
19098 static int
19099 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19100 rtx_insn **ready ATTRIBUTE_UNUSED,
19101 int *pn_ready ATTRIBUTE_UNUSED,
19102 int clock_var ATTRIBUTE_UNUSED)
19103 {
19104 int n_ready = *pn_ready;
19105
19106 if (sched_verbose)
19107 fprintf (dump, "// rs6000_sched_reorder :\n");
19108
19109 /* Reorder the ready list if the second-to-last ready insn
19110 is a non-pipelined insn. */
19111 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19112 {
19113 if (is_nonpipeline_insn (ready[n_ready - 1])
19114 && (recog_memoized (ready[n_ready - 2]) > 0))
19115 /* Simply swap first two insns. */
19116 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19117 }
19118
19119 if (rs6000_tune == PROCESSOR_POWER6)
19120 load_store_pendulum = 0;
19121
19122 /* Do Power10 dependent reordering. */
19123 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19124 power10_sched_reorder (ready, n_ready - 1);
19125
19126 return rs6000_issue_rate ();
19127 }
19128
19129 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19130
19131 static int
19132 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19133 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19134 {
19135 if (sched_verbose)
19136 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19137
19138 /* Do Power6 dependent reordering if necessary. */
19139 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19140 return power6_sched_reorder2 (ready, *pn_ready - 1);
19141
19142 /* Do Power9 dependent reordering if necessary. */
19143 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19144 && recog_memoized (last_scheduled_insn) >= 0)
19145 return power9_sched_reorder2 (ready, *pn_ready - 1);
19146
19147 /* Do Power10 dependent reordering. */
19148 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19149 return power10_sched_reorder (ready, *pn_ready - 1);
19150
19151 return cached_can_issue_more;
19152 }
19153
19154 /* Return whether the presence of INSN causes a dispatch group termination
19155 of group WHICH_GROUP.
19156
19157 If WHICH_GROUP == current_group, this function will return true if INSN
19158 causes the termination of the current group (i.e., the dispatch group to
19159 which INSN belongs). This means that INSN will be the last insn in the
19160 group it belongs to.
19161
19162 If WHICH_GROUP == previous_group, this function will return true if INSN
19163 causes the termination of the previous group (i.e., the dispatch group that
19164 precedes the group to which INSN belongs). This means that INSN will be
19165 the first insn in the group it belongs to.  */
19166
19167 static bool
19168 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19169 {
19170 bool first, last;
19171
19172 if (! insn)
19173 return false;
19174
19175 first = insn_must_be_first_in_group (insn);
19176 last = insn_must_be_last_in_group (insn);
19177
19178 if (first && last)
19179 return true;
19180
19181 if (which_group == current_group)
19182 return last;
19183 else if (which_group == previous_group)
19184 return first;
19185
19186 return false;
19187 }
19188
19189
19190 static bool
19191 insn_must_be_first_in_group (rtx_insn *insn)
19192 {
19193 enum attr_type type;
19194
19195 if (!insn
19196 || NOTE_P (insn)
19197 || DEBUG_INSN_P (insn)
19198 || GET_CODE (PATTERN (insn)) == USE
19199 || GET_CODE (PATTERN (insn)) == CLOBBER)
19200 return false;
19201
19202 switch (rs6000_tune)
19203 {
19204 case PROCESSOR_POWER5:
19205 if (is_cracked_insn (insn))
19206 return true;
19207 /* FALLTHRU */
19208 case PROCESSOR_POWER4:
19209 if (is_microcoded_insn (insn))
19210 return true;
19211
19212 if (!rs6000_sched_groups)
19213 return false;
19214
19215 type = get_attr_type (insn);
19216
19217 switch (type)
19218 {
19219 case TYPE_MFCR:
19220 case TYPE_MFCRF:
19221 case TYPE_MTCR:
19222 case TYPE_CR_LOGICAL:
19223 case TYPE_MTJMPR:
19224 case TYPE_MFJMPR:
19225 case TYPE_DIV:
19226 case TYPE_LOAD_L:
19227 case TYPE_STORE_C:
19228 case TYPE_ISYNC:
19229 case TYPE_SYNC:
19230 return true;
19231 default:
19232 break;
19233 }
19234 break;
19235 case PROCESSOR_POWER6:
19236 type = get_attr_type (insn);
19237
19238 switch (type)
19239 {
19240 case TYPE_EXTS:
19241 case TYPE_CNTLZ:
19242 case TYPE_TRAP:
19243 case TYPE_MUL:
19244 case TYPE_INSERT:
19245 case TYPE_FPCOMPARE:
19246 case TYPE_MFCR:
19247 case TYPE_MTCR:
19248 case TYPE_MFJMPR:
19249 case TYPE_MTJMPR:
19250 case TYPE_ISYNC:
19251 case TYPE_SYNC:
19252 case TYPE_LOAD_L:
19253 case TYPE_STORE_C:
19254 return true;
19255 case TYPE_SHIFT:
19256 if (get_attr_dot (insn) == DOT_NO
19257 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19258 return true;
19259 else
19260 break;
19261 case TYPE_DIV:
19262 if (get_attr_size (insn) == SIZE_32)
19263 return true;
19264 else
19265 break;
19266 case TYPE_LOAD:
19267 case TYPE_STORE:
19268 case TYPE_FPLOAD:
19269 case TYPE_FPSTORE:
19270 if (get_attr_update (insn) == UPDATE_YES)
19271 return true;
19272 else
19273 break;
19274 default:
19275 break;
19276 }
19277 break;
19278 case PROCESSOR_POWER7:
19279 type = get_attr_type (insn);
19280
19281 switch (type)
19282 {
19283 case TYPE_CR_LOGICAL:
19284 case TYPE_MFCR:
19285 case TYPE_MFCRF:
19286 case TYPE_MTCR:
19287 case TYPE_DIV:
19288 case TYPE_ISYNC:
19289 case TYPE_LOAD_L:
19290 case TYPE_STORE_C:
19291 case TYPE_MFJMPR:
19292 case TYPE_MTJMPR:
19293 return true;
19294 case TYPE_MUL:
19295 case TYPE_SHIFT:
19296 case TYPE_EXTS:
19297 if (get_attr_dot (insn) == DOT_YES)
19298 return true;
19299 else
19300 break;
19301 case TYPE_LOAD:
19302 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19303 || get_attr_update (insn) == UPDATE_YES)
19304 return true;
19305 else
19306 break;
19307 case TYPE_STORE:
19308 case TYPE_FPLOAD:
19309 case TYPE_FPSTORE:
19310 if (get_attr_update (insn) == UPDATE_YES)
19311 return true;
19312 else
19313 break;
19314 default:
19315 break;
19316 }
19317 break;
19318 case PROCESSOR_POWER8:
19319 type = get_attr_type (insn);
19320
19321 switch (type)
19322 {
19323 case TYPE_CR_LOGICAL:
19324 case TYPE_MFCR:
19325 case TYPE_MFCRF:
19326 case TYPE_MTCR:
19327 case TYPE_SYNC:
19328 case TYPE_ISYNC:
19329 case TYPE_LOAD_L:
19330 case TYPE_STORE_C:
19331 case TYPE_VECSTORE:
19332 case TYPE_MFJMPR:
19333 case TYPE_MTJMPR:
19334 return true;
19335 case TYPE_SHIFT:
19336 case TYPE_EXTS:
19337 case TYPE_MUL:
19338 if (get_attr_dot (insn) == DOT_YES)
19339 return true;
19340 else
19341 break;
19342 case TYPE_LOAD:
19343 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19344 || get_attr_update (insn) == UPDATE_YES)
19345 return true;
19346 else
19347 break;
19348 case TYPE_STORE:
19349 if (get_attr_update (insn) == UPDATE_YES
19350 && get_attr_indexed (insn) == INDEXED_YES)
19351 return true;
19352 else
19353 break;
19354 default:
19355 break;
19356 }
19357 break;
19358 default:
19359 break;
19360 }
19361
19362 return false;
19363 }
19364
19365 static bool
19366 insn_must_be_last_in_group (rtx_insn *insn)
19367 {
19368 enum attr_type type;
19369
19370 if (!insn
19371 || NOTE_P (insn)
19372 || DEBUG_INSN_P (insn)
19373 || GET_CODE (PATTERN (insn)) == USE
19374 || GET_CODE (PATTERN (insn)) == CLOBBER)
19375 return false;
19376
19377 switch (rs6000_tune) {
19378 case PROCESSOR_POWER4:
19379 case PROCESSOR_POWER5:
19380 if (is_microcoded_insn (insn))
19381 return true;
19382
19383 if (is_branch_slot_insn (insn))
19384 return true;
19385
19386 break;
19387 case PROCESSOR_POWER6:
19388 type = get_attr_type (insn);
19389
19390 switch (type)
19391 {
19392 case TYPE_EXTS:
19393 case TYPE_CNTLZ:
19394 case TYPE_TRAP:
19395 case TYPE_MUL:
19396 case TYPE_FPCOMPARE:
19397 case TYPE_MFCR:
19398 case TYPE_MTCR:
19399 case TYPE_MFJMPR:
19400 case TYPE_MTJMPR:
19401 case TYPE_ISYNC:
19402 case TYPE_SYNC:
19403 case TYPE_LOAD_L:
19404 case TYPE_STORE_C:
19405 return true;
19406 case TYPE_SHIFT:
19407 if (get_attr_dot (insn) == DOT_NO
19408 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19409 return true;
19410 else
19411 break;
19412 case TYPE_DIV:
19413 if (get_attr_size (insn) == SIZE_32)
19414 return true;
19415 else
19416 break;
19417 default:
19418 break;
19419 }
19420 break;
19421 case PROCESSOR_POWER7:
19422 type = get_attr_type (insn);
19423
19424 switch (type)
19425 {
19426 case TYPE_ISYNC:
19427 case TYPE_SYNC:
19428 case TYPE_LOAD_L:
19429 case TYPE_STORE_C:
19430 return true;
19431 case TYPE_LOAD:
19432 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19433 && get_attr_update (insn) == UPDATE_YES)
19434 return true;
19435 else
19436 break;
19437 case TYPE_STORE:
19438 if (get_attr_update (insn) == UPDATE_YES
19439 && get_attr_indexed (insn) == INDEXED_YES)
19440 return true;
19441 else
19442 break;
19443 default:
19444 break;
19445 }
19446 break;
19447 case PROCESSOR_POWER8:
19448 type = get_attr_type (insn);
19449
19450 switch (type)
19451 {
19452 case TYPE_MFCR:
19453 case TYPE_MTCR:
19454 case TYPE_ISYNC:
19455 case TYPE_SYNC:
19456 case TYPE_LOAD_L:
19457 case TYPE_STORE_C:
19458 return true;
19459 case TYPE_LOAD:
19460 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19461 && get_attr_update (insn) == UPDATE_YES)
19462 return true;
19463 else
19464 break;
19465 case TYPE_STORE:
19466 if (get_attr_update (insn) == UPDATE_YES
19467 && get_attr_indexed (insn) == INDEXED_YES)
19468 return true;
19469 else
19470 break;
19471 default:
19472 break;
19473 }
19474 break;
19475 default:
19476 break;
19477 }
19478
19479 return false;
19480 }
19481
19482 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19483 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19484
19485 static bool
19486 is_costly_group (rtx *group_insns, rtx next_insn)
19487 {
19488 int i;
19489 int issue_rate = rs6000_issue_rate ();
19490
19491 for (i = 0; i < issue_rate; i++)
19492 {
19493 sd_iterator_def sd_it;
19494 dep_t dep;
19495 rtx insn = group_insns[i];
19496
19497 if (!insn)
19498 continue;
19499
19500 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19501 {
19502 rtx next = DEP_CON (dep);
19503
19504 if (next == next_insn
19505 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19506 return true;
19507 }
19508 }
19509
19510 return false;
19511 }
19512
19513 /* Utility of the function redefine_groups.
19514 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19515 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19516 to keep it "far" (in a separate group) from GROUP_INSNS, following
19517 one of the following schemes, depending on the value of the flag
19518 -minsert-sched-nops=X:
19519 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19520 in order to force NEXT_INSN into a separate group.
19521 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19522 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19523 insertion (has a group just ended, how many vacant issue slots remain in the
19524 last group, and how many dispatch groups were encountered so far). */
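/* Sketch of scheme (2) with hypothetical numbers: issue_rate == 5 and
   -minsert-sched-nops=2 emits exactly two nops before NEXT_INSN; the
   bookkeeping below charges them against an effective rate of
   issue_rate - 1, since a nop can never occupy the branch slot.  */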
19525
19526 static int
19527 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19528 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19529 int *group_count)
19530 {
19531 rtx nop;
19532 bool force;
19533 int issue_rate = rs6000_issue_rate ();
19534 bool end = *group_end;
19535 int i;
19536
19537 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19538 return can_issue_more;
19539
19540 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19541 return can_issue_more;
19542
19543 force = is_costly_group (group_insns, next_insn);
19544 if (!force)
19545 return can_issue_more;
19546
19547 if (sched_verbose > 6)
19548 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
19549 *group_count ,can_issue_more);
19550
19551 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19552 {
19553 if (*group_end)
19554 can_issue_more = 0;
19555
19556 /* Since only a branch can be issued in the last issue_slot, it is
19557 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19558 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19559 in this case the last nop will start a new group and the branch
19560 will be forced to the new group. */
19561 if (can_issue_more && !is_branch_slot_insn (next_insn))
19562 can_issue_more--;
19563
19564 /* Do we have a special group ending nop? */
19565 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19566 || rs6000_tune == PROCESSOR_POWER8)
19567 {
19568 nop = gen_group_ending_nop ();
19569 emit_insn_before (nop, next_insn);
19570 can_issue_more = 0;
19571 }
19572 else
19573 while (can_issue_more > 0)
19574 {
19575 nop = gen_nop ();
19576 emit_insn_before (nop, next_insn);
19577 can_issue_more--;
19578 }
19579
19580 *group_end = true;
19581 return 0;
19582 }
19583
19584 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19585 {
19586 int n_nops = rs6000_sched_insert_nops;
19587
19588 /* Nops can't be issued from the branch slot, so the effective
19589 issue_rate for nops is 'issue_rate - 1'. */
19590 if (can_issue_more == 0)
19591 can_issue_more = issue_rate;
19592 can_issue_more--;
19593 if (can_issue_more == 0)
19594 {
19595 can_issue_more = issue_rate - 1;
19596 (*group_count)++;
19597 end = true;
19598 for (i = 0; i < issue_rate; i++)
19599 {
19600 group_insns[i] = 0;
19601 }
19602 }
19603
19604 while (n_nops > 0)
19605 {
19606 nop = gen_nop ();
19607 emit_insn_before (nop, next_insn);
19608 if (can_issue_more == issue_rate - 1) /* new group begins */
19609 end = false;
19610 can_issue_more--;
19611 if (can_issue_more == 0)
19612 {
19613 can_issue_more = issue_rate - 1;
19614 (*group_count)++;
19615 end = true;
19616 for (i = 0; i < issue_rate; i++)
19617 {
19618 group_insns[i] = 0;
19619 }
19620 }
19621 n_nops--;
19622 }
19623
19624 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19625 can_issue_more++;
19626
19627 /* Is next_insn going to start a new group? */
19628 *group_end
19629 = (end
19630 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19631 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19632 || (can_issue_more < issue_rate &&
19633 insn_terminates_group_p (next_insn, previous_group)));
19634 if (*group_end && end)
19635 (*group_count)--;
19636
19637 if (sched_verbose > 6)
19638 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19639 *group_count, can_issue_more);
19640 return can_issue_more;
19641 }
19642
19643 return can_issue_more;
19644 }
19645
19646 /* This function tries to synch the dispatch groups that the compiler "sees"
19647 with the dispatch groups that the processor dispatcher is expected to
19648 form in practice. It tries to achieve this synchronization by forcing the
19649 estimated processor grouping on the compiler (as opposed to the function
19650 'pad_groups' which tries to force the scheduler's grouping on the processor).
19651
19652 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19653 examines the (estimated) dispatch groups that will be formed by the processor
19654 dispatcher. It marks these group boundaries to reflect the estimated
19655 processor grouping, overriding the grouping that the scheduler had marked.
19656 Depending on the value of the flag '-minsert-sched-nops' this function can
19657 force certain insns into separate groups or force a certain distance between
19658 them by inserting nops, for example, if there exists a "costly dependence"
19659 between the insns.
19660
19661 The function estimates the group boundaries that the processor will form as
19662 follows: It keeps track of how many vacant issue slots are available after
19663 each insn. A subsequent insn will start a new group if one of the following
19664 4 cases applies:
19665 - no more vacant issue slots remain in the current dispatch group.
19666 - only the last issue slot, which is the branch slot, is vacant, but the next
19667 insn is not a branch.
19668 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19669 which means that a cracked insn (which occupies two issue slots) can't be
19670 issued in this group.
19671 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19672 start a new group. */
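
/* Illustration (a hedged example, not from the original sources): with an
   issue_rate of 4, the 4th slot of each group is the branch slot.  After
   three non-branch insns only that slot is vacant, so a following
   non-branch insn starts a new group (the second case above), and a
   cracked insn, which needs two slots, already starts a new group once
   only two slots remain (the third case).  */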
19673
19674 static int
19675 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19676 rtx_insn *tail)
19677 {
19678 rtx_insn *insn, *next_insn;
19679 int issue_rate;
19680 int can_issue_more;
19681 int slot, i;
19682 bool group_end;
19683 int group_count = 0;
19684 rtx *group_insns;
19685
19686 /* Initialize. */
19687 issue_rate = rs6000_issue_rate ();
19688 group_insns = XALLOCAVEC (rtx, issue_rate);
19689 for (i = 0; i < issue_rate; i++)
19690 {
19691 group_insns[i] = 0;
19692 }
19693 can_issue_more = issue_rate;
19694 slot = 0;
19695 insn = get_next_active_insn (prev_head_insn, tail);
19696 group_end = false;
19697
19698 while (insn != NULL_RTX)
19699 {
19700 slot = (issue_rate - can_issue_more);
19701 group_insns[slot] = insn;
19702 can_issue_more =
19703 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19704 if (insn_terminates_group_p (insn, current_group))
19705 can_issue_more = 0;
19706
19707 next_insn = get_next_active_insn (insn, tail);
19708 if (next_insn == NULL_RTX)
19709 return group_count + 1;
19710
19711 /* Is next_insn going to start a new group? */
19712 group_end
19713 = (can_issue_more == 0
19714 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19715 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19716 || (can_issue_more < issue_rate &&
19717 insn_terminates_group_p (next_insn, previous_group)));
19718
19719 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19720 next_insn, &group_end, can_issue_more,
19721 &group_count);
19722
19723 if (group_end)
19724 {
19725 group_count++;
19726 can_issue_more = 0;
19727 for (i = 0; i < issue_rate; i++)
19728 {
19729 group_insns[i] = 0;
19730 }
19731 }
19732
19733 if (GET_MODE (next_insn) == TImode && can_issue_more)
19734 PUT_MODE (next_insn, VOIDmode);
19735 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19736 PUT_MODE (next_insn, TImode);
19737
19738 insn = next_insn;
19739 if (can_issue_more == 0)
19740 can_issue_more = issue_rate;
19741 } /* while */
19742
19743 return group_count;
19744 }
19745
19746 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19747 dispatch group boundaries that the scheduler had marked. Pad with nops
19748 any dispatch groups which have vacant issue slots, in order to force the
19749 scheduler's grouping on the processor dispatcher. The function
19750 returns the number of dispatch groups found. */
19751
19752 static int
19753 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19754 rtx_insn *tail)
19755 {
19756 rtx_insn *insn, *next_insn;
19757 rtx nop;
19758 int issue_rate;
19759 int can_issue_more;
19760 int group_end;
19761 int group_count = 0;
19762
19763 /* Initialize issue_rate. */
19764 issue_rate = rs6000_issue_rate ();
19765 can_issue_more = issue_rate;
19766
19767 insn = get_next_active_insn (prev_head_insn, tail);
19768 next_insn = get_next_active_insn (insn, tail);
19769
19770 while (insn != NULL_RTX)
19771 {
19772 can_issue_more =
19773 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19774
19775 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19776
19777 if (next_insn == NULL_RTX)
19778 break;
19779
19780 if (group_end)
19781 {
19782 /* If the scheduler had marked group termination at this location
19783 (between insn and next_insn), and neither insn nor next_insn will
19784 force group termination, pad the group with nops to force group
19785 termination. */
19786 if (can_issue_more
19787 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19788 && !insn_terminates_group_p (insn, current_group)
19789 && !insn_terminates_group_p (next_insn, previous_group))
19790 {
19791 if (!is_branch_slot_insn (next_insn))
19792 can_issue_more--;
19793
19794 while (can_issue_more)
19795 {
19796 nop = gen_nop ();
19797 emit_insn_before (nop, next_insn);
19798 can_issue_more--;
19799 }
19800 }
19801
19802 can_issue_more = issue_rate;
19803 group_count++;
19804 }
19805
19806 insn = next_insn;
19807 next_insn = get_next_active_insn (insn, tail);
19808 }
19809
19810 return group_count;
19811 }
19812
19813 /* We're beginning a new block. Initialize data structures as necessary. */
19814
19815 static void
19816 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19817 int sched_verbose ATTRIBUTE_UNUSED,
19818 int max_ready ATTRIBUTE_UNUSED)
19819 {
19820 last_scheduled_insn = NULL;
19821 load_store_pendulum = 0;
19822 divide_cnt = 0;
19823 vec_pairing = 0;
19824 }
19825
19826 /* The following function is called at the end of scheduling BB.
19827 After reload, it inserts nops at insn group boundaries. */
19828
19829 static void
19830 rs6000_sched_finish (FILE *dump, int sched_verbose)
19831 {
19832 int n_groups;
19833
19834 if (sched_verbose)
19835 fprintf (dump, "=== Finishing schedule.\n");
19836
19837 if (reload_completed && rs6000_sched_groups)
19838 {
19839 /* Do not run sched_finish hook when selective scheduling enabled. */
19840 if (sel_sched_p ())
19841 return;
19842
19843 if (rs6000_sched_insert_nops == sched_finish_none)
19844 return;
19845
19846 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19847 n_groups = pad_groups (dump, sched_verbose,
19848 current_sched_info->prev_head,
19849 current_sched_info->next_tail);
19850 else
19851 n_groups = redefine_groups (dump, sched_verbose,
19852 current_sched_info->prev_head,
19853 current_sched_info->next_tail);
19854
19855 if (sched_verbose >= 6)
19856 {
19857 fprintf (dump, "ngroups = %d\n", n_groups);
19858 print_rtl (dump, current_sched_info->prev_head);
19859 fprintf (dump, "Done finish_sched\n");
19860 }
19861 }
19862 }
19863
19864 struct rs6000_sched_context
19865 {
19866 short cached_can_issue_more;
19867 rtx_insn *last_scheduled_insn;
19868 int load_store_pendulum;
19869 int divide_cnt;
19870 int vec_pairing;
19871 };
19872
19873 typedef struct rs6000_sched_context rs6000_sched_context_def;
19874 typedef rs6000_sched_context_def *rs6000_sched_context_t;
19875
19876 /* Allocate store for new scheduling context. */
19877 static void *
19878 rs6000_alloc_sched_context (void)
19879 {
19880 return xmalloc (sizeof (rs6000_sched_context_def));
19881 }
19882
19883 /* If CLEAN_P is true, initialize _SC with clean data;
19884 otherwise initialize it from the global context. */
19885 static void
19886 rs6000_init_sched_context (void *_sc, bool clean_p)
19887 {
19888 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19889
19890 if (clean_p)
19891 {
19892 sc->cached_can_issue_more = 0;
19893 sc->last_scheduled_insn = NULL;
19894 sc->load_store_pendulum = 0;
19895 sc->divide_cnt = 0;
19896 sc->vec_pairing = 0;
19897 }
19898 else
19899 {
19900 sc->cached_can_issue_more = cached_can_issue_more;
19901 sc->last_scheduled_insn = last_scheduled_insn;
19902 sc->load_store_pendulum = load_store_pendulum;
19903 sc->divide_cnt = divide_cnt;
19904 sc->vec_pairing = vec_pairing;
19905 }
19906 }
19907
19908 /* Sets the global scheduling context to the one pointed to by _SC. */
19909 static void
19910 rs6000_set_sched_context (void *_sc)
19911 {
19912 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19913
19914 gcc_assert (sc != NULL);
19915
19916 cached_can_issue_more = sc->cached_can_issue_more;
19917 last_scheduled_insn = sc->last_scheduled_insn;
19918 load_store_pendulum = sc->load_store_pendulum;
19919 divide_cnt = sc->divide_cnt;
19920 vec_pairing = sc->vec_pairing;
19921 }
19922
19923 /* Free _SC. */
19924 static void
19925 rs6000_free_sched_context (void *_sc)
19926 {
19927 gcc_assert (_sc != NULL);
19928
19929 free (_sc);
19930 }
19931
19932 static bool
19933 rs6000_sched_can_speculate_insn (rtx_insn *insn)
19934 {
19935 switch (get_attr_type (insn))
19936 {
19937 case TYPE_DIV:
19938 case TYPE_SDIV:
19939 case TYPE_DDIV:
19940 case TYPE_VECDIV:
19941 case TYPE_SSQRT:
19942 case TYPE_DSQRT:
19943 return false;
19944
19945 default:
19946 return true;
19947 }
19948 }
19949 \f
19950 /* Length in units of the trampoline for entering a nested function. */
19951
19952 int
19953 rs6000_trampoline_size (void)
19954 {
19955 int ret = 0;
19956
19957 switch (DEFAULT_ABI)
19958 {
19959 default:
19960 gcc_unreachable ();
19961
19962 case ABI_AIX:
19963 ret = (TARGET_32BIT) ? 12 : 24;
19964 break;
19965
19966 case ABI_ELFv2:
19967 gcc_assert (!TARGET_32BIT);
19968 ret = 32;
19969 break;
19970
19971 case ABI_DARWIN:
19972 case ABI_V4:
19973 ret = (TARGET_32BIT) ? 40 : 48;
19974 break;
19975 }
19976
19977 return ret;
19978 }
19979
19980 /* Emit RTL insns to initialize the variable parts of a trampoline.
19981 FNADDR is an RTX for the address of the function's pure code.
19982 CXT is an RTX for the static chain value for the function. */
19983
19984 static void
19985 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19986 {
19987 int regsize = (TARGET_32BIT) ? 4 : 8;
19988 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19989 rtx ctx_reg = force_reg (Pmode, cxt);
19990 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19991
19992 switch (DEFAULT_ABI)
19993 {
19994 default:
19995 gcc_unreachable ();
19996
19997 /* Under AIX, just build the 3-word function descriptor. */
19998 case ABI_AIX:
19999 {
20000 rtx fnmem, fn_reg, toc_reg;
20001
20002 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
20003 error ("you cannot take the address of a nested function if you use "
20004 "the %qs option", "-mno-pointers-to-nested-functions");
20005
20006 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
20007 fn_reg = gen_reg_rtx (Pmode);
20008 toc_reg = gen_reg_rtx (Pmode);
20009
20010 /* Macro to shorten the code expansions below. */
20011 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20012
20013 m_tramp = replace_equiv_address (m_tramp, addr);
20014
20015 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
20016 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
20017 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
20018 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
20019 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
20020
20021 # undef MEM_PLUS
20022 }
20023 break;
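
/* The descriptor built above mirrors the callee's: word 0 holds the code
   address, word 1 the TOC pointer, and word 2 the static chain (CXT).  */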
20024
20025 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20026 case ABI_ELFv2:
20027 case ABI_DARWIN:
20028 case ABI_V4:
20029 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
20030 LCT_NORMAL, VOIDmode,
20031 addr, Pmode,
20032 GEN_INT (rs6000_trampoline_size ()), SImode,
20033 fnaddr, Pmode,
20034 ctx_reg, Pmode);
20035 break;
20036 }
20037 }
20038
20039 \f
20040 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20041 identifier as an argument, so the front end shouldn't look it up. */
20042
20043 static bool
20044 rs6000_attribute_takes_identifier_p (const_tree attr_id)
20045 {
20046 return is_attribute_p ("altivec", attr_id);
20047 }
20048
20049 /* Handle the "altivec" attribute. The attribute may have
20050 arguments as follows:
20051
20052 __attribute__((altivec(vector__)))
20053 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20054 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20055
20056 and may appear more than once (e.g., 'vector bool char') in a
20057 given declaration. */
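
/* For illustration (hedged; the variable names are hypothetical), the
   AltiVec keywords expand roughly to these attribute forms:

   vector int vi;       => __attribute__((altivec(vector__))) int vi;
   vector bool int bi;  => __attribute__((altivec(bool__))) unsigned int bi;
   vector pixel px;     => __attribute__((altivec(pixel__))) unsigned short px;  */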
20058
20059 static tree
20060 rs6000_handle_altivec_attribute (tree *node,
20061 tree name ATTRIBUTE_UNUSED,
20062 tree args,
20063 int flags ATTRIBUTE_UNUSED,
20064 bool *no_add_attrs)
20065 {
20066 tree type = *node, result = NULL_TREE;
20067 machine_mode mode;
20068 int unsigned_p;
20069 char altivec_type
20070 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20071 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20072 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20073 : '?');
20074
20075 while (POINTER_TYPE_P (type)
20076 || TREE_CODE (type) == FUNCTION_TYPE
20077 || TREE_CODE (type) == METHOD_TYPE
20078 || TREE_CODE (type) == ARRAY_TYPE)
20079 type = TREE_TYPE (type);
20080
20081 mode = TYPE_MODE (type);
20082
20083 /* Check for invalid AltiVec type qualifiers. */
20084 if (type == long_double_type_node)
20085 error ("use of %<long double%> in AltiVec types is invalid");
20086 else if (type == boolean_type_node)
20087 error ("use of boolean types in AltiVec types is invalid");
20088 else if (TREE_CODE (type) == COMPLEX_TYPE)
20089 error ("use of %<complex%> in AltiVec types is invalid");
20090 else if (DECIMAL_FLOAT_MODE_P (mode))
20091 error ("use of decimal floating-point types in AltiVec types is invalid");
20092 else if (!TARGET_VSX)
20093 {
20094 if (type == long_unsigned_type_node || type == long_integer_type_node)
20095 {
20096 if (TARGET_64BIT)
20097 error ("use of %<long%> in AltiVec types is invalid for "
20098 "64-bit code without %qs", "-mvsx");
20099 else if (rs6000_warn_altivec_long)
20100 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20101 "use %<int%>");
20102 }
20103 else if (type == long_long_unsigned_type_node
20104 || type == long_long_integer_type_node)
20105 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20106 "-mvsx");
20107 else if (type == double_type_node)
20108 error ("use of %<double%> in AltiVec types is invalid without %qs",
20109 "-mvsx");
20110 }
20111
20112 switch (altivec_type)
20113 {
20114 case 'v':
20115 unsigned_p = TYPE_UNSIGNED (type);
20116 switch (mode)
20117 {
20118 case E_TImode:
20119 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20120 break;
20121 case E_DImode:
20122 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20123 break;
20124 case E_SImode:
20125 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20126 break;
20127 case E_HImode:
20128 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20129 break;
20130 case E_QImode:
20131 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20132 break;
20133 case E_SFmode: result = V4SF_type_node; break;
20134 case E_DFmode: result = V2DF_type_node; break;
20135 /* If the user says 'vector int bool', we may be handed the 'bool'
20136 attribute _before_ the 'vector' attribute, and so select the
20137 proper type in the 'b' case below. */
20138 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20139 case E_V2DImode: case E_V2DFmode:
20140 result = type;
20141 default: break;
20142 }
20143 break;
20144 case 'b':
20145 switch (mode)
20146 {
20147 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20148 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20149 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20150 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20151 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
20152 default: break;
20153 }
20154 break;
20155 case 'p':
20156 switch (mode)
20157 {
20158 case E_V8HImode: result = pixel_V8HI_type_node;
20159 default: break;
20160 }
20161 default: break;
20162 }
20163
20164 /* Propagate qualifiers attached to the element type
20165 onto the vector type. */
20166 if (result && result != type && TYPE_QUALS (type))
20167 result = build_qualified_type (result, TYPE_QUALS (type));
20168
20169 *no_add_attrs = true; /* No need to hang on to the attribute. */
20170
20171 if (result)
20172 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20173
20174 return NULL_TREE;
20175 }
20176
20177 /* AltiVec defines five built-in scalar types that serve as vector
20178 elements; we must teach the compiler how to mangle them. The 128-bit
20179 floating point mangling is target-specific as well. MMA defines
20180 two built-in types to be used as opaque vector types. */
20181
20182 static const char *
20183 rs6000_mangle_type (const_tree type)
20184 {
20185 type = TYPE_MAIN_VARIANT (type);
20186
20187 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20188 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20189 && TREE_CODE (type) != OPAQUE_TYPE)
20190 return NULL;
20191
20192 if (type == bool_char_type_node) return "U6__boolc";
20193 if (type == bool_short_type_node) return "U6__bools";
20194 if (type == pixel_type_node) return "u7__pixel";
20195 if (type == bool_int_type_node) return "U6__booli";
20196 if (type == bool_long_long_type_node) return "U6__boolx";
20197
20198 /* If long double uses the IBM 128-bit extended format, we need to
20199 distinguish between __ibm128 and long double. */
20200 if (type == ibm128_float_type_node && TARGET_LONG_DOUBLE_128
20201 && !TARGET_IEEEQUAD)
20202 return "u8__ibm128";
20203
20204 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20205 return "g";
20206 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20207 return "u9__ieee128";
20208
20209 if (type == vector_pair_type_node)
20210 return "u13__vector_pair";
20211 if (type == vector_quad_type_node)
20212 return "u13__vector_quad";
20213
20214 /* For all other types, use the default mangling. */
20215 return NULL;
20216 }
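
/* Illustrative examples (hedged, not taken from the sources): with IBM
   128-bit long double in effect, the C++ declarations

   void f (__ibm128);    // mangles as _Z1fu8__ibm128
   void g (__float128);  // mangles as _Z1gu9__ieee128

   use the Itanium C++ ABI vendor-extended encoding "u<len><name>"
   returned above.  */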
20217
20218 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20219 struct attribute_spec.handler. */
20220
20221 static tree
20222 rs6000_handle_longcall_attribute (tree *node, tree name,
20223 tree args ATTRIBUTE_UNUSED,
20224 int flags ATTRIBUTE_UNUSED,
20225 bool *no_add_attrs)
20226 {
20227 if (TREE_CODE (*node) != FUNCTION_TYPE
20228 && TREE_CODE (*node) != FIELD_DECL
20229 && TREE_CODE (*node) != TYPE_DECL)
20230 {
20231 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20232 name);
20233 *no_add_attrs = true;
20234 }
20235
20236 return NULL_TREE;
20237 }
20238
20239 /* Set longcall attributes on all functions declared when
20240 rs6000_default_long_calls is true. */
20241 static void
20242 rs6000_set_default_type_attributes (tree type)
20243 {
20244 if (rs6000_default_long_calls
20245 && (TREE_CODE (type) == FUNCTION_TYPE
20246 || TREE_CODE (type) == METHOD_TYPE))
20247 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20248 NULL_TREE,
20249 TYPE_ATTRIBUTES (type));
20250
20251 #if TARGET_MACHO
20252 darwin_set_default_type_attributes (type);
20253 #endif
20254 }
20255
20256 /* Return a reference suitable for calling a function with the
20257 longcall attribute. */
20258
20259 static rtx
20260 rs6000_longcall_ref (rtx call_ref, rtx arg)
20261 {
20262 /* System V adds '.' to the internal name, so skip any leading dots. */
20263 const char *call_name = XSTR (call_ref, 0);
20264 if (*call_name == '.')
20265 {
20266 while (*call_name == '.')
20267 call_name++;
20268
20269 tree node = get_identifier (call_name);
20270 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20271 }
20272
20273 if (TARGET_PLTSEQ)
20274 {
20275 rtx base = const0_rtx;
20276 int regno = 12;
20277 if (rs6000_pcrel_p ())
20278 {
20279 rtx reg = gen_rtx_REG (Pmode, regno);
20280 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20281 gen_rtvec (3, base, call_ref, arg),
20282 UNSPECV_PLT_PCREL);
20283 emit_insn (gen_rtx_SET (reg, u));
20284 return reg;
20285 }
20286
20287 if (DEFAULT_ABI == ABI_ELFv2)
20288 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20289 else
20290 {
20291 if (flag_pic)
20292 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20293 regno = 11;
20294 }
20295 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20296 may be used by a function global entry point. For SysV4, r11
20297 is used by __glink_PLTresolve lazy resolver entry. */
20298 rtx reg = gen_rtx_REG (Pmode, regno);
20299 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20300 UNSPEC_PLT16_HA);
20301 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20302 gen_rtvec (3, reg, call_ref, arg),
20303 UNSPECV_PLT16_LO);
20304 emit_insn (gen_rtx_SET (reg, hi));
20305 emit_insn (gen_rtx_SET (reg, lo));
20306 return reg;
20307 }
20308
20309 return force_reg (Pmode, call_ref);
20310 }
20311 \f
20312 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20313 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20314 #endif
20315
20316 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20317 struct attribute_spec.handler. */
20318 static tree
20319 rs6000_handle_struct_attribute (tree *node, tree name,
20320 tree args ATTRIBUTE_UNUSED,
20321 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20322 {
20323 tree *type = NULL;
20324 if (DECL_P (*node))
20325 {
20326 if (TREE_CODE (*node) == TYPE_DECL)
20327 type = &TREE_TYPE (*node);
20328 }
20329 else
20330 type = node;
20331
20332 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20333 || TREE_CODE (*type) == UNION_TYPE)))
20334 {
20335 warning (OPT_Wattributes, "%qE attribute ignored", name);
20336 *no_add_attrs = true;
20337 }
20338
20339 else if ((is_attribute_p ("ms_struct", name)
20340 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20341 || ((is_attribute_p ("gcc_struct", name)
20342 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20343 {
20344 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20345 name);
20346 *no_add_attrs = true;
20347 }
20348
20349 return NULL_TREE;
20350 }
20351
20352 static bool
20353 rs6000_ms_bitfield_layout_p (const_tree record_type)
20354 {
20355 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
20356 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20357 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20358 }
20359 \f
20360 #ifdef USING_ELFOS_H
20361
20362 /* A get_unnamed_section callback, used for switching to toc_section. */
20363
20364 static void
20365 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20366 {
20367 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20368 && TARGET_MINIMAL_TOC)
20369 {
20370 if (!toc_initialized)
20371 {
20372 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20373 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20374 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20375 fprintf (asm_out_file, "\t.tc ");
20376 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20377 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20378 fprintf (asm_out_file, "\n");
20379
20380 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20381 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20382 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20383 fprintf (asm_out_file, " = .+32768\n");
20384 toc_initialized = 1;
20385 }
20386 else
20387 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20388 }
20389 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20390 {
20391 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20392 if (!toc_initialized)
20393 {
20394 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20395 toc_initialized = 1;
20396 }
20397 }
20398 else
20399 {
20400 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20401 if (!toc_initialized)
20402 {
20403 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20404 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20405 fprintf (asm_out_file, " = .+32768\n");
20406 toc_initialized = 1;
20407 }
20408 }
20409 }
20410
20411 /* Implement TARGET_ASM_INIT_SECTIONS. */
20412
20413 static void
20414 rs6000_elf_asm_init_sections (void)
20415 {
20416 toc_section
20417 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20418
20419 sdata2_section
20420 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20421 SDATA2_SECTION_ASM_OP);
20422 }
20423
20424 /* Implement TARGET_SELECT_RTX_SECTION. */
20425
20426 static section *
20427 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20428 unsigned HOST_WIDE_INT align)
20429 {
20430 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20431 return toc_section;
20432 else
20433 return default_elf_select_rtx_section (mode, x, align);
20434 }
20435 \f
20436 /* For a SYMBOL_REF, set generic flags and then perform some
20437 target-specific processing.
20438
20439 When the AIX ABI is requested on a non-AIX system, replace the
20440 function name with the real name (with a leading .) rather than the
20441 function descriptor name. This saves a lot of overriding code to
20442 read the prefixes. */
20443
20444 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20445 static void
20446 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20447 {
20448 default_encode_section_info (decl, rtl, first);
20449
20450 if (first
20451 && TREE_CODE (decl) == FUNCTION_DECL
20452 && !TARGET_AIX
20453 && DEFAULT_ABI == ABI_AIX)
20454 {
20455 rtx sym_ref = XEXP (rtl, 0);
20456 size_t len = strlen (XSTR (sym_ref, 0));
20457 char *str = XALLOCAVEC (char, len + 2);
20458 str[0] = '.';
20459 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20460 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20461 }
20462 }
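
/* For example (illustrative): when the AIX ABI is used on a non-AIX
   system, a function whose SYMBOL_REF reads "foo" is renamed here to
   ".foo", so references resolve to the code entry point rather than to
   the "foo" function descriptor.  */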
20463
20464 static inline bool
20465 compare_section_name (const char *section, const char *templ)
20466 {
20467 int len;
20468
20469 len = strlen (templ);
20470 return (strncmp (section, templ, len) == 0
20471 && (section[len] == 0 || section[len] == '.'));
20472 }
20473
20474 bool
20475 rs6000_elf_in_small_data_p (const_tree decl)
20476 {
20477 if (rs6000_sdata == SDATA_NONE)
20478 return false;
20479
20480 /* We want to merge strings, so we never consider them small data. */
20481 if (TREE_CODE (decl) == STRING_CST)
20482 return false;
20483
20484 /* Functions are never in the small data area. */
20485 if (TREE_CODE (decl) == FUNCTION_DECL)
20486 return false;
20487
20488 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20489 {
20490 const char *section = DECL_SECTION_NAME (decl);
20491 if (compare_section_name (section, ".sdata")
20492 || compare_section_name (section, ".sdata2")
20493 || compare_section_name (section, ".gnu.linkonce.s")
20494 || compare_section_name (section, ".sbss")
20495 || compare_section_name (section, ".sbss2")
20496 || compare_section_name (section, ".gnu.linkonce.sb")
20497 || strcmp (section, ".PPC.EMB.sdata0") == 0
20498 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20499 return true;
20500 }
20501 else
20502 {
20503 /* If we are told not to put readonly data in sdata, then don't. */
20504 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20505 && !rs6000_readonly_in_sdata)
20506 return false;
20507
20508 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20509
20510 if (size > 0
20511 && size <= g_switch_value
20512 /* If it's not public, and we're not going to reference it via the small
20513 data area, there's no need to put it in the small data section. */
20514 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20515 return true;
20516 }
20517
20518 return false;
20519 }
20520
20521 #endif /* USING_ELFOS_H */
20522 \f
20523 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20524
20525 static bool
20526 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20527 {
20528 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20529 }
20530
20531 /* Do not place thread-local symbol refs in the object blocks. */
20532
20533 static bool
20534 rs6000_use_blocks_for_decl_p (const_tree decl)
20535 {
20536 return !DECL_THREAD_LOCAL_P (decl);
20537 }
20538 \f
20539 /* Return a REG that occurs in ADDR with coefficient 1.
20540 ADDR can be effectively incremented by incrementing REG.
20541
20542 r0 is special and we must not select it as an address
20543 register by this routine since our caller will try to
20544 increment the returned register via an "la" instruction. */
20545
20546 rtx
20547 find_addr_reg (rtx addr)
20548 {
20549 while (GET_CODE (addr) == PLUS)
20550 {
20551 if (REG_P (XEXP (addr, 0))
20552 && REGNO (XEXP (addr, 0)) != 0)
20553 addr = XEXP (addr, 0);
20554 else if (REG_P (XEXP (addr, 1))
20555 && REGNO (XEXP (addr, 1)) != 0)
20556 addr = XEXP (addr, 1);
20557 else if (CONSTANT_P (XEXP (addr, 0)))
20558 addr = XEXP (addr, 1);
20559 else if (CONSTANT_P (XEXP (addr, 1)))
20560 addr = XEXP (addr, 0);
20561 else
20562 gcc_unreachable ();
20563 }
20564 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20565 return addr;
20566 }
20567
20568 void
20569 rs6000_fatal_bad_address (rtx op)
20570 {
20571 fatal_insn ("bad address", op);
20572 }
20573
20574 #if TARGET_MACHO
20575
20576 vec<branch_island, va_gc> *branch_islands;
20577
20578 /* Remember to generate a branch island for far calls to the given
20579 function. */
20580
20581 static void
20582 add_compiler_branch_island (tree label_name, tree function_name,
20583 int line_number)
20584 {
20585 branch_island bi = {function_name, label_name, line_number};
20586 vec_safe_push (branch_islands, bi);
20587 }
20588
20589 /* NO_PREVIOUS_DEF checks whether the function name is already in the
20590 branch island list. */
20591
20592 static int
20593 no_previous_def (tree function_name)
20594 {
20595 branch_island *bi;
20596 unsigned ix;
20597
20598 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20599 if (function_name == bi->function_name)
20600 return 0;
20601 return 1;
20602 }
20603
20604 /* GET_PREV_LABEL gets the label name from the previous definition of
20605 the function. */
20606
20607 static tree
20608 get_prev_label (tree function_name)
20609 {
20610 branch_island *bi;
20611 unsigned ix;
20612
20613 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20614 if (function_name == bi->function_name)
20615 return bi->label_name;
20616 return NULL_TREE;
20617 }
20618
20619 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20620
20621 void
20622 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20623 {
20624 unsigned int length;
20625 char *symbol_name, *lazy_ptr_name;
20626 char *local_label_0;
20627 static unsigned label = 0;
20628
20629 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20630 symb = (*targetm.strip_name_encoding) (symb);
20631
20632 length = strlen (symb);
20633 symbol_name = XALLOCAVEC (char, length + 32);
20634 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20635
20636 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20637 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20638
20639 if (MACHOPIC_PURE)
20640 {
20641 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20642 fprintf (file, "\t.align 5\n");
20643
20644 fprintf (file, "%s:\n", stub);
20645 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20646
20647 label++;
20648 local_label_0 = XALLOCAVEC (char, 16);
20649 sprintf (local_label_0, "L%u$spb", label);
20650
20651 fprintf (file, "\tmflr r0\n");
20652 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20653 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20654 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20655 lazy_ptr_name, local_label_0);
20656 fprintf (file, "\tmtlr r0\n");
20657 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20658 (TARGET_64BIT ? "ldu" : "lwzu"),
20659 lazy_ptr_name, local_label_0);
20660 fprintf (file, "\tmtctr r12\n");
20661 fprintf (file, "\tbctr\n");
20662 }
20663 else /* mdynamic-no-pic or mkernel. */
20664 {
20665 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20666 fprintf (file, "\t.align 4\n");
20667
20668 fprintf (file, "%s:\n", stub);
20669 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20670
20671 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20672 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20673 (TARGET_64BIT ? "ldu" : "lwzu"),
20674 lazy_ptr_name);
20675 fprintf (file, "\tmtctr r12\n");
20676 fprintf (file, "\tbctr\n");
20677 }
20678
20679 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20680 fprintf (file, "%s:\n", lazy_ptr_name);
20681 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20682 fprintf (file, "%sdyld_stub_binding_helper\n",
20683 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20684 }
20685
20686 /* Legitimize PIC addresses. If the address is already
20687 position-independent, we return ORIG. Newly generated
20688 position-independent addresses go into a reg. This is REG if nonzero;
20689 otherwise we allocate register(s) as necessary. */
20690
20691 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
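/* I.e., SMALL_INT (X) is true iff INTVAL (X) fits in a signed 16-bit
   immediate: -0x8000 <= INTVAL (X) <= 0x7fff.  */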
20692
20693 rtx
20694 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20695 rtx reg)
20696 {
20697 rtx base, offset;
20698
20699 if (reg == NULL && !reload_completed)
20700 reg = gen_reg_rtx (Pmode);
20701
20702 if (GET_CODE (orig) == CONST)
20703 {
20704 rtx reg_temp;
20705
20706 if (GET_CODE (XEXP (orig, 0)) == PLUS
20707 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20708 return orig;
20709
20710 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20711
20712 /* Use a different reg for the intermediate value, as
20713 it will be marked UNCHANGING. */
20714 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20715 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20716 Pmode, reg_temp);
20717 offset =
20718 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20719 Pmode, reg);
20720
20721 if (CONST_INT_P (offset))
20722 {
20723 if (SMALL_INT (offset))
20724 return plus_constant (Pmode, base, INTVAL (offset));
20725 else if (!reload_completed)
20726 offset = force_reg (Pmode, offset);
20727 else
20728 {
20729 rtx mem = force_const_mem (Pmode, orig);
20730 return machopic_legitimize_pic_address (mem, Pmode, reg);
20731 }
20732 }
20733 return gen_rtx_PLUS (Pmode, base, offset);
20734 }
20735
20736 /* Fall back on generic machopic code. */
20737 return machopic_legitimize_pic_address (orig, mode, reg);
20738 }
20739
20740 /* Output a .machine directive for the Darwin assembler, and call
20741 the generic start_file routine. */
20742
20743 static void
20744 rs6000_darwin_file_start (void)
20745 {
20746 static const struct
20747 {
20748 const char *arg;
20749 const char *name;
20750 HOST_WIDE_INT if_set;
20751 } mapping[] = {
20752 { "ppc64", "ppc64", MASK_64BIT },
20753 { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF \
20754 | MASK_POWERPC64 },
20755 { "power4", "ppc970", 0 },
20756 { "G5", "ppc970", 0 },
20757 { "7450", "ppc7450", 0 },
20758 { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
20759 { "G4", "ppc7400", 0 },
20760 { "750", "ppc750", 0 },
20761 { "740", "ppc750", 0 },
20762 { "G3", "ppc750", 0 },
20763 { "604e", "ppc604e", 0 },
20764 { "604", "ppc604", 0 },
20765 { "603e", "ppc603", 0 },
20766 { "603", "ppc603", 0 },
20767 { "601", "ppc601", 0 },
20768 { NULL, "ppc", 0 } };
20769 const char *cpu_id = "";
20770 size_t i;
20771
20772 rs6000_file_start ();
20773 darwin_file_start ();
20774
20775 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20776
20777 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20778 cpu_id = rs6000_default_cpu;
20779
20780 if (OPTION_SET_P (rs6000_cpu_index))
20781 cpu_id = processor_target_table[rs6000_cpu_index].name;
20782
20783 /* Look through the mapping array. Pick the first name that either
20784 matches the argument, has a bit set in IF_SET that is also set
20785 in the target flags, or has a NULL name. */
20786
20787 i = 0;
20788 while (mapping[i].arg != NULL
20789 && strcmp (mapping[i].arg, cpu_id) != 0
20790 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20791 i++;
20792
20793 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20794 }
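
/* For example (illustrative): with -mcpu=G4 the scan settles on a
   "ppc7400" entry (matched by name or via the OPTION_MASK_ALTIVEC bit of
   the "7400" entry), so the file begins with "\t.machine ppc7400".  */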
20795
20796 #endif /* TARGET_MACHO */
20797
20798 #if TARGET_ELF
20799 static int
20800 rs6000_elf_reloc_rw_mask (void)
20801 {
20802 if (flag_pic)
20803 return 3;
20804 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20805 return 2;
20806 else
20807 return 0;
20808 }
20809
20810 /* Record an element in the table of global constructors. SYMBOL is
20811 a SYMBOL_REF of the function to be called; PRIORITY is a number
20812 between 0 and MAX_INIT_PRIORITY.
20813
20814 This differs from default_named_section_asm_out_constructor in
20815 that we have special handling for -mrelocatable. */
20816
20817 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20818 static void
20819 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20820 {
20821 const char *section = ".ctors";
20822 char buf[18];
20823
20824 if (priority != DEFAULT_INIT_PRIORITY)
20825 {
20826 sprintf (buf, ".ctors.%.5u",
20827 /* Invert the numbering so the linker puts us in the proper
20828 order; constructors are run from right to left, and the
20829 linker sorts in increasing order. */
20830 MAX_INIT_PRIORITY - priority);
20831 section = buf;
20832 }
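
  /* E.g. (illustrative): with MAX_INIT_PRIORITY of 65535, a constructor
     of priority 101 lands in section ".ctors.65434".  */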
20833
20834 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20835 assemble_align (POINTER_SIZE);
20836
20837 if (DEFAULT_ABI == ABI_V4
20838 && (TARGET_RELOCATABLE || flag_pic > 1))
20839 {
20840 fputs ("\t.long (", asm_out_file);
20841 output_addr_const (asm_out_file, symbol);
20842 fputs (")@fixup\n", asm_out_file);
20843 }
20844 else
20845 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20846 }
20847
20848 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20849 static void
20850 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20851 {
20852 const char *section = ".dtors";
20853 char buf[18];
20854
20855 if (priority != DEFAULT_INIT_PRIORITY)
20856 {
20857 sprintf (buf, ".dtors.%.5u",
20858 /* Invert the numbering so the linker puts us in the proper
20859 order; constructors are run from right to left, and the
20860 linker sorts in increasing order. */
20861 MAX_INIT_PRIORITY - priority);
20862 section = buf;
20863 }
20864
20865 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20866 assemble_align (POINTER_SIZE);
20867
20868 if (DEFAULT_ABI == ABI_V4
20869 && (TARGET_RELOCATABLE || flag_pic > 1))
20870 {
20871 fputs ("\t.long (", asm_out_file);
20872 output_addr_const (asm_out_file, symbol);
20873 fputs (")@fixup\n", asm_out_file);
20874 }
20875 else
20876 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20877 }
20878
20879 void
20880 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20881 {
20882 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20883 {
20884 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20885 ASM_OUTPUT_LABEL (file, name);
20886 fputs (DOUBLE_INT_ASM_OP, file);
20887 rs6000_output_function_entry (file, name);
20888 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20889 if (DOT_SYMBOLS)
20890 {
20891 fputs ("\t.size\t", file);
20892 assemble_name (file, name);
20893 fputs (",24\n\t.type\t.", file);
20894 assemble_name (file, name);
20895 fputs (",@function\n", file);
20896 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20897 {
20898 fputs ("\t.globl\t.", file);
20899 assemble_name (file, name);
20900 putc ('\n', file);
20901 }
20902 }
20903 else
20904 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20905 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20906 rs6000_output_function_entry (file, name);
20907 fputs (":\n", file);
20908 return;
20909 }
20910
20911 int uses_toc;
20912 if (DEFAULT_ABI == ABI_V4
20913 && (TARGET_RELOCATABLE || flag_pic > 1)
20914 && !TARGET_SECURE_PLT
20915 && (!constant_pool_empty_p () || crtl->profile)
20916 && (uses_toc = uses_TOC ()))
20917 {
20918 char buf[256];
20919
20920 if (uses_toc == 2)
20921 switch_to_other_text_partition ();
20922 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20923
20924 fprintf (file, "\t.long ");
20925 assemble_name (file, toc_label_name);
20926 need_toc_init = 1;
20927 putc ('-', file);
20928 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20929 assemble_name (file, buf);
20930 putc ('\n', file);
20931 if (uses_toc == 2)
20932 switch_to_other_text_partition ();
20933 }
20934
20935 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20936 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20937
20938 if (TARGET_CMODEL == CMODEL_LARGE
20939 && rs6000_global_entry_point_prologue_needed_p ())
20940 {
20941 char buf[256];
20942
20943 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20944
20945 fprintf (file, "\t.quad .TOC.-");
20946 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20947 assemble_name (file, buf);
20948 putc ('\n', file);
20949 }
20950
20951 if (DEFAULT_ABI == ABI_AIX)
20952 {
20953 const char *desc_name, *orig_name;
20954
20955 orig_name = (*targetm.strip_name_encoding) (name);
20956 desc_name = orig_name;
20957 while (*desc_name == '.')
20958 desc_name++;
20959
20960 if (TREE_PUBLIC (decl))
20961 fprintf (file, "\t.globl %s\n", desc_name);
20962
20963 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20964 fprintf (file, "%s:\n", desc_name);
20965 fprintf (file, "\t.long %s\n", orig_name);
20966 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20967 fputs ("\t.long 0\n", file);
20968 fprintf (file, "\t.previous\n");
20969 }
20970 ASM_OUTPUT_LABEL (file, name);
20971 }
20972
20973 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20974 static void
20975 rs6000_elf_file_end (void)
20976 {
20977 #ifdef HAVE_AS_GNU_ATTRIBUTE
20978 /* ??? The value emitted depends on options active at file end.
20979 Assume anyone using #pragma or attributes that might change
20980 options knows what they are doing. */
20981 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20982 && rs6000_passes_float)
20983 {
20984 int fp;
20985
20986 if (TARGET_HARD_FLOAT)
20987 fp = 1;
20988 else
20989 fp = 2;
20990 if (rs6000_passes_long_double)
20991 {
20992 if (!TARGET_LONG_DOUBLE_128)
20993 fp |= 2 * 4;
20994 else if (TARGET_IEEEQUAD)
20995 fp |= 3 * 4;
20996 else
20997 fp |= 1 * 4;
20998 }
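/* E.g., hard float with IBM 128-bit long double emits
   ".gnu_attribute 4, 5" (1 | 1*4).  */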
20999 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
21000 }
21001 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
21002 {
21003 if (rs6000_passes_vector)
21004 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
21005 (TARGET_ALTIVEC_ABI ? 2 : 1));
21006 if (rs6000_returns_struct)
21007 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
21008 aix_struct_return ? 2 : 1);
21009 }
21010 #endif
21011 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21012 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
21013 file_end_indicate_exec_stack ();
21014 #endif
21015
21016 if (flag_split_stack)
21017 file_end_indicate_split_stack ();
21018
21019 if (cpu_builtin_p)
21020 {
21021 /* We have expanded a CPU builtin, so we need to emit a reference to
21022 the special symbol that LIBC uses to declare it supports the
21023 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21024 switch_to_section (data_section);
21025 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
21026 fprintf (asm_out_file, "\t%s %s\n",
21027 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
21028 }
21029 }
21030 #endif
21031
21032 #if TARGET_XCOFF
21033
21034 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21035 #define HAVE_XCOFF_DWARF_EXTRAS 0
21036 #endif
21037
21038 static enum unwind_info_type
21039 rs6000_xcoff_debug_unwind_info (void)
21040 {
21041 return UI_NONE;
21042 }
21043
21044 static void
21045 rs6000_xcoff_asm_output_anchor (rtx symbol)
21046 {
21047 char buffer[100];
21048
21049 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21050 SYMBOL_REF_BLOCK_OFFSET (symbol));
21051 fprintf (asm_out_file, "%s", SET_ASM_OP);
21052 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21053 fprintf (asm_out_file, ",");
21054 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21055 fprintf (asm_out_file, "\n");
21056 }
21057
21058 static void
21059 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21060 {
21061 fputs (GLOBAL_ASM_OP, stream);
21062 RS6000_OUTPUT_BASENAME (stream, name);
21063 putc ('\n', stream);
21064 }
21065
21066 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
21067 selects the private read-only data section name when non-null. */
21068
21069 static void
21070 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21071 {
21072 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21073 directive
21074 ? xcoff_private_rodata_section_name
21075 : xcoff_read_only_section_name,
21076 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21077 }
21078
21079 /* Likewise for read-write sections. */
21080
21081 static void
21082 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21083 {
21084 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21085 xcoff_private_data_section_name,
21086 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21087 }
21088
21089 static void
21090 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21091 {
21092 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21093 directive
21094 ? xcoff_private_data_section_name
21095 : xcoff_tls_data_section_name,
21096 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21097 }
21098
21099 /* A get_unnamed_section callback, used for switching to toc_section. */
21100
21101 static void
21102 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21103 {
21104 if (TARGET_MINIMAL_TOC)
21105 {
21106 /* toc_section is always selected at least once from
21107 rs6000_xcoff_file_start, so this is guaranteed to be
21108 defined exactly once in each file. */
21109 if (!toc_initialized)
21110 {
21111 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21112 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21113 toc_initialized = 1;
21114 }
21115 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21116 (TARGET_32BIT ? "" : ",3"));
21117 }
21118 else
21119 fputs ("\t.toc\n", asm_out_file);
21120 }
21121
21122 /* Implement TARGET_ASM_INIT_SECTIONS. */
21123
21124 static void
21125 rs6000_xcoff_asm_init_sections (void)
21126 {
21127 read_only_data_section
21128 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21129 NULL);
21130
21131 private_data_section
21132 = get_unnamed_section (SECTION_WRITE,
21133 rs6000_xcoff_output_readwrite_section_asm_op,
21134 NULL);
21135
21136 read_only_private_data_section
21137 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21138 "");
21139
21140 tls_data_section
21141 = get_unnamed_section (SECTION_TLS,
21142 rs6000_xcoff_output_tls_section_asm_op,
21143 NULL);
21144
21145 tls_private_data_section
21146 = get_unnamed_section (SECTION_TLS,
21147 rs6000_xcoff_output_tls_section_asm_op,
21148 "");
21149
21150 toc_section
21151 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21152
21153 readonly_data_section = read_only_data_section;
21154 }
21155
21156 static int
21157 rs6000_xcoff_reloc_rw_mask (void)
21158 {
21159 return 3;
21160 }
21161
21162 static void
21163 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21164 tree decl ATTRIBUTE_UNUSED)
21165 {
21166 int smclass;
21167 static const char * const suffix[7]
21168 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21169
21170 if (flags & SECTION_EXCLUDE)
21171 smclass = 6;
21172 else if (flags & SECTION_DEBUG)
21173 {
21174 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21175 return;
21176 }
21177 else if (flags & SECTION_CODE)
21178 smclass = 0;
21179 else if (flags & SECTION_TLS)
21180 {
21181 if (flags & SECTION_BSS)
21182 smclass = 5;
21183 else
21184 smclass = 4;
21185 }
21186 else if (flags & SECTION_WRITE)
21187 {
21188 if (flags & SECTION_BSS)
21189 smclass = 3;
21190 else
21191 smclass = 2;
21192 }
21193 else
21194 smclass = 1;
21195
21196 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21197 (flags & SECTION_CODE) ? "." : "",
21198 name, suffix[smclass], flags & SECTION_ENTSIZE);
21199 }
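
/* For example (illustrative): a writable non-BSS section "foo" aligned to
   8 bytes is emitted as ".csect foo[RW],3", while code sections gain the
   "." prefix, as in ".foo[PR]".  */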
21200
21201 #define IN_NAMED_SECTION(DECL) \
21202 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21203 && DECL_SECTION_NAME (DECL) != NULL)
21204
21205 static section *
21206 rs6000_xcoff_select_section (tree decl, int reloc,
21207 unsigned HOST_WIDE_INT align)
21208 {
21209 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21210 named section. */
21211 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21212 {
21213 resolve_unique_section (decl, reloc, true);
21214 if (IN_NAMED_SECTION (decl))
21215 return get_named_section (decl, NULL, reloc);
21216 }
21217
21218 if (decl_readonly_section (decl, reloc))
21219 {
21220 if (TREE_PUBLIC (decl))
21221 return read_only_data_section;
21222 else
21223 return read_only_private_data_section;
21224 }
21225 else
21226 {
21227 #if HAVE_AS_TLS
21228 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21229 {
21230 if (bss_initializer_p (decl))
21231 return tls_comm_section;
21232 else if (TREE_PUBLIC (decl))
21233 return tls_data_section;
21234 else
21235 return tls_private_data_section;
21236 }
21237 else
21238 #endif
21239 if (TREE_PUBLIC (decl))
21240 return data_section;
21241 else
21242 return private_data_section;
21243 }
21244 }
21245
21246 static void
21247 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21248 {
21249 const char *name;
21250
21251 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21252 name = (*targetm.strip_name_encoding) (name);
21253 set_decl_section_name (decl, name);
21254 }
21255
21256 /* Select section for constant in constant pool.
21257
21258 On RS/6000, all constants are in the private read-only data area.
21259 However, if this is being placed in the TOC it must be output as a
21260 toc entry. */
21261
21262 static section *
21263 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21264 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21265 {
21266 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21267 return toc_section;
21268 else
21269 return read_only_private_data_section;
21270 }
21271
21272 /* Remove any trailing [DS] or the like from the symbol name. */
21273
21274 static const char *
21275 rs6000_xcoff_strip_name_encoding (const char *name)
21276 {
21277 size_t len;
21278 if (*name == '*')
21279 name++;
21280 len = strlen (name);
21281 if (name[len - 1] == ']')
21282 return ggc_alloc_string (name, len - 4);
21283 else
21284 return name;
21285 }
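
/* For example (illustrative): "foo[DS]" and "*bar[RW]" both strip to the
   bare name; the code assumes any trailing bracketed storage-mapping
   class is exactly four characters, as in "[DS]".  */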
21286
21287 /* Section attributes. AIX is always PIC. */
21288
21289 static unsigned int
21290 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21291 {
21292 unsigned int align;
21293 unsigned int flags = default_section_type_flags (decl, name, reloc);
21294
21295 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21296 flags |= SECTION_BSS;
21297
21298 /* Align to at least UNIT size. */
21299 if (!decl || !DECL_P (decl))
21300 align = MIN_UNITS_PER_WORD;
21301 /* Align code CSECT to at least 32 bytes. */
21302 else if ((flags & SECTION_CODE) != 0)
21303 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21304 else
21305 /* Increase alignment of large objects if not already stricter. */
21306 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21307 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21308 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21309
21310 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
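  /* The alignment is carried in the SECTION_ENTSIZE bits as its log2; a
     32-byte-aligned code csect, for instance, contributes
     exact_log2 (32) == 5.  */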
21311 }
21312
21313 /* Output at beginning of assembler file.
21314
21315 Initialize the section names for the RS/6000 at this point.
21316
21317 Specify filename, including full path, to assembler.
21318
21319 We want to go into the TOC section so at least one .toc will be emitted.
21320 Also, in order to output proper .bs/.es pairs, we need at least one static
21321 [RW] section emitted.
21322
21323 Finally, declare mcount when profiling to make the assembler happy. */
21324
21325 static void
21326 rs6000_xcoff_file_start (void)
21327 {
21328 rs6000_gen_section_name (&xcoff_bss_section_name,
21329 main_input_filename, ".bss_");
21330 rs6000_gen_section_name (&xcoff_private_data_section_name,
21331 main_input_filename, ".rw_");
21332 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21333 main_input_filename, ".rop_");
21334 rs6000_gen_section_name (&xcoff_read_only_section_name,
21335 main_input_filename, ".ro_");
21336 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21337 main_input_filename, ".tls_");
21338
21339 fputs ("\t.file\t", asm_out_file);
21340 output_quoted_string (asm_out_file, main_input_filename);
21341 fputc ('\n', asm_out_file);
21342 if (write_symbols != NO_DEBUG)
21343 switch_to_section (private_data_section);
21344 switch_to_section (toc_section);
21345 switch_to_section (text_section);
21346 if (profile_flag)
21347 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21348 rs6000_file_start ();
21349 }
21350
21351 /* Output at end of assembler file.
21352 On the RS/6000, referencing data should automatically pull in text. */
21353
21354 static void
21355 rs6000_xcoff_file_end (void)
21356 {
21357 switch_to_section (text_section);
21358 if (xcoff_tls_exec_model_detected)
21359 {
21360 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21361 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21362 }
21363 fputs ("_section_.text:\n", asm_out_file);
21364 switch_to_section (data_section);
21365 fputs (TARGET_32BIT
21366 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21367 asm_out_file);
21368
21369 }
21370
21371 struct declare_alias_data
21372 {
21373 FILE *file;
21374 bool function_descriptor;
21375 };
21376
21377 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
21378
21379 static bool
21380 rs6000_declare_alias (struct symtab_node *n, void *d)
21381 {
21382 struct declare_alias_data *data = (struct declare_alias_data *)d;
21383 /* Main symbol is output specially, because varasm machinery does part of
21384 the job for us - we do not need to declare .globl/lglobs and such. */
21385 if (!n->alias || n->weakref)
21386 return false;
21387
21388 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21389 return false;
21390
21391 /* Prevent assemble_alias from trying to use .set pseudo operation
21392 that does not behave as expected by the middle-end. */
21393 TREE_ASM_WRITTEN (n->decl) = true;
21394
21395 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21396 char *buffer = (char *) alloca (strlen (name) + 2);
21397 char *p;
21398 int dollar_inside = 0;
21399
21400 strcpy (buffer, name);
21401 p = strchr (buffer, '$');
21402 while (p) {
21403 *p = '_';
21404 dollar_inside++;
21405 p = strchr (p + 1, '$');
21406 }
21407 if (TREE_PUBLIC (n->decl))
21408 {
21409 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21410 {
21411 if (dollar_inside) {
21412 if (data->function_descriptor)
21413 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21414 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21415 }
21416 if (data->function_descriptor)
21417 {
21418 fputs ("\t.globl .", data->file);
21419 RS6000_OUTPUT_BASENAME (data->file, buffer);
21420 putc ('\n', data->file);
21421 }
21422 fputs ("\t.globl ", data->file);
21423 assemble_name (data->file, buffer);
21424 putc ('\n', data->file);
21425 }
21426 #ifdef ASM_WEAKEN_DECL
21427 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21428 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21429 #endif
21430 }
21431 else
21432 {
21433 if (dollar_inside)
21434 {
21435 if (data->function_descriptor)
21436 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21437 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21438 }
21439 if (data->function_descriptor)
21440 {
21441 fputs ("\t.lglobl .", data->file);
21442 RS6000_OUTPUT_BASENAME (data->file, buffer);
21443 putc ('\n', data->file);
21444 }
21445 fputs ("\t.lglobl ", data->file);
21446 assemble_name (data->file, buffer);
21447 putc ('\n', data->file);
21448 }
21449 if (data->function_descriptor)
21450 putc ('.', data->file);
21451 ASM_OUTPUT_LABEL (data->file, buffer);
21452 return false;
21453 }
21454
21455
21456 #ifdef HAVE_GAS_HIDDEN
21457 /* Helper function to calculate visibility of a DECL
21458 and return the value as a const string. */
21459
21460 static const char *
21461 rs6000_xcoff_visibility (tree decl)
21462 {
21463 static const char * const visibility_types[] = {
21464 "", ",protected", ",hidden", ",internal"
21465 };
21466
21467 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21468 return visibility_types[vis];
21469 }
21470 #endif
21471
21472
21473 /* This macro produces the initial definition of a function name.
21474 On the RS/6000, we need to place an extra '.' in the function name and
21475 output the function descriptor.
21476 Dollar signs are converted to underscores.
21477
21478 The csect for the function will have already been created when
21479 text_section was selected. We do have to go back to that csect, however.
21480
21481 The third and fourth parameters to the .function pseudo-op are
21482 placeholders which no longer have any use.
21483
21484 Because the AIX assembler's .set command has unexpected semantics, we
21485 output all aliases as alternative labels in front of the definition. */
21486
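/* In outline, for a public non-weak function this emits a .globl for the
dot-prefixed entry symbol, re-enters the function's csect (with the ",3"
alignment operand on 64-bit targets), lays down the descriptor label and
its .long/.llong <entry>, TOC[tc0], 0 body, switches to the text section,
and finally emits the dot-prefixed code label, with any aliases
interleaved as alternative labels. */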
21487 void
21488 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21489 {
21490 char *buffer = (char *) alloca (strlen (name) + 1);
21491 char *p;
21492 int dollar_inside = 0;
21493 struct declare_alias_data data = {file, false};
21494
21495 strcpy (buffer, name);
21496 p = strchr (buffer, '$');
21497 while (p) {
21498 *p = '_';
21499 dollar_inside++;
21500 p = strchr (p + 1, '$');
21501 }
21502 if (TREE_PUBLIC (decl))
21503 {
21504 if (!RS6000_WEAK || !DECL_WEAK (decl))
21505 {
21506 if (dollar_inside) {
21507 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21508 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21509 }
21510 fputs ("\t.globl .", file);
21511 RS6000_OUTPUT_BASENAME (file, buffer);
21512 #ifdef HAVE_GAS_HIDDEN
21513 fputs (rs6000_xcoff_visibility (decl), file);
21514 #endif
21515 putc ('\n', file);
21516 }
21517 }
21518 else
21519 {
21520 if (dollar_inside) {
21521 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21522 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21523 }
21524 fputs ("\t.lglobl .", file);
21525 RS6000_OUTPUT_BASENAME (file, buffer);
21526 putc ('\n', file);
21527 }
21528
21529 fputs ("\t.csect ", file);
21530 assemble_name (file, buffer);
21531 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21532
21533 ASM_OUTPUT_LABEL (file, buffer);
21534
21535 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21536 &data, true);
21537 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21538 RS6000_OUTPUT_BASENAME (file, buffer);
21539 fputs (", TOC[tc0], 0\n", file);
21540
21541 in_section = NULL;
21542 switch_to_section (function_section (decl));
21543 putc ('.', file);
21544 ASM_OUTPUT_LABEL (file, buffer);
21545
21546 data.function_descriptor = true;
21547 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21548 &data, true);
21549 if (!DECL_IGNORED_P (decl))
21550 {
21551 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
21552 xcoffout_declare_function (file, decl, buffer);
21553 else if (dwarf_debuginfo_p ())
21554 {
21555 name = (*targetm.strip_name_encoding) (name);
21556 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21557 }
21558 }
21559 return;
21560 }
21561
21562
21563 /* Output assembly language to globalize a symbol from a DECL,
21564 possibly with visibility. */
21565
21566 void
21567 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21568 {
21569 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21570 fputs (GLOBAL_ASM_OP, stream);
21571 assemble_name (stream, name);
21572 #ifdef HAVE_GAS_HIDDEN
21573 fputs (rs6000_xcoff_visibility (decl), stream);
21574 #endif
21575 putc ('\n', stream);
21576 }
21577
21578 /* Output assembly language to define a symbol as COMMON from a DECL,
21579 possibly with visibility. */
21580
21581 void
21582 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21583 tree decl ATTRIBUTE_UNUSED,
21584 const char *name,
21585 unsigned HOST_WIDE_INT size,
21586 unsigned int align)
21587 {
21588 unsigned int align2 = 2;
21589
21590 if (align == 0)
21591 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21592
21593 if (align > 32)
21594 align2 = floor_log2 (align / BITS_PER_UNIT);
21595 else if (size > 4)
21596 align2 = 3;
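/* For example, a 16-byte-aligned object (ALIGN == 128 bits) gets
align2 = floor_log2 (128 / 8) = 4, while a small 4-byte object keeps
the default align2 = 2, i.e. a 4-byte boundary. */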
21597
21598 if (! DECL_COMMON (decl))
21599 {
21600 /* Forget section. */
21601 in_section = NULL;
21602
21603 /* Globalize TLS BSS. */
21604 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21605 {
21606 fputs (GLOBAL_ASM_OP, stream);
21607 assemble_name (stream, name);
21608 fputc ('\n', stream);
21609 }
21610
21611 /* Switch to section and skip space. */
21612 fputs ("\t.csect ", stream);
21613 assemble_name (stream, name);
21614 fprintf (stream, ",%u\n", align2);
21615 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21616 ASM_OUTPUT_SKIP (stream, size ? size : 1);
21617 return;
21618 }
21619
21620 if (TREE_PUBLIC (decl))
21621 {
21622 fprintf (stream,
21623 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21624 name, size, align2);
21625
21626 #ifdef HAVE_GAS_HIDDEN
21627 if (decl != NULL)
21628 fputs (rs6000_xcoff_visibility (decl), stream);
21629 #endif
21630 putc ('\n', stream);
21631 }
21632 else
21633 fprintf (stream,
21634 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21635 (*targetm.strip_name_encoding) (name), size, name, align2);
21636 }
21637
21638 /* This macro produces the initial definition of an object (variable) name.
21639 Because the AIX assembler's .set command has unexpected semantics, we
21640 output all aliases as alternative labels in front of the definition. */
21641
21642 void
21643 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21644 {
21645 struct declare_alias_data data = {file, false};
21646 ASM_OUTPUT_LABEL (file, name);
21647 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21648 &data, true);
21649 }
21650
21651 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
21652
21653 void
21654 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21655 {
21656 fputs (integer_asm_op (size, FALSE), file);
21657 assemble_name (file, label);
21658 fputs ("-$", file);
21659 }
21660
21661 /* Output a symbol offset relative to the dbase for the current object.
21662 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21663 signed offsets.
21664
21665 __gcc_unwind_dbase is embedded in all executables/libraries through
21666 libgcc/config/rs6000/crtdbase.S. */
21667
21668 void
21669 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21670 {
21671 fputs (integer_asm_op (size, FALSE), file);
21672 assemble_name (file, label);
21673 fputs("-__gcc_unwind_dbase", file);
21674 }
21675
21676 #ifdef HAVE_AS_TLS
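/* Encode the XCOFF csect mapping class into the symbol name. As a worked
example of the table below (names hypothetical): a function "foo" becomes
"foo[DS]", an external variable "bar" becomes "bar[UA]", and an
uninitialized definition "baz" becomes "baz[BS]"; symbols that already
end in ']' and aliases are left untouched. */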
21677 static void
21678 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21679 {
21680 rtx symbol;
21681 int flags;
21682 const char *symname;
21683
21684 default_encode_section_info (decl, rtl, first);
21685
21686 /* Careful not to prod global register variables. */
21687 if (!MEM_P (rtl))
21688 return;
21689 symbol = XEXP (rtl, 0);
21690 if (!SYMBOL_REF_P (symbol))
21691 return;
21692
21693 flags = SYMBOL_REF_FLAGS (symbol);
21694
21695 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21696 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21697
21698 SYMBOL_REF_FLAGS (symbol) = flags;
21699
21700 symname = XSTR (symbol, 0);
21701
21702 /* Append the CSECT mapping class, unless the symbol is already qualified.
21703 Aliases are implemented as labels, so an alias's symbol name must not
21704 have a mapping class appended. */
21705 if (decl
21706 && DECL_P (decl)
21707 && VAR_OR_FUNCTION_DECL_P (decl)
21708 && (symtab_node::get (decl) == NULL
21709 || symtab_node::get (decl)->alias == 0)
21710 && symname[strlen (symname) - 1] != ']')
21711 {
21712 const char *smclass = NULL;
21713
21714 if (TREE_CODE (decl) == FUNCTION_DECL)
21715 smclass = "[DS]";
21716 else if (DECL_THREAD_LOCAL_P (decl))
21717 {
21718 if (bss_initializer_p (decl))
21719 smclass = "[UL]";
21720 else if (flag_data_sections)
21721 smclass = "[TL]";
21722 }
21723 else if (DECL_EXTERNAL (decl))
21724 smclass = "[UA]";
21725 else if (bss_initializer_p (decl))
21726 smclass = "[BS]";
21727 else if (flag_data_sections)
21728 {
21729 /* This must exactly match the logic of select section. */
21730 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
21731 smclass = "[RO]";
21732 else
21733 smclass = "[RW]";
21734 }
21735
21736 if (smclass != NULL)
21737 {
21738 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
21739
21740 strcpy (newname, symname);
21741 strcat (newname, smclass);
21742 XSTR (symbol, 0) = ggc_strdup (newname);
21743 }
21744 }
21745 }
21746 #endif /* HAVE_AS_TLS */
21747 #endif /* TARGET_XCOFF */
21748
21749 void
21750 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21751 const char *name, const char *val)
21752 {
21753 fputs ("\t.weak\t", stream);
21754 assemble_name (stream, name);
21755 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21756 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21757 {
21758 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21759 if (TARGET_XCOFF)
21760 fputs (rs6000_xcoff_visibility (decl), stream);
21761 #endif
21762 fputs ("\n\t.weak\t.", stream);
21763 RS6000_OUTPUT_BASENAME (stream, name);
21764 }
21765 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21766 if (TARGET_XCOFF)
21767 fputs (rs6000_xcoff_visibility (decl), stream);
21768 #endif
21769 fputc ('\n', stream);
21770
21771 if (val)
21772 {
21773 #ifdef ASM_OUTPUT_DEF
21774 ASM_OUTPUT_DEF (stream, name, val);
21775 #endif
21776 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21777 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21778 {
21779 fputs ("\t.set\t.", stream);
21780 RS6000_OUTPUT_BASENAME (stream, name);
21781 fputs (",.", stream);
21782 RS6000_OUTPUT_BASENAME (stream, val);
21783 fputc ('\n', stream);
21784 }
21785 }
21786 }
21787
21788
21789 /* Return true if INSN should not be copied. */
21790
21791 static bool
21792 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21793 {
21794 return recog_memoized (insn) >= 0
21795 && get_attr_cannot_copy (insn);
21796 }
21797
21798 /* Compute a (partial) cost for rtx X. Return true if the complete
21799 cost has been computed, and false if subexpressions should be
21800 scanned. In either case, *TOTAL contains the cost result. */
21801
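/* For instance, a CONST_INT that is already valid as an addi/addis
immediate inside a SET, PLUS or MINUS reports a cost of 0 below, while
constants that must be materialized separately fall through to the
CONST/SYMBOL_REF case and report roughly one or two instructions. */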
21802 static bool
21803 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21804 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21805 {
21806 int code = GET_CODE (x);
21807
21808 switch (code)
21809 {
21810 /* On the RS/6000, if it is valid in the insn, it is free. */
21811 case CONST_INT:
21812 if (((outer_code == SET
21813 || outer_code == PLUS
21814 || outer_code == MINUS)
21815 && (satisfies_constraint_I (x)
21816 || satisfies_constraint_L (x)))
21817 || (outer_code == AND
21818 && (satisfies_constraint_K (x)
21819 || (mode == SImode
21820 ? satisfies_constraint_L (x)
21821 : satisfies_constraint_J (x))))
21822 || ((outer_code == IOR || outer_code == XOR)
21823 && (satisfies_constraint_K (x)
21824 || (mode == SImode
21825 ? satisfies_constraint_L (x)
21826 : satisfies_constraint_J (x))))
21827 || outer_code == ASHIFT
21828 || outer_code == ASHIFTRT
21829 || outer_code == LSHIFTRT
21830 || outer_code == ROTATE
21831 || outer_code == ROTATERT
21832 || outer_code == ZERO_EXTRACT
21833 || (outer_code == MULT
21834 && satisfies_constraint_I (x))
21835 || ((outer_code == DIV || outer_code == UDIV
21836 || outer_code == MOD || outer_code == UMOD)
21837 && exact_log2 (INTVAL (x)) >= 0)
21838 || (outer_code == COMPARE
21839 && (satisfies_constraint_I (x)
21840 || satisfies_constraint_K (x)))
21841 || ((outer_code == EQ || outer_code == NE)
21842 && (satisfies_constraint_I (x)
21843 || satisfies_constraint_K (x)
21844 || (mode == SImode
21845 ? satisfies_constraint_L (x)
21846 : satisfies_constraint_J (x))))
21847 || (outer_code == GTU
21848 && satisfies_constraint_I (x))
21849 || (outer_code == LTU
21850 && satisfies_constraint_P (x)))
21851 {
21852 *total = 0;
21853 return true;
21854 }
21855 else if ((outer_code == PLUS
21856 && reg_or_add_cint_operand (x, mode))
21857 || (outer_code == MINUS
21858 && reg_or_sub_cint_operand (x, mode))
21859 || ((outer_code == SET
21860 || outer_code == IOR
21861 || outer_code == XOR)
21862 && (INTVAL (x)
21863 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21864 {
21865 *total = COSTS_N_INSNS (1);
21866 return true;
21867 }
21868 /* FALLTHRU */
21869
21870 case CONST_DOUBLE:
21871 case CONST_WIDE_INT:
21872 case CONST:
21873 case HIGH:
21874 case SYMBOL_REF:
21875 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21876 return true;
21877
21878 case MEM:
21879 /* When optimizing for size, MEM should be slightly more expensive
21880 than generating address, e.g., (plus (reg) (const)).
21881 L1 cache latency is about two instructions. */
21882 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21883 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21884 *total += COSTS_N_INSNS (100);
21885 return true;
21886
21887 case LABEL_REF:
21888 *total = 0;
21889 return true;
21890
21891 case PLUS:
21892 case MINUS:
21893 if (FLOAT_MODE_P (mode))
21894 *total = rs6000_cost->fp;
21895 else
21896 *total = COSTS_N_INSNS (1);
21897 return false;
21898
21899 case MULT:
21900 if (CONST_INT_P (XEXP (x, 1))
21901 && satisfies_constraint_I (XEXP (x, 1)))
21902 {
21903 if (INTVAL (XEXP (x, 1)) >= -256
21904 && INTVAL (XEXP (x, 1)) <= 255)
21905 *total = rs6000_cost->mulsi_const9;
21906 else
21907 *total = rs6000_cost->mulsi_const;
21908 }
21909 else if (mode == SFmode)
21910 *total = rs6000_cost->fp;
21911 else if (FLOAT_MODE_P (mode))
21912 *total = rs6000_cost->dmul;
21913 else if (mode == DImode)
21914 *total = rs6000_cost->muldi;
21915 else
21916 *total = rs6000_cost->mulsi;
21917 return false;
21918
21919 case FMA:
21920 if (mode == SFmode)
21921 *total = rs6000_cost->fp;
21922 else
21923 *total = rs6000_cost->dmul;
21924 break;
21925
21926 case DIV:
21927 case MOD:
21928 if (FLOAT_MODE_P (mode))
21929 {
21930 *total = mode == DFmode ? rs6000_cost->ddiv
21931 : rs6000_cost->sdiv;
21932 return false;
21933 }
21934 /* FALLTHRU */
21935
21936 case UDIV:
21937 case UMOD:
21938 if (CONST_INT_P (XEXP (x, 1))
21939 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21940 {
21941 if (code == DIV || code == MOD)
21942 /* Shift, addze */
21943 *total = COSTS_N_INSNS (2);
21944 else
21945 /* Shift */
21946 *total = COSTS_N_INSNS (1);
21947 }
21948 else
21949 {
21950 if (GET_MODE (XEXP (x, 1)) == DImode)
21951 *total = rs6000_cost->divdi;
21952 else
21953 *total = rs6000_cost->divsi;
21954 }
21955 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21956 if (!TARGET_MODULO && (code == MOD || code == UMOD))
21957 *total += COSTS_N_INSNS (2);
21958 return false;
21959
21960 case CTZ:
21961 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21962 return false;
21963
21964 case FFS:
21965 *total = COSTS_N_INSNS (4);
21966 return false;
21967
21968 case POPCOUNT:
21969 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21970 return false;
21971
21972 case PARITY:
21973 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21974 return false;
21975
21976 case NOT:
21977 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21978 *total = 0;
21979 else
21980 *total = COSTS_N_INSNS (1);
21981 return false;
21982
21983 case AND:
21984 if (CONST_INT_P (XEXP (x, 1)))
21985 {
21986 rtx left = XEXP (x, 0);
21987 rtx_code left_code = GET_CODE (left);
21988
21989 /* rotate-and-mask: 1 insn. */
21990 if ((left_code == ROTATE
21991 || left_code == ASHIFT
21992 || left_code == LSHIFTRT)
21993 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21994 {
21995 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21996 if (!CONST_INT_P (XEXP (left, 1)))
21997 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21998 *total += COSTS_N_INSNS (1);
21999 return true;
22000 }
22001
22002 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22003 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
22004 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
22005 || (val & 0xffff) == val
22006 || (val & 0xffff0000) == val
22007 || ((val & 0xffff) == 0 && mode == SImode))
22008 {
22009 *total = rtx_cost (left, mode, AND, 0, speed);
22010 *total += COSTS_N_INSNS (1);
22011 return true;
22012 }
22013
22014 /* 2 insns. */
22015 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
22016 {
22017 *total = rtx_cost (left, mode, AND, 0, speed);
22018 *total += COSTS_N_INSNS (2);
22019 return true;
22020 }
22021 }
22022
22023 *total = COSTS_N_INSNS (1);
22024 return false;
22025
22026 case IOR:
22027 /* FIXME */
22028 *total = COSTS_N_INSNS (1);
22029 return true;
22030
22031 case CLZ:
22032 case XOR:
22033 case ZERO_EXTRACT:
22034 *total = COSTS_N_INSNS (1);
22035 return false;
22036
22037 case ASHIFT:
22038 /* The EXTSWSLI instruction is a combined instruction. Don't count the
22039 sign extend and the shift separately within the insn. */
22040 if (TARGET_EXTSWSLI && mode == DImode
22041 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
22042 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22043 {
22044 *total = 0;
22045 return false;
22046 }
22047 /* fall through */
22048
22049 case ASHIFTRT:
22050 case LSHIFTRT:
22051 case ROTATE:
22052 case ROTATERT:
22053 /* Handle mul_highpart. */
22054 if (outer_code == TRUNCATE
22055 && GET_CODE (XEXP (x, 0)) == MULT)
22056 {
22057 if (mode == DImode)
22058 *total = rs6000_cost->muldi;
22059 else
22060 *total = rs6000_cost->mulsi;
22061 return true;
22062 }
22063 else if (outer_code == AND)
22064 *total = 0;
22065 else
22066 *total = COSTS_N_INSNS (1);
22067 return false;
22068
22069 case SIGN_EXTEND:
22070 case ZERO_EXTEND:
22071 if (MEM_P (XEXP (x, 0)))
22072 *total = 0;
22073 else
22074 *total = COSTS_N_INSNS (1);
22075 return false;
22076
22077 case COMPARE:
22078 case NEG:
22079 case ABS:
22080 if (!FLOAT_MODE_P (mode))
22081 {
22082 *total = COSTS_N_INSNS (1);
22083 return false;
22084 }
22085 /* FALLTHRU */
22086
22087 case FLOAT:
22088 case UNSIGNED_FLOAT:
22089 case FIX:
22090 case UNSIGNED_FIX:
22091 case FLOAT_TRUNCATE:
22092 *total = rs6000_cost->fp;
22093 return false;
22094
22095 case FLOAT_EXTEND:
22096 if (mode == DFmode)
22097 *total = rs6000_cost->sfdf_convert;
22098 else
22099 *total = rs6000_cost->fp;
22100 return false;
22101
22102 case CALL:
22103 case IF_THEN_ELSE:
22104 if (!speed)
22105 {
22106 *total = COSTS_N_INSNS (1);
22107 return true;
22108 }
22109 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22110 {
22111 *total = rs6000_cost->fp;
22112 return false;
22113 }
22114 break;
22115
22116 case NE:
22117 case EQ:
22118 case GTU:
22119 case LTU:
22120 /* The carry bit requires mode == Pmode. The NEG or PLUS is already
22121 counted, so only add one. */
22122 if (mode == Pmode
22123 && (outer_code == NEG || outer_code == PLUS))
22124 {
22125 *total = COSTS_N_INSNS (1);
22126 return true;
22127 }
22128 /* FALLTHRU */
22129
22130 case GT:
22131 case LT:
22132 case UNORDERED:
22133 if (outer_code == SET)
22134 {
22135 if (XEXP (x, 1) == const0_rtx)
22136 {
22137 *total = COSTS_N_INSNS (2);
22138 return true;
22139 }
22140 else
22141 {
22142 *total = COSTS_N_INSNS (3);
22143 return false;
22144 }
22145 }
22146 /* CC COMPARE. */
22147 if (outer_code == COMPARE)
22148 {
22149 *total = 0;
22150 return true;
22151 }
22152 break;
22153
22154 case UNSPEC:
22155 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22156 {
22157 *total = 0;
22158 return true;
22159 }
22160 break;
22161
22162 default:
22163 break;
22164 }
22165
22166 return false;
22167 }
22168
22169 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
22170
22171 static bool
22172 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22173 int opno, int *total, bool speed)
22174 {
22175 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22176
22177 fprintf (stderr,
22178 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22179 "opno = %d, total = %d, speed = %s, x:\n",
22180 ret ? "complete" : "scan inner",
22181 GET_MODE_NAME (mode),
22182 GET_RTX_NAME (outer_code),
22183 opno,
22184 *total,
22185 speed ? "true" : "false");
22186
22187 debug_rtx (x);
22188
22189 return ret;
22190 }
22191
22192 static int
22193 rs6000_insn_cost (rtx_insn *insn, bool speed)
22194 {
22195 if (recog_memoized (insn) < 0)
22196 return 0;
22197
22198 /* If we are optimizing for size, just use the length. */
22199 if (!speed)
22200 return get_attr_length (insn);
22201
22202 /* Use the cost if provided. */
22203 int cost = get_attr_cost (insn);
22204 if (cost > 0)
22205 return cost;
22206
22207 /* If the insn tells us how many machine insns it contains, use that.
22208 Otherwise estimate it as length / 4, after adjusting the length to
22209 remove the extra size that prefixed instructions take. */
22210 int n = get_attr_num_insns (insn);
22211 if (n == 0)
22212 {
22213 int length = get_attr_length (insn);
22214 if (get_attr_prefixed (insn) == PREFIXED_YES)
22215 {
22216 int adjust = 0;
22217 ADJUST_INSN_LENGTH (insn, adjust);
22218 length -= adjust;
22219 }
22220
22221 n = length / 4;
22222 }
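/* At this point N approximates the number of 4-byte machine
instructions; e.g. a 12-byte unprefixed sequence gives n = 3. */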
22223
22224 enum attr_type type = get_attr_type (insn);
22225
22226 switch (type)
22227 {
22228 case TYPE_LOAD:
22229 case TYPE_FPLOAD:
22230 case TYPE_VECLOAD:
22231 cost = COSTS_N_INSNS (n + 1);
22232 break;
22233
22234 case TYPE_MUL:
22235 switch (get_attr_size (insn))
22236 {
22237 case SIZE_8:
22238 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22239 break;
22240 case SIZE_16:
22241 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22242 break;
22243 case SIZE_32:
22244 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22245 break;
22246 case SIZE_64:
22247 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22248 break;
22249 default:
22250 gcc_unreachable ();
22251 }
22252 break;
22253 case TYPE_DIV:
22254 switch (get_attr_size (insn))
22255 {
22256 case SIZE_32:
22257 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22258 break;
22259 case SIZE_64:
22260 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22261 break;
22262 default:
22263 gcc_unreachable ();
22264 }
22265 break;
22266
22267 case TYPE_FP:
22268 cost = n * rs6000_cost->fp;
22269 break;
22270 case TYPE_DMUL:
22271 cost = n * rs6000_cost->dmul;
22272 break;
22273 case TYPE_SDIV:
22274 cost = n * rs6000_cost->sdiv;
22275 break;
22276 case TYPE_DDIV:
22277 cost = n * rs6000_cost->ddiv;
22278 break;
22279
22280 case TYPE_SYNC:
22281 case TYPE_LOAD_L:
22282 case TYPE_MFCR:
22283 case TYPE_MFCRF:
22284 cost = COSTS_N_INSNS (n + 2);
22285 break;
22286
22287 default:
22288 cost = COSTS_N_INSNS (n);
22289 }
22290
22291 return cost;
22292 }
22293
22294 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22295
22296 static int
22297 rs6000_debug_address_cost (rtx x, machine_mode mode,
22298 addr_space_t as, bool speed)
22299 {
22300 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22301
22302 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22303 ret, speed ? "true" : "false");
22304 debug_rtx (x);
22305
22306 return ret;
22307 }
22308
22309
22310 /* A C expression returning the cost of moving data from a register of class
22311 FROM to one of class TO. */
22312
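/* As a baseline, a move between two GPRs of a mode that fits in one
register reports 2 below (2 * hard_regno_nregs); the direct-move and
special-register cases are scaled against that unit. */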
22313 static int
22314 rs6000_register_move_cost (machine_mode mode,
22315 reg_class_t from, reg_class_t to)
22316 {
22317 int ret;
22318 reg_class_t rclass;
22319
22320 if (TARGET_DEBUG_COST)
22321 dbg_cost_ctrl++;
22322
22323 /* If we have VSX, we can easily move between FPR or Altivec registers,
22324 otherwise we can only easily move within classes.
22325 Do this first so we give best-case answers for union classes
22326 containing both gprs and vsx regs. */
22327 HARD_REG_SET to_vsx, from_vsx;
22328 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22329 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22330 if (!hard_reg_set_empty_p (to_vsx)
22331 && !hard_reg_set_empty_p (from_vsx)
22332 && (TARGET_VSX
22333 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22334 {
22335 int reg = FIRST_FPR_REGNO;
22336 if (TARGET_VSX
22337 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22338 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22339 reg = FIRST_ALTIVEC_REGNO;
22340 ret = 2 * hard_regno_nregs (reg, mode);
22341 }
22342
22343 /* Moves from/to GENERAL_REGS. */
22344 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22345 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22346 {
22347 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22348 {
22349 if (TARGET_DIRECT_MOVE)
22350 {
22351 /* Keep the cost for direct moves above that for within
22352 a register class even if the actual processor cost is
22353 comparable. We do this because a direct move insn
22354 can't be a nop, whereas with ideal register
22355 allocation a move within the same class might turn
22356 out to be a nop. */
22357 if (rs6000_tune == PROCESSOR_POWER9
22358 || rs6000_tune == PROCESSOR_POWER10)
22359 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22360 else
22361 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22362 /* SFmode requires a conversion when moving between gprs
22363 and vsx. */
22364 if (mode == SFmode)
22365 ret += 2;
22366 }
22367 else
22368 ret = (rs6000_memory_move_cost (mode, rclass, false)
22369 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22370 }
22371
22372 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22373 shift. */
22374 else if (rclass == CR_REGS)
22375 ret = 4;
22376
22377 /* For those processors that have slow LR/CTR moves, make them more
22378 expensive than memory in order to bias spills to memory. */
22379 else if ((rs6000_tune == PROCESSOR_POWER6
22380 || rs6000_tune == PROCESSOR_POWER7
22381 || rs6000_tune == PROCESSOR_POWER8
22382 || rs6000_tune == PROCESSOR_POWER9)
22383 && reg_class_subset_p (rclass, SPECIAL_REGS))
22384 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22385
22386 else
22387 /* A move will cost one instruction per GPR moved. */
22388 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22389 }
22390
22391 /* Everything else has to go through GENERAL_REGS. */
22392 else
22393 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22394 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22395
22396 if (TARGET_DEBUG_COST)
22397 {
22398 if (dbg_cost_ctrl == 1)
22399 fprintf (stderr,
22400 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22401 ret, GET_MODE_NAME (mode), reg_class_names[from],
22402 reg_class_names[to]);
22403 dbg_cost_ctrl--;
22404 }
22405
22406 return ret;
22407 }
22408
22409 /* A C expression returning the cost of moving data of MODE between a
22410 register and memory. */
22411
22412 static int
22413 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22414 bool in ATTRIBUTE_UNUSED)
22415 {
22416 int ret;
22417
22418 if (TARGET_DEBUG_COST)
22419 dbg_cost_ctrl++;
22420
22421 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22422 ret = 4 * hard_regno_nregs (0, mode);
22423 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22424 || reg_classes_intersect_p (rclass, VSX_REGS)))
22425 ret = 4 * hard_regno_nregs (32, mode);
22426 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22427 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22428 else
22429 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22430
22431 if (TARGET_DEBUG_COST)
22432 {
22433 if (dbg_cost_ctrl == 1)
22434 fprintf (stderr,
22435 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22436 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22437 dbg_cost_ctrl--;
22438 }
22439
22440 return ret;
22441 }
22442
22443 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22444
22445 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22446 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22447 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22448 move cost between GENERAL_REGS and VSX_REGS low.
22449
22450 It might seem reasonable to use a union class. After all, if usage
22451 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22452 rather than memory. However, in cases where register pressure of
22453 both is high, like the cactus_adm spec test, allowing
22454 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22455 the first scheduling pass. This is partly due to an allocno of
22456 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22457 class, which gives too high a pressure for GENERAL_REGS and too low
22458 for VSX_REGS. So, force a choice of the subclass here.
22459
22460 The best class is also the union if GENERAL_REGS and VSX_REGS have
22461 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22462 allocno class, since trying to narrow down the class by regno mode
22463 is prone to error. For example, SImode is allowed in VSX regs and
22464 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22465 it would be wrong to choose an allocno of GENERAL_REGS based on
22466 SImode. */
22467
22468 static reg_class_t
22469 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22470 reg_class_t allocno_class,
22471 reg_class_t best_class)
22472 {
22473 switch (allocno_class)
22474 {
22475 case GEN_OR_VSX_REGS:
22476 /* best_class must be a subset of allocno_class. */
22477 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22478 || best_class == GEN_OR_FLOAT_REGS
22479 || best_class == VSX_REGS
22480 || best_class == ALTIVEC_REGS
22481 || best_class == FLOAT_REGS
22482 || best_class == GENERAL_REGS
22483 || best_class == BASE_REGS);
22484 /* Use best_class but choose wider classes when copying from the
22485 wider class to best_class is cheap. This mimics IRA choice
22486 of allocno class. */
22487 if (best_class == BASE_REGS)
22488 return GENERAL_REGS;
22489 if (TARGET_VSX && best_class == FLOAT_REGS)
22490 return VSX_REGS;
22491 return best_class;
22492
22493 case VSX_REGS:
22494 if (best_class == ALTIVEC_REGS)
22495 return ALTIVEC_REGS;
22496
22497 default:
22498 break;
22499 }
22500
22501 return allocno_class;
22502 }
22503
22504 /* Load up a constant. If the mode is a vector mode, splat the value across
22505 all of the vector elements. */
22506
22507 static rtx
22508 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22509 {
22510 rtx reg;
22511
22512 if (mode == SFmode || mode == DFmode)
22513 {
22514 rtx d = const_double_from_real_value (dconst, mode);
22515 reg = force_reg (mode, d);
22516 }
22517 else if (mode == V4SFmode)
22518 {
22519 rtx d = const_double_from_real_value (dconst, SFmode);
22520 rtvec v = gen_rtvec (4, d, d, d, d);
22521 reg = gen_reg_rtx (mode);
22522 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22523 }
22524 else if (mode == V2DFmode)
22525 {
22526 rtx d = const_double_from_real_value (dconst, DFmode);
22527 rtvec v = gen_rtvec (2, d, d);
22528 reg = gen_reg_rtx (mode);
22529 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22530 }
22531 else
22532 gcc_unreachable ();
22533
22534 return reg;
22535 }
22536
22537 /* Generate an FMA instruction. */
22538
22539 static void
22540 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22541 {
22542 machine_mode mode = GET_MODE (target);
22543 rtx dst;
22544
22545 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22546 gcc_assert (dst != NULL);
22547
22548 if (dst != target)
22549 emit_move_insn (target, dst);
22550 }
22551
22552 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22553
22554 static void
22555 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22556 {
22557 machine_mode mode = GET_MODE (dst);
22558 rtx r;
22559
22560 /* This is a tad more complicated, since the fnma_optab is for
22561 a different expression: fma(-m1, m2, a), which is the same
22562 thing except in the case of signed zeros.
22563
22564 Fortunately we know that if FMA is supported, then FNMSUB is
22565 also supported in the ISA. Just expand it directly. */
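/* Spelled out: -fma (m1, m2, -a) = -(m1*m2 - a) = a - m1*m2, which is
exactly the fnmsub operation, while fnma_optab's fma (-m1, m2, a)
agrees with it except possibly in the sign of a zero result. */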
22566
22567 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22568
22569 r = gen_rtx_NEG (mode, a);
22570 r = gen_rtx_FMA (mode, m1, m2, r);
22571 r = gen_rtx_NEG (mode, r);
22572 emit_insn (gen_rtx_SET (dst, r));
22573 }
22574
22575 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22576 add a reg_note saying that this was a division. Support both scalar and
22577 vector divide. Assumes no trapping math and finite arguments. */
22578
22579 void
22580 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22581 {
22582 machine_mode mode = GET_MODE (dst);
22583 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22584 int i;
22585
22586 /* Low precision estimates guarantee 5 bits of accuracy. High
22587 precision estimates guarantee 14 bits of accuracy. SFmode
22588 requires 23 bits of accuracy. DFmode requires 52 bits of
22589 accuracy. Each pass at least doubles the accuracy, leading
22590 to the following. */
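/* Working out the arithmetic above: a high precision (14-bit) estimate
doubles to 28 >= 23 bits after one pass for SFmode and to 56 >= 52
after two for DFmode, while a low precision (5-bit) estimate needs
three doublings to reach 40 >= 23 and four to reach 80 >= 52. */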
22591 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22592 if (mode == DFmode || mode == V2DFmode)
22593 passes++;
22594
22595 enum insn_code code = optab_handler (smul_optab, mode);
22596 insn_gen_fn gen_mul = GEN_FCN (code);
22597
22598 gcc_assert (code != CODE_FOR_nothing);
22599
22600 one = rs6000_load_constant_and_splat (mode, dconst1);
22601
22602 /* x0 = 1./d estimate */
22603 x0 = gen_reg_rtx (mode);
22604 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22605 UNSPEC_FRES)));
22606
22607 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
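/* Equivalently, with e_i = 1 - d*x_i the error squares on every step,
since 1 - d*x_(i+1) = (1 - d*x_i)^2; that is why the loop below can
track the error term E directly and update X as x += e*x. */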
22608 if (passes > 1) {
22609
22610 /* e0 = 1. - d * x0 */
22611 e0 = gen_reg_rtx (mode);
22612 rs6000_emit_nmsub (e0, d, x0, one);
22613
22614 /* x1 = x0 + e0 * x0 */
22615 x1 = gen_reg_rtx (mode);
22616 rs6000_emit_madd (x1, e0, x0, x0);
22617
22618 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22619 ++i, xprev = xnext, eprev = enext) {
22620
22621 /* enext = eprev * eprev */
22622 enext = gen_reg_rtx (mode);
22623 emit_insn (gen_mul (enext, eprev, eprev));
22624
22625 /* xnext = xprev + enext * xprev */
22626 xnext = gen_reg_rtx (mode);
22627 rs6000_emit_madd (xnext, enext, xprev, xprev);
22628 }
22629
22630 } else
22631 xprev = x0;
22632
22633 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22634
22635 /* u = n * xprev */
22636 u = gen_reg_rtx (mode);
22637 emit_insn (gen_mul (u, n, xprev));
22638
22639 /* v = n - (d * u) */
22640 v = gen_reg_rtx (mode);
22641 rs6000_emit_nmsub (v, d, u, n);
22642
22643 /* dst = (v * xprev) + u */
22644 rs6000_emit_madd (dst, v, xprev, u);
22645
22646 if (note_p)
22647 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22648 }
22649
22650 /* Goldschmidt's Algorithm for single/double-precision floating point
22651 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22652
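/* The invariants are g ~= sqrt(src) and h ~= 1/(2*sqrt(src)). Each pass
forms the residual t = 1/2 - g*h (zero at convergence) and updates
g += g*t and h += h*t, roughly doubling the accuracy; the result is g
for sqrt and h + h for rsqrt. */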
22653 void
22654 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22655 {
22656 machine_mode mode = GET_MODE (src);
22657 rtx e = gen_reg_rtx (mode);
22658 rtx g = gen_reg_rtx (mode);
22659 rtx h = gen_reg_rtx (mode);
22660
22661 /* Low precision estimates guarantee 5 bits of accuracy. High
22662 precision estimates guarantee 14 bits of accuracy. SFmode
22663 requires 23 bits of accuracy. DFmode requires 52 bits of
22664 accuracy. Each pass at least doubles the accuracy, leading
22665 to the following. */
22666 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22667 if (mode == DFmode || mode == V2DFmode)
22668 passes++;
22669
22670 int i;
22671 rtx mhalf;
22672 enum insn_code code = optab_handler (smul_optab, mode);
22673 insn_gen_fn gen_mul = GEN_FCN (code);
22674
22675 gcc_assert (code != CODE_FOR_nothing);
22676
22677 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22678
22679 /* e = rsqrt estimate */
22680 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22681 UNSPEC_RSQRT)));
22682
22683 /* If src == 0.0, zero the estimate to filter out the infinity and prevent a NaN result for sqrt (0.0). */
22684 if (!recip)
22685 {
22686 rtx zero = force_reg (mode, CONST0_RTX (mode));
22687
22688 if (mode == SFmode)
22689 {
22690 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
22691 e, zero, mode, 0);
22692 if (target != e)
22693 emit_move_insn (e, target);
22694 }
22695 else
22696 {
22697 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22698 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22699 }
22700 }
22701
22702 /* g = sqrt estimate. */
22703 emit_insn (gen_mul (g, e, src));
22704 /* h = 1/(2*sqrt) estimate. */
22705 emit_insn (gen_mul (h, e, mhalf));
22706
22707 if (recip)
22708 {
22709 if (passes == 1)
22710 {
22711 rtx t = gen_reg_rtx (mode);
22712 rs6000_emit_nmsub (t, g, h, mhalf);
22713 /* Apply correction directly to 1/rsqrt estimate. */
22714 rs6000_emit_madd (dst, e, t, e);
22715 }
22716 else
22717 {
22718 for (i = 0; i < passes; i++)
22719 {
22720 rtx t1 = gen_reg_rtx (mode);
22721 rtx g1 = gen_reg_rtx (mode);
22722 rtx h1 = gen_reg_rtx (mode);
22723
22724 rs6000_emit_nmsub (t1, g, h, mhalf);
22725 rs6000_emit_madd (g1, g, t1, g);
22726 rs6000_emit_madd (h1, h, t1, h);
22727
22728 g = g1;
22729 h = h1;
22730 }
22731 /* Multiply by 2 for 1/rsqrt. */
22732 emit_insn (gen_add3_insn (dst, h, h));
22733 }
22734 }
22735 else
22736 {
22737 rtx t = gen_reg_rtx (mode);
22738 rs6000_emit_nmsub (t, g, h, mhalf);
22739 rs6000_emit_madd (dst, g, t, g);
22740 }
22741
22742 return;
22743 }
22744
22745 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22746 (Power7) targets. DST is the target, and SRC is the argument operand. */
22747
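/* A sketch of the fallback's multiply trick: popcntb leaves a per-byte
population count in each byte; multiplying by 0x01010101 (SImode) or
0x0101010101010101 (DImode) sums all the byte counts into the most
significant byte, and the final shift right (24 or 56) extracts it.
For instance, byte counts {1,2,3,4} accumulate to 10 in the top byte. */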
22748 void
22749 rs6000_emit_popcount (rtx dst, rtx src)
22750 {
22751 machine_mode mode = GET_MODE (dst);
22752 rtx tmp1, tmp2;
22753
22754 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22755 if (TARGET_POPCNTD)
22756 {
22757 if (mode == SImode)
22758 emit_insn (gen_popcntdsi2 (dst, src));
22759 else
22760 emit_insn (gen_popcntddi2 (dst, src));
22761 return;
22762 }
22763
22764 tmp1 = gen_reg_rtx (mode);
22765
22766 if (mode == SImode)
22767 {
22768 emit_insn (gen_popcntbsi2 (tmp1, src));
22769 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22770 NULL_RTX, 0);
22771 tmp2 = force_reg (SImode, tmp2);
22772 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22773 }
22774 else
22775 {
22776 emit_insn (gen_popcntbdi2 (tmp1, src));
22777 tmp2 = expand_mult (DImode, tmp1,
22778 GEN_INT ((HOST_WIDE_INT)
22779 0x01010101 << 32 | 0x01010101),
22780 NULL_RTX, 0);
22781 tmp2 = force_reg (DImode, tmp2);
22782 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22783 }
22784 }
22785
22786
22787 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22788 target, and SRC is the argument operand. */
22789
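/* The non-CMPB fallback relies on parity being invariant under
xor-folding: after popcntb, the value is xor-ed with itself shifted
right by half its width (16 then 8 bits for SImode; 32, 16, then 8 for
DImode), leaving the parity in the low bit of the low byte, which the
final AND with 1 extracts. */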
22790 void
22791 rs6000_emit_parity (rtx dst, rtx src)
22792 {
22793 machine_mode mode = GET_MODE (dst);
22794 rtx tmp;
22795
22796 tmp = gen_reg_rtx (mode);
22797
22798 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22799 if (TARGET_CMPB)
22800 {
22801 if (mode == SImode)
22802 {
22803 emit_insn (gen_popcntbsi2 (tmp, src));
22804 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22805 }
22806 else
22807 {
22808 emit_insn (gen_popcntbdi2 (tmp, src));
22809 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22810 }
22811 return;
22812 }
22813
22814 if (mode == SImode)
22815 {
22816 /* Is mult+shift >= shift+xor+shift+xor? */
22817 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22818 {
22819 rtx tmp1, tmp2, tmp3, tmp4;
22820
22821 tmp1 = gen_reg_rtx (SImode);
22822 emit_insn (gen_popcntbsi2 (tmp1, src));
22823
22824 tmp2 = gen_reg_rtx (SImode);
22825 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22826 tmp3 = gen_reg_rtx (SImode);
22827 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22828
22829 tmp4 = gen_reg_rtx (SImode);
22830 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22831 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22832 }
22833 else
22834 rs6000_emit_popcount (tmp, src);
22835 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22836 }
22837 else
22838 {
22839 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22840 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22841 {
22842 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22843
22844 tmp1 = gen_reg_rtx (DImode);
22845 emit_insn (gen_popcntbdi2 (tmp1, src));
22846
22847 tmp2 = gen_reg_rtx (DImode);
22848 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22849 tmp3 = gen_reg_rtx (DImode);
22850 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22851
22852 tmp4 = gen_reg_rtx (DImode);
22853 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22854 tmp5 = gen_reg_rtx (DImode);
22855 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22856
22857 tmp6 = gen_reg_rtx (DImode);
22858 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22859 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22860 }
22861 else
22862 rs6000_emit_popcount (tmp, src);
22863 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22864 }
22865 }
22866
22867 /* Expand an Altivec constant permutation for little endian mode.
22868 OP0 and OP1 are the input vectors and TARGET is the output vector.
22869 SEL specifies the constant permutation vector.
22870
22871 There are two issues: First, the two input operands must be
22872 swapped so that together they form a double-wide array in LE
22873 order. Second, the vperm instruction has surprising behavior
22874 in LE mode: it interprets the elements of the source vectors
22875 in BE mode ("left to right") and interprets the elements of
22876 the destination vector in LE mode ("right to left"). To
22877 correct for this, we must subtract each element of the permute
22878 control vector from 31.
22879
22880 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22881 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22882 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22883 serve as the permute control vector. Then, in BE mode,
22884
22885 vperm 9,10,11,12
22886
22887 places the desired result in vr9. However, in LE mode the
22888 vector contents will be
22889
22890 vr10 = 00000003 00000002 00000001 00000000
22891 vr11 = 00000007 00000006 00000005 00000004
22892
22893 The result of the vperm using the same permute control vector is
22894
22895 vr9 = 05000000 07000000 01000000 03000000
22896
22897 That is, the leftmost 4 bytes of vr10 are interpreted as the
22898 source for the rightmost 4 bytes of vr9, and so on.
22899
22900 If we change the permute control vector to
22901
22902 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22903
22904 and issue
22905
22906 vperm 9,11,10,12
22907
22908 we get the desired
22909
22910 vr9 = 00000006 00000004 00000002 00000000. */
22911
22912 static void
22913 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22914 const vec_perm_indices &sel)
22915 {
22916 unsigned int i;
22917 rtx perm[16];
22918 rtx constv, unspec;
22919
22920 /* Unpack and adjust the constant selector. */
22921 for (i = 0; i < 16; ++i)
22922 {
22923 unsigned int elt = 31 - (sel[i] & 31);
22924 perm[i] = GEN_INT (elt);
22925 }
22926
22927 /* Expand to a permute, swapping the inputs and using the
22928 adjusted selector. */
22929 if (!REG_P (op0))
22930 op0 = force_reg (V16QImode, op0);
22931 if (!REG_P (op1))
22932 op1 = force_reg (V16QImode, op1);
22933
22934 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22935 constv = force_reg (V16QImode, constv);
22936 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22937 UNSPEC_VPERM);
22938 if (!REG_P (target))
22939 {
22940 rtx tmp = gen_reg_rtx (V16QImode);
22941 emit_move_insn (tmp, unspec);
22942 unspec = tmp;
22943 }
22944
22945 emit_move_insn (target, unspec);
22946 }
22947
22948 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22949 permute control vector. But here it's not a constant, so we must
22950 generate a vector NAND or NOR to do the adjustment. */
22951
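/* This works because vperm ignores all but the low five bits of each
selector byte: for 0 <= e < 32 we have 31 - e == 31 ^ e, the complement
of e modulo 32, so a single vector NOT implements the "subtract each
element from 31" adjustment described for the constant case above. */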
22952 void
22953 altivec_expand_vec_perm_le (rtx operands[4])
22954 {
22955 rtx notx, iorx, unspec;
22956 rtx target = operands[0];
22957 rtx op0 = operands[1];
22958 rtx op1 = operands[2];
22959 rtx sel = operands[3];
22960 rtx tmp = target;
22961 rtx norreg = gen_reg_rtx (V16QImode);
22962 machine_mode mode = GET_MODE (target);
22963
22964 /* Get everything in regs so the pattern matches. */
22965 if (!REG_P (op0))
22966 op0 = force_reg (mode, op0);
22967 if (!REG_P (op1))
22968 op1 = force_reg (mode, op1);
22969 if (!REG_P (sel))
22970 sel = force_reg (V16QImode, sel);
22971 if (!REG_P (target))
22972 tmp = gen_reg_rtx (mode);
22973
22974 if (TARGET_P9_VECTOR)
22975 {
22976 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22977 UNSPEC_VPERMR);
22978 }
22979 else
22980 {
22981 /* Invert the selector with a VNAND if available, else a VNOR.
22982 The VNAND is preferred for future fusion opportunities. */
22983 notx = gen_rtx_NOT (V16QImode, sel);
22984 iorx = (TARGET_P8_VECTOR
22985 ? gen_rtx_IOR (V16QImode, notx, notx)
22986 : gen_rtx_AND (V16QImode, notx, notx));
22987 emit_insn (gen_rtx_SET (norreg, iorx));
22988
22989 /* Permute with operands reversed and adjusted selector. */
22990 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22991 UNSPEC_VPERM);
22992 }
22993
22994 /* Copy into target, possibly by way of a register. */
22995 if (!REG_P (target))
22996 {
22997 emit_move_insn (tmp, unspec);
22998 unspec = tmp;
22999 }
23000
23001 emit_move_insn (target, unspec);
23002 }
23003
23004 /* Expand an Altivec constant permutation. Return true if we match
23005 an efficient implementation; false to fall back to VPERM.
23006
23007 OP0 and OP1 are the input vectors and TARGET is the output vector.
23008 SEL specifies the constant permutation vector. */
23009
23010 static bool
23011 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
23012 const vec_perm_indices &sel)
23013 {
23014 struct altivec_perm_insn {
23015 HOST_WIDE_INT mask;
23016 enum insn_code impl;
23017 unsigned char perm[16];
23018 };
23019 static const struct altivec_perm_insn patterns[] = {
23020 {OPTION_MASK_ALTIVEC,
23021 CODE_FOR_altivec_vpkuhum_direct,
23022 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23023 {OPTION_MASK_ALTIVEC,
23024 CODE_FOR_altivec_vpkuwum_direct,
23025 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23026 {OPTION_MASK_ALTIVEC,
23027 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
23028 : CODE_FOR_altivec_vmrglb_direct,
23029 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23030 {OPTION_MASK_ALTIVEC,
23031 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
23032 : CODE_FOR_altivec_vmrglh_direct,
23033 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23034 {OPTION_MASK_ALTIVEC,
23035 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
23036 : CODE_FOR_altivec_vmrglw_direct_v4si,
23037 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23038 {OPTION_MASK_ALTIVEC,
23039 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
23040 : CODE_FOR_altivec_vmrghb_direct,
23041 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23042 {OPTION_MASK_ALTIVEC,
23043 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
23044 : CODE_FOR_altivec_vmrghh_direct,
23045 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23046 {OPTION_MASK_ALTIVEC,
23047 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
23048 : CODE_FOR_altivec_vmrghw_direct_v4si,
23049 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23050 {OPTION_MASK_P8_VECTOR,
23051 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23052 : CODE_FOR_p8_vmrgow_v4sf_direct,
23053 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23054 {OPTION_MASK_P8_VECTOR,
23055 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23056 : CODE_FOR_p8_vmrgew_v4sf_direct,
23057 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23058 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23059 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23060 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23061 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23062 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23063 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23064 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23065 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23066
23067 unsigned int i, j, elt, which;
23068 unsigned char perm[16];
23069 rtx x;
23070 bool one_vec;
23071
23072 /* Unpack the constant selector. */
23073 for (i = which = 0; i < 16; ++i)
23074 {
23075 elt = sel[i] & 31;
23076 which |= (elt < 16 ? 1 : 2);
23077 perm[i] = elt;
23078 }
23079
23080 /* Simplify the constant selector based on operands. */
23081 switch (which)
23082 {
23083 default:
23084 gcc_unreachable ();
23085
23086 case 3:
23087 one_vec = false;
23088 if (!rtx_equal_p (op0, op1))
23089 break;
23090 /* FALLTHRU */
23091
23092 case 2:
23093 for (i = 0; i < 16; ++i)
23094 perm[i] &= 15;
23095 op0 = op1;
23096 one_vec = true;
23097 break;
23098
23099 case 1:
23100 op1 = op0;
23101 one_vec = true;
23102 break;
23103 }
23104
23105 /* Look for splat patterns. */
23106 if (one_vec)
23107 {
23108 elt = perm[0];
23109
23110 for (i = 0; i < 16; ++i)
23111 if (perm[i] != elt)
23112 break;
23113 if (i == 16)
23114 {
23115 if (!BYTES_BIG_ENDIAN)
23116 elt = 15 - elt;
23117 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23118 return true;
23119 }
23120
23121 if (elt % 2 == 0)
23122 {
23123 for (i = 0; i < 16; i += 2)
23124 if (perm[i] != elt || perm[i + 1] != elt + 1)
23125 break;
23126 if (i == 16)
23127 {
23128 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23129 x = gen_reg_rtx (V8HImode);
23130 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23131 GEN_INT (field)));
23132 emit_move_insn (target, gen_lowpart (V16QImode, x));
23133 return true;
23134 }
23135 }
23136
23137 if (elt % 4 == 0)
23138 {
23139 for (i = 0; i < 16; i += 4)
23140 if (perm[i] != elt
23141 || perm[i + 1] != elt + 1
23142 || perm[i + 2] != elt + 2
23143 || perm[i + 3] != elt + 3)
23144 break;
23145 if (i == 16)
23146 {
23147 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23148 x = gen_reg_rtx (V4SImode);
23149 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23150 GEN_INT (field)));
23151 emit_move_insn (target, gen_lowpart (V16QImode, x));
23152 return true;
23153 }
23154 }
23155 }
23156
23157 /* Look for merge and pack patterns. */
23158 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23159 {
23160 bool swapped;
23161
23162 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23163 continue;
23164
23165 elt = patterns[j].perm[0];
23166 if (perm[0] == elt)
23167 swapped = false;
23168 else if (perm[0] == elt + 16)
23169 swapped = true;
23170 else
23171 continue;
23172 for (i = 1; i < 16; ++i)
23173 {
23174 elt = patterns[j].perm[i];
23175 if (swapped)
23176 elt = (elt >= 16 ? elt - 16 : elt + 16);
23177 else if (one_vec && elt >= 16)
23178 elt -= 16;
23179 if (perm[i] != elt)
23180 break;
23181 }
23182 if (i == 16)
23183 {
23184 enum insn_code icode = patterns[j].impl;
23185 machine_mode omode = insn_data[icode].operand[0].mode;
23186 machine_mode imode = insn_data[icode].operand[1].mode;
23187
23188 rtx perm_idx = GEN_INT (0);
23189 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23190 {
23191 int perm_val = 0;
23192 if (one_vec)
23193 {
23194 if (perm[0] == 8)
23195 perm_val |= 2;
23196 if (perm[8] == 8)
23197 perm_val |= 1;
23198 }
23199 else
23200 {
23201 if (perm[0] != 0)
23202 perm_val |= 2;
23203 if (perm[8] != 16)
23204 perm_val |= 1;
23205 }
23206 perm_idx = GEN_INT (perm_val);
23207 }
23208
23209 /* For little-endian, don't use vpkuwum and vpkuhum if the
23210 underlying vector type is not V4SI and V8HI, respectively.
23211 For example, using vpkuwum with a V8HI picks up the even
23212 halfwords (BE numbering) when the even halfwords (LE
23213 numbering) are what we need. */
23214 if (!BYTES_BIG_ENDIAN
23215 && icode == CODE_FOR_altivec_vpkuwum_direct
23216 && ((REG_P (op0)
23217 && GET_MODE (op0) != V4SImode)
23218 || (SUBREG_P (op0)
23219 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23220 continue;
23221 if (!BYTES_BIG_ENDIAN
23222 && icode == CODE_FOR_altivec_vpkuhum_direct
23223 && ((REG_P (op0)
23224 && GET_MODE (op0) != V8HImode)
23225 || (SUBREG_P (op0)
23226 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23227 continue;
23228
23229 /* For little-endian, the two input operands must be swapped
23230 (or swapped back) to ensure proper right-to-left numbering
23231 from 0 to 2N-1. */
23232 if (swapped ^ !BYTES_BIG_ENDIAN
23233 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23234 std::swap (op0, op1);
23235 if (imode != V16QImode)
23236 {
23237 op0 = gen_lowpart (imode, op0);
23238 op1 = gen_lowpart (imode, op1);
23239 }
23240 if (omode == V16QImode)
23241 x = target;
23242 else
23243 x = gen_reg_rtx (omode);
23244 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23245 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23246 else
23247 emit_insn (GEN_FCN (icode) (x, op0, op1));
23248 if (omode != V16QImode)
23249 emit_move_insn (target, gen_lowpart (V16QImode, x));
23250 return true;
23251 }
23252 }
23253
23254 if (!BYTES_BIG_ENDIAN)
23255 {
23256 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23257 return true;
23258 }
23259
23260 return false;
23261 }
23262
23263 /* Expand a VSX Permute Doubleword constant permutation.
23264 Return true if we match an efficient implementation. */
23265
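/* PERM0 and PERM1 each index the four doublewords of the concatenation
{ op0[0], op0[1], op1[0], op1[1] }: bit 1 of a selector names the
operand and bit 0 the element, so e.g. perm0 = 0 with perm1 = 3
selects { op0[0], op1[1] }. */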
23266 static bool
23267 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23268 unsigned char perm0, unsigned char perm1)
23269 {
23270 rtx x;
23271
23272 /* If both selectors come from the same operand, fold to single op. */
23273 if ((perm0 & 2) == (perm1 & 2))
23274 {
23275 if (perm0 & 2)
23276 op0 = op1;
23277 else
23278 op1 = op0;
23279 }
23280 /* If both operands are equal, fold to simpler permutation. */
23281 if (rtx_equal_p (op0, op1))
23282 {
23283 perm0 = perm0 & 1;
23284 perm1 = (perm1 & 1) + 2;
23285 }
23286 /* If the first selector comes from the second operand, swap. */
23287 else if (perm0 & 2)
23288 {
23289 if (perm1 & 2)
23290 return false;
23291 perm0 -= 2;
23292 perm1 += 2;
23293 std::swap (op0, op1);
23294 }
23295 /* If the second selector does not come from the second operand, fail. */
23296 else if ((perm1 & 2) == 0)
23297 return false;
23298
23299 /* Success! */
23300 if (target != NULL)
23301 {
23302 machine_mode vmode, dmode;
23303 rtvec v;
23304
23305 vmode = GET_MODE (target);
23306 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23307 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23308 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23309 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23310 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23311 emit_insn (gen_rtx_SET (target, x));
23312 }
23313 return true;
23314 }
23315
23316 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23317
23318 static bool
23319 rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
23320 rtx target, rtx op0, rtx op1,
23321 const vec_perm_indices &sel)
23322 {
23323 if (vmode != op_mode)
23324 return false;
23325
23326 bool testing_p = !target;
23327
23328 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23329 if (TARGET_ALTIVEC && testing_p)
23330 return true;
23331
23332 if (op0)
23333 {
23334 rtx nop0 = force_reg (vmode, op0);
23335 if (op0 == op1)
23336 op1 = nop0;
23337 op0 = nop0;
23338 }
23339 if (op1)
23340 op1 = force_reg (vmode, op1);
23341
23342 /* Check for ps_merge* or xxpermdi insns. */
23343 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23344 {
23345 if (testing_p)
23346 {
23347 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23348 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23349 }
23350 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23351 return true;
23352 }
23353
23354 if (TARGET_ALTIVEC)
23355 {
23356 /* Force the target-independent code to lower to V16QImode. */
23357 if (vmode != V16QImode)
23358 return false;
23359 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23360 return true;
23361 }
23362
23363 return false;
23364 }
23365
23366 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23367 OP0 and OP1 are the input vectors and TARGET is the output vector.
23368 PERM specifies the constant permutation vector. */
23369
23370 static void
23371 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23372 machine_mode vmode, const vec_perm_builder &perm)
23373 {
23374 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23375 if (x != target)
23376 emit_move_insn (target, x);
23377 }
23378
23379 /* Expand an extract even operation. */
23380
23381 void
23382 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23383 {
23384 machine_mode vmode = GET_MODE (target);
23385 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23386 vec_perm_builder perm (nelt, nelt, 1);
23387
23388 for (i = 0; i < nelt; i++)
23389 perm.quick_push (i * 2);
23390
23391 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23392 }
23393
23394 /* Expand a vector interleave operation. */
23395
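/* For example, with nelt = 4, HIGHP builds the selector { 0, 4, 1, 5 }
(interleaving the first halves of the two inputs) and !HIGHP builds
{ 2, 6, 3, 7 } (the second halves). */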
23396 void
23397 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23398 {
23399 machine_mode vmode = GET_MODE (target);
23400 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23401 vec_perm_builder perm (nelt, nelt, 1);
23402
23403 high = (highp ? 0 : nelt / 2);
23404 for (i = 0; i < nelt / 2; i++)
23405 {
23406 perm.quick_push (i + high);
23407 perm.quick_push (i + nelt + high);
23408 }
23409
23410 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23411 }
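/* Worked example of the selectors built by the two expanders above, for a
   4-element mode such as V4SI (elements of the concatenation op0 ++ op1
   are numbered 0..7):

     rs6000_expand_extract_even:        { 0, 2, 4, 6 }
     rs6000_expand_interleave, highp:   { 0, 4, 1, 5 }
     rs6000_expand_interleave, !highp:  { 2, 6, 3, 7 }  */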
23412
23413 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
23414 void
23415 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23416 {
23417 HOST_WIDE_INT hwi_scale (scale);
23418 REAL_VALUE_TYPE r_pow;
23419 rtvec v = rtvec_alloc (2);
23420 rtx elt;
23421 rtx scale_vec = gen_reg_rtx (V2DFmode);
23422 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23423 elt = const_double_from_real_value (r_pow, DFmode);
23424 RTVEC_ELT (v, 0) = elt;
23425 RTVEC_ELT (v, 1) = elt;
23426 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23427 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23428 }
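/* For example, rs6000_scale_v2df (tgt, src, -1) builds the constant vector
   { 0.5, 0.5 } (2**-1 in each lane) and emits a single V2DF multiply, so
   TGT = SRC * 0.5 element-wise. */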
23429
23430 /* Return an RTX representing where to find the function value of a
23431 function returning MODE. */
23432 static rtx
23433 rs6000_complex_function_value (machine_mode mode)
23434 {
23435 unsigned int regno;
23436 rtx r1, r2;
23437 machine_mode inner = GET_MODE_INNER (mode);
23438 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23439
23440 if (TARGET_FLOAT128_TYPE
23441 && (mode == KCmode
23442 || (mode == TCmode && TARGET_IEEEQUAD)))
23443 regno = ALTIVEC_ARG_RETURN;
23444
23445 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23446 regno = FP_ARG_RETURN;
23447
23448 else
23449 {
23450 regno = GP_ARG_RETURN;
23451
23452 /* 32-bit is OK since it'll go in r3/r4. */
23453 if (TARGET_32BIT && inner_bytes >= 4)
23454 return gen_rtx_REG (mode, regno);
23455 }
23456
23457 if (inner_bytes >= 8)
23458 return gen_rtx_REG (mode, regno);
23459
23460 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23461 const0_rtx);
23462 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23463 GEN_INT (inner_bytes));
23464 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23465 }
23466
23467 /* Return an rtx describing a return value of MODE as a PARALLEL
23468 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23469 stride REG_STRIDE. */
23470
23471 static rtx
23472 rs6000_parallel_return (machine_mode mode,
23473 int n_elts, machine_mode elt_mode,
23474 unsigned int regno, unsigned int reg_stride)
23475 {
23476 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23477
23478 int i;
23479 for (i = 0; i < n_elts; i++)
23480 {
23481 rtx r = gen_rtx_REG (elt_mode, regno);
23482 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23483 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23484 regno += reg_stride;
23485 }
23486
23487 return par;
23488 }
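/* For instance, under ELFv2 a homogeneous aggregate such as
   struct { double a, b, c; } comes back as (a sketch, assuming
   FP_ARG_RETURN is f1 and a register stride of 1):

     (parallel [(expr_list (reg:DF f1) (const_int 0))
                (expr_list (reg:DF f2) (const_int 8))
                (expr_list (reg:DF f3) (const_int 16))])  */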
23489
23490 /* Target hook for TARGET_FUNCTION_VALUE.
23491
23492 An integer value is in r3 and a floating-point value is in fp1,
23493 unless -msoft-float. */
23494
23495 static rtx
23496 rs6000_function_value (const_tree valtype,
23497 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23498 bool outgoing ATTRIBUTE_UNUSED)
23499 {
23500 machine_mode mode;
23501 unsigned int regno;
23502 machine_mode elt_mode;
23503 int n_elts;
23504
23505 /* Special handling for structs in darwin64. */
23506 if (TARGET_MACHO
23507 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23508 {
23509 CUMULATIVE_ARGS valcum;
23510 rtx valret;
23511
23512 valcum.words = 0;
23513 valcum.fregno = FP_ARG_MIN_REG;
23514 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23515 /* Do a trial code generation as if this were going to be passed as
23516 an argument; if any part goes in memory, we return NULL. */
23517 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23518 if (valret)
23519 return valret;
23520 /* Otherwise fall through to standard ABI rules. */
23521 }
23522
23523 mode = TYPE_MODE (valtype);
23524
23525 /* The ELFv2 ABI returns homogeneous FP/vector aggregates in registers. */
23526 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23527 {
23528 int first_reg, n_regs;
23529
23530 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23531 {
23532 /* _Decimal128 must use even/odd register pairs. */
23533 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23534 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23535 }
23536 else
23537 {
23538 first_reg = ALTIVEC_ARG_RETURN;
23539 n_regs = 1;
23540 }
23541
23542 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23543 }
23544
23545 /* Some return value types need to be split for the 32-bit ABI with -mpowerpc64. */
23546 if (TARGET_32BIT && TARGET_POWERPC64)
23547 switch (mode)
23548 {
23549 default:
23550 break;
23551 case E_DImode:
23552 case E_SCmode:
23553 case E_DCmode:
23554 case E_TCmode:
23555 int count = GET_MODE_SIZE (mode) / 4;
23556 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23557 }
23558
23559 if ((INTEGRAL_TYPE_P (valtype)
23560 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23561 || POINTER_TYPE_P (valtype))
23562 mode = TARGET_32BIT ? SImode : DImode;
23563
23564 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23565 /* _Decimal128 must use an even/odd register pair. */
23566 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23567 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23568 && !FLOAT128_VECTOR_P (mode))
23569 regno = FP_ARG_RETURN;
23570 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23571 && targetm.calls.split_complex_arg)
23572 return rs6000_complex_function_value (mode);
23573 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23574 return register is used in both cases, and we won't see V2DImode/V2DFmode
23575 for pure altivec, combine the two cases. */
23576 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23577 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23578 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23579 regno = ALTIVEC_ARG_RETURN;
23580 else
23581 regno = GP_ARG_RETURN;
23582
23583 return gen_rtx_REG (mode, regno);
23584 }
23585
23586 /* Define how to find the value returned by a library function
23587 assuming the value has mode MODE. */
23588 rtx
23589 rs6000_libcall_value (machine_mode mode)
23590 {
23591 unsigned int regno;
23592
23593 /* A long long return value needs to be split for the 32-bit ABI with -mpowerpc64. */
23594 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23595 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23596
23597 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23598 /* _Decimal128 must use an even/odd register pair. */
23599 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23600 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23601 regno = FP_ARG_RETURN;
23602 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23603 return register is used in both cases, and we won't see V2DImode/V2DFmode
23604 for pure altivec, combine the two cases. */
23605 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23606 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23607 regno = ALTIVEC_ARG_RETURN;
23608 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23609 return rs6000_complex_function_value (mode);
23610 else
23611 regno = GP_ARG_RETURN;
23612
23613 return gen_rtx_REG (mode, regno);
23614 }
23615
23616 /* Compute register pressure classes. We implement the target hook to avoid
23617 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23618 lead to incorrect estimates of the number of available registers and therefore
23619 increased register pressure/spill. */
23620 static int
23621 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23622 {
23623 int n;
23624
23625 n = 0;
23626 pressure_classes[n++] = GENERAL_REGS;
23627 if (TARGET_ALTIVEC)
23628 pressure_classes[n++] = ALTIVEC_REGS;
23629 if (TARGET_VSX)
23630 pressure_classes[n++] = VSX_REGS;
23631 else
23632 {
23633 if (TARGET_HARD_FLOAT)
23634 pressure_classes[n++] = FLOAT_REGS;
23635 }
23636 pressure_classes[n++] = CR_REGS;
23637 pressure_classes[n++] = SPECIAL_REGS;
23638
23639 return n;
23640 }
23641
23642 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23643 Frame pointer elimination is automatically handled.
23644
23645 For the RS/6000, if frame pointer elimination is being done, we would like
23646 to convert ap into fp, not sp.
23647
23648 We need r30 if -mminimal-toc was specified, and there are constant pool
23649 references. */
23650
23651 static bool
23652 rs6000_can_eliminate (const int from, const int to)
23653 {
23654 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23655 ? ! frame_pointer_needed
23656 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23657 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23658 || constant_pool_empty_p ()
23659 : true);
23660 }
23661
23662 /* Define the offset between two registers, FROM to be eliminated and its
23663 replacement TO, at the start of a routine. */
23664 HOST_WIDE_INT
23665 rs6000_initial_elimination_offset (int from, int to)
23666 {
23667 rs6000_stack_t *info = rs6000_stack_info ();
23668 HOST_WIDE_INT offset;
23669
23670 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23671 offset = info->push_p ? 0 : -info->total_size;
23672 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23673 {
23674 offset = info->push_p ? 0 : -info->total_size;
23675 if (FRAME_GROWS_DOWNWARD)
23676 offset += info->fixed_size + info->vars_size + info->parm_size;
23677 }
23678 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23679 offset = FRAME_GROWS_DOWNWARD
23680 ? info->fixed_size + info->vars_size + info->parm_size
23681 : 0;
23682 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23683 offset = info->total_size;
23684 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23685 offset = info->push_p ? info->total_size : 0;
23686 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23687 offset = 0;
23688 else
23689 gcc_unreachable ();
23690
23691 return offset;
23692 }
23693
23694 /* Fill in sizes of registers used by unwinder. */
23695
23696 static void
23697 rs6000_init_dwarf_reg_sizes_extra (tree address)
23698 {
23699 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23700 {
23701 int i;
23702 machine_mode mode = TYPE_MODE (char_type_node);
23703 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23704 rtx mem = gen_rtx_MEM (BLKmode, addr);
23705 rtx value = gen_int_mode (16, mode);
23706
23707 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23708 The unwinder still needs to know the size of Altivec registers. */
23709
23710 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23711 {
23712 int column = DWARF_REG_TO_UNWIND_COLUMN
23713 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23714 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23715
23716 emit_move_insn (adjust_address (mem, mode, offset), value);
23717 }
23718 }
23719 }
23720
23721 /* Map internal gcc register numbers to debug format register numbers.
23722 FORMAT specifies the type of debug register number to use:
23723 0 -- debug information, except for frame-related sections
23724 1 -- DWARF .debug_frame section
23725 2 -- DWARF .eh_frame section */
23726
23727 unsigned int
23728 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
23729 {
23730 /* On some platforms, we use the standard DWARF register
23731 numbering for .debug_info and .debug_frame. */
23732 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
23733 {
23734 #ifdef RS6000_USE_DWARF_NUMBERING
23735 if (regno <= 31)
23736 return regno;
23737 if (FP_REGNO_P (regno))
23738 return regno - FIRST_FPR_REGNO + 32;
23739 if (ALTIVEC_REGNO_P (regno))
23740 return regno - FIRST_ALTIVEC_REGNO + 1124;
23741 if (regno == LR_REGNO)
23742 return 108;
23743 if (regno == CTR_REGNO)
23744 return 109;
23745 if (regno == CA_REGNO)
23746 return 101; /* XER */
23747 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23748 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23749 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23750 to the DWARF reg for CR. */
23751 if (format == 1 && regno == CR2_REGNO)
23752 return 64;
23753 if (CR_REGNO_P (regno))
23754 return regno - CR0_REGNO + 86;
23755 if (regno == VRSAVE_REGNO)
23756 return 356;
23757 if (regno == VSCR_REGNO)
23758 return 67;
23759
23760 /* These do not make much sense. */
23761 if (regno == FRAME_POINTER_REGNUM)
23762 return 111;
23763 if (regno == ARG_POINTER_REGNUM)
23764 return 67;
23765 if (regno == 64)
23766 return 100;
23767
23768 gcc_unreachable ();
23769 #endif
23770 }
23771
23772 /* We use the GCC 7 (and before) internal number for non-DWARF debug
23773 information, and also for .eh_frame. */
23774 /* Translate the regnos to their numbers in GCC 7 (and before). */
23775 if (regno <= 31)
23776 return regno;
23777 if (FP_REGNO_P (regno))
23778 return regno - FIRST_FPR_REGNO + 32;
23779 if (ALTIVEC_REGNO_P (regno))
23780 return regno - FIRST_ALTIVEC_REGNO + 77;
23781 if (regno == LR_REGNO)
23782 return 65;
23783 if (regno == CTR_REGNO)
23784 return 66;
23785 if (regno == CA_REGNO)
23786 return 76; /* XER */
23787 if (CR_REGNO_P (regno))
23788 return regno - CR0_REGNO + 68;
23789 if (regno == VRSAVE_REGNO)
23790 return 109;
23791 if (regno == VSCR_REGNO)
23792 return 110;
23793
23794 if (regno == FRAME_POINTER_REGNUM)
23795 return 111;
23796 if (regno == ARG_POINTER_REGNUM)
23797 return 67;
23798 if (regno == 64)
23799 return 64;
23800
23801 gcc_unreachable ();
23802 }
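/* A few concrete mappings implied by the code above (assuming
   RS6000_USE_DWARF_NUMBERING is defined):

     regno   .debug_frame (format 1)   .eh_frame (format 2)
     r3      3                         3
     f0      32                        32
     v0      1124                      77
     LR      108                       65
     CTR     109                       66  */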
23803
23804 /* target hook eh_return_filter_mode */
23805 static scalar_int_mode
23806 rs6000_eh_return_filter_mode (void)
23807 {
23808 return TARGET_32BIT ? SImode : word_mode;
23809 }
23810
23811 /* Target hook for scalar_mode_supported_p. */
23812 static bool
23813 rs6000_scalar_mode_supported_p (scalar_mode mode)
23814 {
23815 /* -m32 does not support TImode. This is the default, from
23816 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23817 same ABI as for -m32. But default_scalar_mode_supported_p allows
23818 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23819 for -mpowerpc64. */
23820 if (TARGET_32BIT && mode == TImode)
23821 return false;
23822
23823 if (DECIMAL_FLOAT_MODE_P (mode))
23824 return default_decimal_float_supported_p ();
23825 else if (TARGET_FLOAT128_TYPE && mode == KFmode)
23826 return true;
23827 else if (TARGET_IBM128 && mode == IFmode)
23828 return true;
23829 else
23830 return default_scalar_mode_supported_p (mode);
23831 }
23832
23833 /* Target hook for libgcc_floating_mode_supported_p. */
23834
23835 static bool
23836 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23837 {
23838 switch (mode)
23839 {
23840 case E_SFmode:
23841 case E_DFmode:
23842 case E_TFmode:
23843 return true;
23844
23845 /* We only return true for KFmode if IEEE 128-bit types are supported. */
23846 case E_KFmode:
23847 return TARGET_FLOAT128_TYPE;
23848
23849 default:
23850 return false;
23851 }
23852 }
23853
23854 /* Target hook for vector_mode_supported_p. */
23855 static bool
23856 rs6000_vector_mode_supported_p (machine_mode mode)
23857 {
23858 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23859 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23860 double-double. */
23861 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23862 return true;
23863
23864 else
23865 return false;
23866 }
23867
23868 /* Target hook for floatn_mode. */
23869 static opt_scalar_float_mode
23870 rs6000_floatn_mode (int n, bool extended)
23871 {
23872 if (extended)
23873 {
23874 switch (n)
23875 {
23876 case 32:
23877 return DFmode;
23878
23879 case 64:
23880 if (TARGET_FLOAT128_TYPE)
23881 return KFmode;
23882 else
23883 return opt_scalar_float_mode ();
23884
23885 case 128:
23886 return opt_scalar_float_mode ();
23887
23888 default:
23889 /* Those are the only valid _FloatNx types. */
23890 gcc_unreachable ();
23891 }
23892 }
23893 else
23894 {
23895 switch (n)
23896 {
23897 case 32:
23898 return SFmode;
23899
23900 case 64:
23901 return DFmode;
23902
23903 case 128:
23904 if (TARGET_FLOAT128_TYPE)
23905 return KFmode;
23906 else
23907 return opt_scalar_float_mode ();
23908
23909 default:
23910 return opt_scalar_float_mode ();
23911 }
23912 }
23913
23914 }
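/* In C source the mapping above corresponds to (a sketch, assuming
   TARGET_FLOAT128_TYPE is enabled):

     _Float32  f32;   // SFmode
     _Float64  f64;   // DFmode
     _Float128 f128;  // KFmode
     _Float32x f32x;  // DFmode
     _Float64x f64x;  // KFmode
     // _Float128x is rejected; no wider mode is available.  */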
23915
23916 /* Target hook for c_mode_for_suffix. */
23917 static machine_mode
23918 rs6000_c_mode_for_suffix (char suffix)
23919 {
23920 if (TARGET_FLOAT128_TYPE)
23921 {
23922 if (suffix == 'q' || suffix == 'Q')
23923 return KFmode;
23924
23925 /* At the moment, we are not defining a suffix for IBM extended double.
23926 If/when the default for -mabi=ieeelongdouble is changed, and we want
23927 to support __ibm128 constants in legacy library code, we may need to
23928 re-evaluate this decision. Currently, c-lex.cc only supports 'w' and
23929 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
23930 __float80 constants. */
23931 }
23932
23933 return VOIDmode;
23934 }
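/* For example, with -mfloat128 the literal suffix maps to KFmode:

     __float128 x = 1.5q;   // 'q'/'Q' suffix -> KFmode constant  */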
23935
23936 /* Target hook for invalid_arg_for_unprototyped_fn. */
23937 static const char *
23938 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
23939 {
23940 return (!rs6000_darwin64_abi
23941 && typelist == 0
23942 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
23943 && (funcdecl == NULL_TREE
23944 || (TREE_CODE (funcdecl) == FUNCTION_DECL
23945 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
23946 ? N_("AltiVec argument passed to unprototyped function")
23947 : NULL;
23948 }
23949
23950 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
23951 setup by using the hidden function __stack_chk_fail_local instead of
23952 calling __stack_chk_fail directly. Otherwise it is better to call
23953 __stack_chk_fail directly. */
23954
23955 static tree ATTRIBUTE_UNUSED
23956 rs6000_stack_protect_fail (void)
23957 {
23958 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
23959 ? default_hidden_stack_protect_fail ()
23960 : default_external_stack_protect_fail ();
23961 }
23962
23963 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
23964
23965 #if TARGET_ELF
23966 static unsigned HOST_WIDE_INT
23967 rs6000_asan_shadow_offset (void)
23968 {
23969 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
23970 }
23971 #endif
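/* With the standard ASan mapping, a memory access at ADDR is checked
   against the shadow byte at (ADDR >> 3) + offset, i.e. (a sketch):

     // -m64: shadow = (addr >> 3) + (1ULL << 41)   (0x20000000000)
     // -m32: shadow = (addr >> 3) + (1UL  << 29)   (0x20000000)  */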
23972 \f
23973 /* Mask options that we want to support inside of attribute((target)) and
23974 #pragma GCC target operations. Note, we do not include things like
23975 64/32-bit, endianness, hard/soft floating point, etc. that would have
23976 different calling sequences. */
23977
23978 struct rs6000_opt_mask {
23979 const char *name; /* option name */
23980 HOST_WIDE_INT mask; /* mask to set */
23981 bool invert; /* invert sense of mask */
23982 bool valid_target; /* option is a target option */
23983 };
23984
23985 static struct rs6000_opt_mask const rs6000_opt_masks[] =
23986 {
23987 { "altivec", OPTION_MASK_ALTIVEC, false, true },
23988 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
23989 false, true },
23990 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
23991 false, true },
23992 { "cmpb", OPTION_MASK_CMPB, false, true },
23993 { "crypto", OPTION_MASK_CRYPTO, false, true },
23994 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
23995 { "dlmzb", OPTION_MASK_DLMZB, false, true },
23996 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
23997 false, true },
23998 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
23999 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
24000 { "fprnd", OPTION_MASK_FPRND, false, true },
24001 { "power10", OPTION_MASK_POWER10, false, true },
24002 { "hard-dfp", OPTION_MASK_DFP, false, true },
24003 { "htm", OPTION_MASK_HTM, false, true },
24004 { "isel", OPTION_MASK_ISEL, false, true },
24005 { "mfcrf", OPTION_MASK_MFCRF, false, true },
24006 { "mfpgpr", 0, false, true },
24007 { "mma", OPTION_MASK_MMA, false, true },
24008 { "modulo", OPTION_MASK_MODULO, false, true },
24009 { "mulhw", OPTION_MASK_MULHW, false, true },
24010 { "multiple", OPTION_MASK_MULTIPLE, false, true },
24011 { "pcrel", OPTION_MASK_PCREL, false, true },
24012 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
24013 { "popcntb", OPTION_MASK_POPCNTB, false, true },
24014 { "popcntd", OPTION_MASK_POPCNTD, false, true },
24015 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
24016 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
24017 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
24018 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
24019 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
24020 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
24021 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
24022 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
24023 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
24024 { "prefixed", OPTION_MASK_PREFIXED, false, true },
24025 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
24026 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
24027 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
24028 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
24029 { "string", 0, false, true },
24030 { "update", OPTION_MASK_NO_UPDATE, true , true },
24031 { "vsx", OPTION_MASK_VSX, false, true },
24032 #ifdef OPTION_MASK_64BIT
24033 #if TARGET_AIX_OS
24034 { "aix64", OPTION_MASK_64BIT, false, false },
24035 { "aix32", OPTION_MASK_64BIT, true, false },
24036 #else
24037 { "64", OPTION_MASK_64BIT, false, false },
24038 { "32", OPTION_MASK_64BIT, true, false },
24039 #endif
24040 #endif
24041 #ifdef OPTION_MASK_EABI
24042 { "eabi", OPTION_MASK_EABI, false, false },
24043 #endif
24044 #ifdef OPTION_MASK_LITTLE_ENDIAN
24045 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
24046 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
24047 #endif
24048 #ifdef OPTION_MASK_RELOCATABLE
24049 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
24050 #endif
24051 #ifdef OPTION_MASK_STRICT_ALIGN
24052 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
24053 #endif
24054 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
24055 { "string", 0, false, false },
24056 };
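/* The names in this table (with an optional "no-" prefix) are what users
   may write inside the target attribute or pragma, e.g. (a sketch):

     __attribute__ ((target ("vsx,no-crypto")))
     void f (void);

     #pragma GCC target ("no-vsx")  */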
24057
24058 /* Builtin mask mapping for printing the flags. */
24059 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
24060 {
24061 { "altivec", OPTION_MASK_ALTIVEC, false, false },
24062 { "vsx", OPTION_MASK_VSX, false, false },
24063 { "fre", OPTION_MASK_POPCNTB, false, false },
24064 { "fres", OPTION_MASK_PPC_GFXOPT, false, false },
24065 { "frsqrte", OPTION_MASK_PPC_GFXOPT, false, false },
24066 { "frsqrtes", OPTION_MASK_POPCNTB, false, false },
24067 { "popcntd", OPTION_MASK_POPCNTD, false, false },
24068 { "cell", OPTION_MASK_FPRND, false, false },
24069 { "power8-vector", OPTION_MASK_P8_VECTOR, false, false },
24070 { "power9-vector", OPTION_MASK_P9_VECTOR, false, false },
24071 { "power9-misc", OPTION_MASK_P9_MISC, false, false },
24072 { "crypto", OPTION_MASK_CRYPTO, false, false },
24073 { "htm", OPTION_MASK_HTM, false, false },
24074 { "hard-dfp", OPTION_MASK_DFP, false, false },
24075 { "hard-float", OPTION_MASK_SOFT_FLOAT, false, false },
24076 { "long-double-128", OPTION_MASK_MULTIPLE, false, false },
24077 { "powerpc64", MASK_POWERPC64, false, false },
24078 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
24079 { "float128-hw", OPTION_MASK_FLOAT128_HW,false, false },
24080 { "mma", OPTION_MASK_MMA, false, false },
24081 { "power10", OPTION_MASK_POWER10, false, false },
24082 };
24083
24084 /* Option variables that we want to support inside attribute((target)) and
24085 #pragma GCC target operations. */
24086
24087 struct rs6000_opt_var {
24088 const char *name; /* option name */
24089 size_t global_offset; /* offset of the option in global_options. */
24090 size_t target_offset; /* offset of the option in target options. */
24091 };
24092
24093 static struct rs6000_opt_var const rs6000_opt_vars[] =
24094 {
24095 { "friz",
24096 offsetof (struct gcc_options, x_TARGET_FRIZ),
24097 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24098 { "avoid-indexed-addresses",
24099 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24100 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24101 { "longcall",
24102 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24103 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24104 { "optimize-swaps",
24105 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24106 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24107 { "allow-movmisalign",
24108 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24109 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24110 { "sched-groups",
24111 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24112 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24113 { "always-hint",
24114 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24115 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24116 { "align-branch-targets",
24117 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24118 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24119 { "sched-prolog",
24120 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24121 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24122 { "sched-epilog",
24123 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24124 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24125 { "speculate-indirect-jumps",
24126 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24127 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24128 };
24129
24130 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24131 parsing. Return true if there were no errors. */
24132
24133 static bool
24134 rs6000_inner_target_options (tree args, bool attr_p)
24135 {
24136 bool ret = true;
24137
24138 if (args == NULL_TREE)
24139 ;
24140
24141 else if (TREE_CODE (args) == STRING_CST)
24142 {
24143 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24144 char *q;
24145
24146 while ((q = strtok (p, ",")) != NULL)
24147 {
24148 bool error_p = false;
24149 bool not_valid_p = false;
24150 const char *cpu_opt = NULL;
24151
24152 p = NULL;
24153 if (startswith (q, "cpu="))
24154 {
24155 int cpu_index = rs6000_cpu_name_lookup (q+4);
24156 if (cpu_index >= 0)
24157 rs6000_cpu_index = cpu_index;
24158 else
24159 {
24160 error_p = true;
24161 cpu_opt = q+4;
24162 }
24163 }
24164 else if (startswith (q, "tune="))
24165 {
24166 int tune_index = rs6000_cpu_name_lookup (q+5);
24167 if (tune_index >= 0)
24168 rs6000_tune_index = tune_index;
24169 else
24170 {
24171 error_p = true;
24172 cpu_opt = q+5;
24173 }
24174 }
24175 else
24176 {
24177 size_t i;
24178 bool invert = false;
24179 char *r = q;
24180
24181 error_p = true;
24182 if (startswith (r, "no-"))
24183 {
24184 invert = true;
24185 r += 3;
24186 }
24187
24188 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24189 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24190 {
24191 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24192
24193 if (!rs6000_opt_masks[i].valid_target)
24194 not_valid_p = true;
24195 else
24196 {
24197 error_p = false;
24198 rs6000_isa_flags_explicit |= mask;
24199
24200 /* VSX needs altivec, so -mvsx automagically sets
24201 altivec and disables -mavoid-indexed-addresses. */
24202 if (!invert)
24203 {
24204 if (mask == OPTION_MASK_VSX)
24205 {
24206 mask |= OPTION_MASK_ALTIVEC;
24207 TARGET_AVOID_XFORM = 0;
24208 }
24209 }
24210
24211 if (rs6000_opt_masks[i].invert)
24212 invert = !invert;
24213
24214 if (invert)
24215 rs6000_isa_flags &= ~mask;
24216 else
24217 rs6000_isa_flags |= mask;
24218 }
24219 break;
24220 }
24221
24222 if (error_p && !not_valid_p)
24223 {
24224 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24225 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24226 {
24227 size_t j = rs6000_opt_vars[i].global_offset;
24228 *((int *) ((char *)&global_options + j)) = !invert;
24229 error_p = false;
24230 not_valid_p = false;
24231 break;
24232 }
24233 }
24234 }
24235
24236 if (error_p)
24237 {
24238 const char *eprefix, *esuffix;
24239
24240 ret = false;
24241 if (attr_p)
24242 {
24243 eprefix = "__attribute__((__target__(";
24244 esuffix = ")))";
24245 }
24246 else
24247 {
24248 eprefix = "#pragma GCC target ";
24249 esuffix = "";
24250 }
24251
24252 if (cpu_opt)
24253 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24254 q, esuffix);
24255 else if (not_valid_p)
24256 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24257 else
24258 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24259 }
24260 }
24261 }
24262
24263 else if (TREE_CODE (args) == TREE_LIST)
24264 {
24265 do
24266 {
24267 tree value = TREE_VALUE (args);
24268 if (value)
24269 {
24270 bool ret2 = rs6000_inner_target_options (value, attr_p);
24271 if (!ret2)
24272 ret = false;
24273 }
24274 args = TREE_CHAIN (args);
24275 }
24276 while (args != NULL_TREE);
24277 }
24278
24279 else
24280 {
24281 error ("attribute %<target%> argument not a string");
24282 return false;
24283 }
24284
24285 return ret;
24286 }
24287
24288 /* Print out the target options as a list for -mdebug=target. */
24289
24290 static void
24291 rs6000_debug_target_options (tree args, const char *prefix)
24292 {
24293 if (args == NULL_TREE)
24294 fprintf (stderr, "%s<NULL>", prefix);
24295
24296 else if (TREE_CODE (args) == STRING_CST)
24297 {
24298 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24299 char *q;
24300
24301 while ((q = strtok (p, ",")) != NULL)
24302 {
24303 p = NULL;
24304 fprintf (stderr, "%s\"%s\"", prefix, q);
24305 prefix = ", ";
24306 }
24307 }
24308
24309 else if (TREE_CODE (args) == TREE_LIST)
24310 {
24311 do
24312 {
24313 tree value = TREE_VALUE (args);
24314 if (value)
24315 {
24316 rs6000_debug_target_options (value, prefix);
24317 prefix = ", ";
24318 }
24319 args = TREE_CHAIN (args);
24320 }
24321 while (args != NULL_TREE);
24322 }
24323
24324 else
24325 gcc_unreachable ();
24326
24327 return;
24328 }
24329
24330 \f
24331 /* Hook to validate attribute((target("..."))). */
24332
24333 static bool
24334 rs6000_valid_attribute_p (tree fndecl,
24335 tree ARG_UNUSED (name),
24336 tree args,
24337 int flags)
24338 {
24339 struct cl_target_option cur_target;
24340 bool ret;
24341 tree old_optimize;
24342 tree new_target, new_optimize;
24343 tree func_optimize;
24344
24345 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24346
24347 if (TARGET_DEBUG_TARGET)
24348 {
24349 tree tname = DECL_NAME (fndecl);
24350 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24351 if (tname)
24352 fprintf (stderr, "function: %.*s\n",
24353 (int) IDENTIFIER_LENGTH (tname),
24354 IDENTIFIER_POINTER (tname));
24355 else
24356 fprintf (stderr, "function: unknown\n");
24357
24358 fprintf (stderr, "args:");
24359 rs6000_debug_target_options (args, " ");
24360 fprintf (stderr, "\n");
24361
24362 if (flags)
24363 fprintf (stderr, "flags: 0x%x\n", flags);
24364
24365 fprintf (stderr, "--------------------\n");
24366 }
24367
24368 /* attribute((target("default"))) does nothing, beyond
24369 affecting multi-versioning. */
24370 if (TREE_VALUE (args)
24371 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24372 && TREE_CHAIN (args) == NULL_TREE
24373 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24374 return true;
24375
24376 old_optimize = build_optimization_node (&global_options,
24377 &global_options_set);
24378 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24379
24380 /* If the function changed the optimization levels as well as setting target
24381 options, start with the optimizations specified. */
24382 if (func_optimize && func_optimize != old_optimize)
24383 cl_optimization_restore (&global_options, &global_options_set,
24384 TREE_OPTIMIZATION (func_optimize));
24385
24386 /* The target attributes may also change some optimization flags, so update
24387 the optimization options if necessary. */
24388 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24389 rs6000_cpu_index = rs6000_tune_index = -1;
24390 ret = rs6000_inner_target_options (args, true);
24391
24392 /* Set up any additional state. */
24393 if (ret)
24394 {
24395 ret = rs6000_option_override_internal (false);
24396 new_target = build_target_option_node (&global_options,
24397 &global_options_set);
24398 }
24399 else
24400 new_target = NULL;
24401
24402 new_optimize = build_optimization_node (&global_options,
24403 &global_options_set);
24404
24405 if (!new_target)
24406 ret = false;
24407
24408 else if (fndecl)
24409 {
24410 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24411
24412 if (old_optimize != new_optimize)
24413 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24414 }
24415
24416 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24417
24418 if (old_optimize != new_optimize)
24419 cl_optimization_restore (&global_options, &global_options_set,
24420 TREE_OPTIMIZATION (old_optimize));
24421
24422 return ret;
24423 }
24424
24425 \f
24426 /* Hook to validate the current #pragma GCC target and set the state, and
24427 update the macros based on what was changed. If ARGS is NULL, then
24428 POP_TARGET is used to reset the options. */
24429
24430 bool
24431 rs6000_pragma_target_parse (tree args, tree pop_target)
24432 {
24433 tree prev_tree = build_target_option_node (&global_options,
24434 &global_options_set);
24435 tree cur_tree;
24436 struct cl_target_option *prev_opt, *cur_opt;
24437 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24438 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
24439
24440 if (TARGET_DEBUG_TARGET)
24441 {
24442 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24443 fprintf (stderr, "args:");
24444 rs6000_debug_target_options (args, " ");
24445 fprintf (stderr, "\n");
24446
24447 if (pop_target)
24448 {
24449 fprintf (stderr, "pop_target:\n");
24450 debug_tree (pop_target);
24451 }
24452 else
24453 fprintf (stderr, "pop_target: <NULL>\n");
24454
24455 fprintf (stderr, "--------------------\n");
24456 }
24457
24458 if (! args)
24459 {
24460 cur_tree = ((pop_target)
24461 ? pop_target
24462 : target_option_default_node);
24463 cl_target_option_restore (&global_options, &global_options_set,
24464 TREE_TARGET_OPTION (cur_tree));
24465 }
24466 else
24467 {
24468 rs6000_cpu_index = rs6000_tune_index = -1;
24469 if (!rs6000_inner_target_options (args, false)
24470 || !rs6000_option_override_internal (false)
24471 || (cur_tree = build_target_option_node (&global_options,
24472 &global_options_set))
24473 == NULL_TREE)
24474 {
24475 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24476 fprintf (stderr, "invalid pragma\n");
24477
24478 return false;
24479 }
24480 }
24481
24482 target_option_current_node = cur_tree;
24483 rs6000_activate_target_options (target_option_current_node);
24484
24485 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24486 change the macros that are defined. */
24487 if (rs6000_target_modify_macros_ptr)
24488 {
24489 prev_opt = TREE_TARGET_OPTION (prev_tree);
24490 prev_bumask = prev_opt->x_rs6000_builtin_mask;
24491 prev_flags = prev_opt->x_rs6000_isa_flags;
24492
24493 cur_opt = TREE_TARGET_OPTION (cur_tree);
24494 cur_flags = cur_opt->x_rs6000_isa_flags;
24495 cur_bumask = cur_opt->x_rs6000_builtin_mask;
24496
24497 diff_bumask = (prev_bumask ^ cur_bumask);
24498 diff_flags = (prev_flags ^ cur_flags);
24499
24500 if ((diff_flags != 0) || (diff_bumask != 0))
24501 {
24502 /* Delete old macros. */
24503 rs6000_target_modify_macros_ptr (false,
24504 prev_flags & diff_flags,
24505 prev_bumask & diff_bumask);
24506
24507 /* Define new macros. */
24508 rs6000_target_modify_macros_ptr (true,
24509 cur_flags & diff_flags,
24510 cur_bumask & diff_bumask);
24511 }
24512 }
24513
24514 return true;
24515 }
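/* Typical use from C/C++, exercising both the ARGS and POP_TARGET paths
   (a sketch):

     #pragma GCC push_options
     #pragma GCC target ("cpu=power9,vsx")
     // ... functions compiled with the power9 options ...
     #pragma GCC pop_options   // restores the previous target state  */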
24516
24517 \f
24518 /* Remember the last target of rs6000_set_current_function. */
24519 static GTY(()) tree rs6000_previous_fndecl;
24520
24521 /* Restore target's globals from NEW_TREE and invalidate the
24522 rs6000_previous_fndecl cache. */
24523
24524 void
24525 rs6000_activate_target_options (tree new_tree)
24526 {
24527 cl_target_option_restore (&global_options, &global_options_set,
24528 TREE_TARGET_OPTION (new_tree));
24529 if (TREE_TARGET_GLOBALS (new_tree))
24530 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24531 else if (new_tree == target_option_default_node)
24532 restore_target_globals (&default_target_globals);
24533 else
24534 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24535 rs6000_previous_fndecl = NULL_TREE;
24536 }
24537
24538 /* Establish appropriate back-end context for processing the function
24539 FNDECL. The argument might be NULL to indicate processing at top
24540 level, outside of any function scope. */
24541 static void
24542 rs6000_set_current_function (tree fndecl)
24543 {
24544 if (TARGET_DEBUG_TARGET)
24545 {
24546 fprintf (stderr, "\n==================== rs6000_set_current_function");
24547
24548 if (fndecl)
24549 fprintf (stderr, ", fndecl %s (%p)",
24550 (DECL_NAME (fndecl)
24551 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24552 : "<unknown>"), (void *)fndecl);
24553
24554 if (rs6000_previous_fndecl)
24555 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24556
24557 fprintf (stderr, "\n");
24558 }
24559
24560 /* Only change the context if the function changes. This hook is called
24561 several times in the course of compiling a function, and we don't want to
24562 slow things down too much or call target_reinit when it isn't safe. */
24563 if (fndecl == rs6000_previous_fndecl)
24564 return;
24565
24566 tree old_tree;
24567 if (rs6000_previous_fndecl == NULL_TREE)
24568 old_tree = target_option_current_node;
24569 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24570 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24571 else
24572 old_tree = target_option_default_node;
24573
24574 tree new_tree;
24575 if (fndecl == NULL_TREE)
24576 {
24577 if (old_tree != target_option_current_node)
24578 new_tree = target_option_current_node;
24579 else
24580 new_tree = NULL_TREE;
24581 }
24582 else
24583 {
24584 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24585 if (new_tree == NULL_TREE)
24586 new_tree = target_option_default_node;
24587 }
24588
24589 if (TARGET_DEBUG_TARGET)
24590 {
24591 if (new_tree)
24592 {
24593 fprintf (stderr, "\nnew fndecl target specific options:\n");
24594 debug_tree (new_tree);
24595 }
24596
24597 if (old_tree)
24598 {
24599 fprintf (stderr, "\nold fndecl target specific options:\n");
24600 debug_tree (old_tree);
24601 }
24602
24603 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24604 fprintf (stderr, "--------------------\n");
24605 }
24606
24607 if (new_tree && old_tree != new_tree)
24608 rs6000_activate_target_options (new_tree);
24609
24610 if (fndecl)
24611 rs6000_previous_fndecl = fndecl;
24612 }
24613
24614 \f
24615 /* Save the current options */
24616
24617 static void
24618 rs6000_function_specific_save (struct cl_target_option *ptr,
24619 struct gcc_options *opts,
24620 struct gcc_options */* opts_set */)
24621 {
24622 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24623 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24624 }
24625
24626 /* Restore the current options */
24627
24628 static void
24629 rs6000_function_specific_restore (struct gcc_options *opts,
24630 struct gcc_options */* opts_set */,
24631 struct cl_target_option *ptr)
24632
24633 {
24634 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24635 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24636 (void) rs6000_option_override_internal (false);
24637 }
24638
24639 /* Print the current options */
24640
24641 static void
24642 rs6000_function_specific_print (FILE *file, int indent,
24643 struct cl_target_option *ptr)
24644 {
24645 rs6000_print_isa_options (file, indent, "Isa options set",
24646 ptr->x_rs6000_isa_flags);
24647
24648 rs6000_print_isa_options (file, indent, "Isa options explicit",
24649 ptr->x_rs6000_isa_flags_explicit);
24650 }
24651
24652 /* Helper function to print the current isa or misc options on a line. */
24653
24654 static void
24655 rs6000_print_options_internal (FILE *file,
24656 int indent,
24657 const char *string,
24658 HOST_WIDE_INT flags,
24659 const char *prefix,
24660 const struct rs6000_opt_mask *opts,
24661 size_t num_elements)
24662 {
24663 size_t i;
24664 size_t start_column = 0;
24665 size_t cur_column;
24666 size_t max_column = 120;
24667 size_t prefix_len = strlen (prefix);
24668 size_t comma_len = 0;
24669 const char *comma = "";
24670
24671 if (indent)
24672 start_column += fprintf (file, "%*s", indent, "");
24673
24674 if (!flags)
24675 {
24676 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
24677 return;
24678 }
24679
24680 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
24681
24682 /* Print the various mask options. */
24683 cur_column = start_column;
24684 for (i = 0; i < num_elements; i++)
24685 {
24686 bool invert = opts[i].invert;
24687 const char *name = opts[i].name;
24688 const char *no_str = "";
24689 HOST_WIDE_INT mask = opts[i].mask;
24690 size_t len = comma_len + prefix_len + strlen (name);
24691
24692 if (!invert)
24693 {
24694 if ((flags & mask) == 0)
24695 {
24696 no_str = "no-";
24697 len += strlen ("no-");
24698 }
24699
24700 flags &= ~mask;
24701 }
24702
24703 else
24704 {
24705 if ((flags & mask) != 0)
24706 {
24707 no_str = "no-";
24708 len += strlen ("no-");
24709 }
24710
24711 flags |= mask;
24712 }
24713
24714 cur_column += len;
24715 if (cur_column > max_column)
24716 {
24717 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
24718 cur_column = start_column + len;
24719 comma = "";
24720 }
24721
24722 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24723 comma = ", ";
24724 comma_len = strlen (", ");
24725 }
24726
24727 fputs ("\n", file);
24728 }
24729
24730 /* Helper function to print the current isa options on a line. */
24731
24732 static void
24733 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24734 HOST_WIDE_INT flags)
24735 {
24736 rs6000_print_options_internal (file, indent, string, flags, "-m",
24737 &rs6000_opt_masks[0],
24738 ARRAY_SIZE (rs6000_opt_masks));
24739 }
24740
24741 static void
24742 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
24743 HOST_WIDE_INT flags)
24744 {
24745 rs6000_print_options_internal (file, indent, string, flags, "",
24746 &rs6000_builtin_mask_names[0],
24747 ARRAY_SIZE (rs6000_builtin_mask_names));
24748 }
24749
24750 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
24751 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24752 -mupper-regs-df, etc.).
24753
24754 If the user used -mno-power8-vector, we need to turn off all of the implicit
24755 ISA 2.07 and 3.0 options that relate to the vector unit.
24756
24757 If the user used -mno-power9-vector, we need to turn off all of the implicit
24758 ISA 3.0 options that relate to the vector unit.
24759
24760 This function does not handle explicit options such as the user specifying
24761 -mdirect-move. These are handled in rs6000_option_override_internal, and
24762 the appropriate error is given if needed.
24763
24764 We return a mask of all of the implicit options that should not be enabled
24765 by default. */
24766
24767 static HOST_WIDE_INT
24768 rs6000_disable_incompatible_switches (void)
24769 {
24770 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24771 size_t i, j;
24772
24773 static const struct {
24774 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24775 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24776 const char *const name; /* name of the switch. */
24777 } flags[] = {
24778 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
24779 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
24780 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
24781 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
24782 };
24783
24784 for (i = 0; i < ARRAY_SIZE (flags); i++)
24785 {
24786 HOST_WIDE_INT no_flag = flags[i].no_flag;
24787
24788 if ((rs6000_isa_flags & no_flag) == 0
24789 && (rs6000_isa_flags_explicit & no_flag) != 0)
24790 {
24791 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24792 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24793 & rs6000_isa_flags
24794 & dep_flags);
24795
24796 if (set_flags)
24797 {
24798 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24799 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24800 {
24801 set_flags &= ~rs6000_opt_masks[j].mask;
24802 error ("%<-mno-%s%> turns off %<-m%s%>",
24803 flags[i].name,
24804 rs6000_opt_masks[j].name);
24805 }
24806
24807 gcc_assert (!set_flags);
24808 }
24809
24810 rs6000_isa_flags &= ~dep_flags;
24811 ignore_masks |= no_flag | dep_flags;
24812 }
24813 }
24814
24815 return ignore_masks;
24816 }
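/* For example, compiling with -mno-vsx -mpower8-vector makes both flags
   explicit, so the loop above reports "-mno-vsx turns off -mpower8-vector"
   via error () (assuming power8-vector is among the VSX-dependent masks);
   an implicitly enabled -mpower8-vector, e.g. from -mcpu=power8 alone with
   -mno-vsx, is instead silently cleared through dep_flags. */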
24817
24818 \f
24819 /* Helper function for printing the function name when debugging. */
24820
24821 static const char *
24822 get_decl_name (tree fn)
24823 {
24824 tree name;
24825
24826 if (!fn)
24827 return "<null>";
24828
24829 name = DECL_NAME (fn);
24830 if (!name)
24831 return "<no-name>";
24832
24833 return IDENTIFIER_POINTER (name);
24834 }
24835
24836 /* Return the clone id of the target we are compiling code for in a target
24837 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24838 the priority list for the target clones (ordered from lowest to
24839 highest). */
24840
24841 static int
24842 rs6000_clone_priority (tree fndecl)
24843 {
24844 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24845 HOST_WIDE_INT isa_masks;
24846 int ret = CLONE_DEFAULT;
24847 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24848 const char *attrs_str = NULL;
24849
24850 attrs = TREE_VALUE (TREE_VALUE (attrs));
24851 attrs_str = TREE_STRING_POINTER (attrs);
24852
24853 /* Return priority zero for default function. Return the ISA needed for the
24854 function if it is not the default. */
24855 if (strcmp (attrs_str, "default") != 0)
24856 {
24857 if (fn_opts == NULL_TREE)
24858 fn_opts = target_option_default_node;
24859
24860 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24861 isa_masks = rs6000_isa_flags;
24862 else
24863 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24864
24865 for (ret = CLONE_MAX - 1; ret != 0; ret--)
24866 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24867 break;
24868 }
24869
24870 if (TARGET_DEBUG_TARGET)
24871 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
24872 get_decl_name (fndecl), ret);
24873
24874 return ret;
24875 }
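/* The priority machinery above backs the target_clones attribute, e.g.
   (a sketch; requires glibc ifunc support):

     __attribute__ ((target_clones ("cpu=power9", "default")))
     long
     sum (long *p, int n)
     {
       long s = 0;
       for (int i = 0; i < n; i++)
         s += p[i];
       return s;
     }

   The "cpu=power9" clone gets a higher clone id than "default" and wins at
   dispatch time when the runtime CPU supports it.  */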
24876
24877 /* This compares the priority of target features in function DECL1 and DECL2.
24878 It returns positive value if DECL1 is higher priority, negative value if
24879 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24880 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24881
24882 static int
24883 rs6000_compare_version_priority (tree decl1, tree decl2)
24884 {
24885 int priority1 = rs6000_clone_priority (decl1);
24886 int priority2 = rs6000_clone_priority (decl2);
24887 int ret = priority1 - priority2;
24888
24889 if (TARGET_DEBUG_TARGET)
24890 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24891 get_decl_name (decl1), get_decl_name (decl2), ret);
24892
24893 return ret;
24894 }
24895
24896 /* Make a dispatcher declaration for the multi-versioned function DECL.
24897 Calls to DECL function will be replaced with calls to the dispatcher
24898 by the front-end. Returns the decl of the dispatcher function. */
24899
24900 static tree
24901 rs6000_get_function_versions_dispatcher (void *decl)
24902 {
24903 tree fn = (tree) decl;
24904 struct cgraph_node *node = NULL;
24905 struct cgraph_node *default_node = NULL;
24906 struct cgraph_function_version_info *node_v = NULL;
24907 struct cgraph_function_version_info *first_v = NULL;
24908
24909 tree dispatch_decl = NULL;
24910
24911 struct cgraph_function_version_info *default_version_info = NULL;
24912 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
24913
24914 if (TARGET_DEBUG_TARGET)
24915 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
24916 get_decl_name (fn));
24917
24918 node = cgraph_node::get (fn);
24919 gcc_assert (node != NULL);
24920
24921 node_v = node->function_version ();
24922 gcc_assert (node_v != NULL);
24923
24924 if (node_v->dispatcher_resolver != NULL)
24925 return node_v->dispatcher_resolver;
24926
24927 /* Find the default version and make it the first node. */
24928 first_v = node_v;
24929 /* Go to the beginning of the chain. */
24930 while (first_v->prev != NULL)
24931 first_v = first_v->prev;
24932
24933 default_version_info = first_v;
24934 while (default_version_info != NULL)
24935 {
24936 const tree decl2 = default_version_info->this_node->decl;
24937 if (is_function_default_version (decl2))
24938 break;
24939 default_version_info = default_version_info->next;
24940 }
24941
24942 /* If there is no default node, just return NULL. */
24943 if (default_version_info == NULL)
24944 return NULL;
24945
24946 /* Make default info the first node. */
24947 if (first_v != default_version_info)
24948 {
24949 default_version_info->prev->next = default_version_info->next;
24950 if (default_version_info->next)
24951 default_version_info->next->prev = default_version_info->prev;
24952 first_v->prev = default_version_info;
24953 default_version_info->next = first_v;
24954 default_version_info->prev = NULL;
24955 }
24956
24957 default_node = default_version_info->this_node;
24958
24959 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24960 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24961 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24962 "exports hardware capability bits");
24963 #else
24964
24965 if (targetm.has_ifunc_p ())
24966 {
24967 struct cgraph_function_version_info *it_v = NULL;
24968 struct cgraph_node *dispatcher_node = NULL;
24969 struct cgraph_function_version_info *dispatcher_version_info = NULL;
24970
24971 /* Right now, the dispatching is done via ifunc. */
24972 dispatch_decl = make_dispatcher_decl (default_node->decl);
24973
24974 dispatcher_node = cgraph_node::get_create (dispatch_decl);
24975 gcc_assert (dispatcher_node != NULL);
24976 dispatcher_node->dispatcher_function = 1;
24977 dispatcher_version_info
24978 = dispatcher_node->insert_new_function_version ();
24979 dispatcher_version_info->next = default_version_info;
24980 dispatcher_node->definition = 1;
24981
24982 /* Set the dispatcher for all the versions. */
24983 it_v = default_version_info;
24984 while (it_v != NULL)
24985 {
24986 it_v->dispatcher_resolver = dispatch_decl;
24987 it_v = it_v->next;
24988 }
24989 }
24990 else
24991 {
24992 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24993 "multiversioning needs %<ifunc%> which is not supported "
24994 "on this target");
24995 }
24996 #endif
24997
24998 return dispatch_decl;
24999 }
25000
25001 /* Make the resolver function decl to dispatch the versions of a multi-
25002 versioned function, DEFAULT_DECL. Create an empty basic block in the
25003 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25004 function. */
25005
25006 static tree
25007 make_resolver_func (const tree default_decl,
25008 const tree dispatch_decl,
25009 basic_block *empty_bb)
25010 {
25011 /* Make the resolver function static. The resolver function returns
25012 void *. */
25013 tree decl_name = clone_function_name (default_decl, "resolver");
25014 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
25015 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
25016 tree decl = build_fn_decl (resolver_name, type);
25017 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
25018
25019 DECL_NAME (decl) = decl_name;
25020 TREE_USED (decl) = 1;
25021 DECL_ARTIFICIAL (decl) = 1;
25022 DECL_IGNORED_P (decl) = 0;
25023 TREE_PUBLIC (decl) = 0;
25024 DECL_UNINLINABLE (decl) = 1;
25025
25026 /* Resolver is not external, body is generated. */
25027 DECL_EXTERNAL (decl) = 0;
25028 DECL_EXTERNAL (dispatch_decl) = 0;
25029
25030 DECL_CONTEXT (decl) = NULL_TREE;
25031 DECL_INITIAL (decl) = make_node (BLOCK);
25032 DECL_STATIC_CONSTRUCTOR (decl) = 0;
25033
25034 if (DECL_COMDAT_GROUP (default_decl)
25035 || TREE_PUBLIC (default_decl))
25036 {
25037 /* In this case, each translation unit with a call to this
25038 versioned function will put out a resolver. Ensure it
25039 is comdat to keep just one copy. */
25040 DECL_COMDAT (decl) = 1;
25041 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25042 }
25043 else
25044 TREE_PUBLIC (dispatch_decl) = 0;
25045
25046 /* Build result decl and add to function_decl. */
25047 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25048 DECL_CONTEXT (t) = decl;
25049 DECL_ARTIFICIAL (t) = 1;
25050 DECL_IGNORED_P (t) = 1;
25051 DECL_RESULT (decl) = t;
25052
25053 gimplify_function_tree (decl);
25054 push_cfun (DECL_STRUCT_FUNCTION (decl));
25055 *empty_bb = init_lowered_empty_function (decl, false,
25056 profile_count::uninitialized ());
25057
25058 cgraph_node::add_new_function (decl, true);
25059 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25060
25061 pop_cfun ();
25062
25063 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25064 DECL_ATTRIBUTES (dispatch_decl)
25065 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25066
25067 cgraph_node::create_same_body_alias (dispatch_decl, decl);
25068
25069 return decl;
25070 }
25071
25072 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25073 return a pointer to VERSION_DECL if we are running on a machine that
25074 supports the hardware architecture bits selected by CLONE_ISA. This function will
25075 be called during version dispatch to decide which function version to
25076 execute. It returns the basic block at the end, to which more conditions
25077 can be added. */
25078
25079 static basic_block
25080 add_condition_to_bb (tree function_decl, tree version_decl,
25081 int clone_isa, basic_block new_bb)
25082 {
25083 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25084
25085 gcc_assert (new_bb != NULL);
25086 gimple_seq gseq = bb_seq (new_bb);
25087
25088
25089 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25090 build_fold_addr_expr (version_decl));
25091 tree result_var = create_tmp_var (ptr_type_node);
25092 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25093 gimple *return_stmt = gimple_build_return (result_var);
25094
25095 if (clone_isa == CLONE_DEFAULT)
25096 {
25097 gimple_seq_add_stmt (&gseq, convert_stmt);
25098 gimple_seq_add_stmt (&gseq, return_stmt);
25099 set_bb_seq (new_bb, gseq);
25100 gimple_set_bb (convert_stmt, new_bb);
25101 gimple_set_bb (return_stmt, new_bb);
25102 pop_cfun ();
25103 return new_bb;
25104 }
25105
25106 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25107 tree cond_var = create_tmp_var (bool_int_type_node);
25108 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25109 const char *arg_str = rs6000_clone_map[clone_isa].name;
25110 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25111 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25112 gimple_call_set_lhs (call_cond_stmt, cond_var);
25113
25114 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25115 gimple_set_bb (call_cond_stmt, new_bb);
25116 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25117
25118 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25119 NULL_TREE, NULL_TREE);
25120 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25121 gimple_set_bb (if_else_stmt, new_bb);
25122 gimple_seq_add_stmt (&gseq, if_else_stmt);
25123
25124 gimple_seq_add_stmt (&gseq, convert_stmt);
25125 gimple_seq_add_stmt (&gseq, return_stmt);
25126 set_bb_seq (new_bb, gseq);
25127
25128 basic_block bb1 = new_bb;
25129 edge e12 = split_block (bb1, if_else_stmt);
25130 basic_block bb2 = e12->dest;
25131 e12->flags &= ~EDGE_FALLTHRU;
25132 e12->flags |= EDGE_TRUE_VALUE;
25133
25134 edge e23 = split_block (bb2, return_stmt);
25135 gimple_set_bb (convert_stmt, bb2);
25136 gimple_set_bb (return_stmt, bb2);
25137
25138 basic_block bb3 = e23->dest;
25139 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25140
25141 remove_edge (e23);
25142 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25143
25144 pop_cfun ();
25145 return bb3;
25146 }
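/* For illustration only (this is not verbatim compiler output): for a
   non-default clone, the code above appends GIMPLE along the lines of

       cond = __builtin_cpu_supports ("arch_3_1");
       if (cond != 0) goto bb2; else goto bb3;
     bb2:
       result = (void *) &foo.arch_3_1;
       return result;
     bb3:
       <next condition, or the default return>

   where "arch_3_1" stands for the rs6000_clone_map name of the clone and
   foo.arch_3_1 is a hypothetical version decl.  */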
25147
25148 /* This function generates the dispatch function for multi-versioned functions.
25149 DISPATCH_DECL is the function which will contain the dispatch logic.
25150 FNDECLS is a vector of the function choices for dispatch.
25151 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25152 code is generated. */
25153
25154 static int
25155 dispatch_function_versions (tree dispatch_decl,
25156 void *fndecls_p,
25157 basic_block *empty_bb)
25158 {
25159 int ix;
25160 tree ele;
25161 vec<tree> *fndecls;
25162 tree clones[CLONE_MAX];
25163
25164 if (TARGET_DEBUG_TARGET)
25165 fputs ("dispatch_function_versions, top\n", stderr);
25166
25167 gcc_assert (dispatch_decl != NULL
25168 && fndecls_p != NULL
25169 && empty_bb != NULL);
25170
25171 /* fndecls_p is actually a vector. */
25172 fndecls = static_cast<vec<tree> *> (fndecls_p);
25173
25174 /* At least one more version other than the default. */
25175 gcc_assert (fndecls->length () >= 2);
25176
25177 /* The first version in the vector is the default decl. */
25178 memset ((void *) clones, '\0', sizeof (clones));
25179 clones[CLONE_DEFAULT] = (*fndecls)[0];
25180
25181 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25182 on the PowerPC (on x86_64, it is not a NOP).  The builtin function
25183 __builtin_cpu_supports ensures that the TOC fields are set up by requiring
25184 a recent glibc.  If we ever need to call __builtin_cpu_init, we would need
25185 to insert the code here to do the call.  */
25186
25187 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25188 {
25189 int priority = rs6000_clone_priority (ele);
25190 if (!clones[priority])
25191 clones[priority] = ele;
25192 }
25193
25194 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25195 if (clones[ix])
25196 {
25197 if (TARGET_DEBUG_TARGET)
25198 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25199 ix, get_decl_name (clones[ix]));
25200
25201 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25202 *empty_bb);
25203 }
25204
25205 return 0;
25206 }
25207
25208 /* Generate the dispatching code body to dispatch multi-versioned function
25209 DECL. The target hook is called to process the "target" attributes and
25210 provide the code to dispatch the right function at run-time. NODE points
25211 to the dispatcher decl whose body will be created. */
25212
25213 static tree
25214 rs6000_generate_version_dispatcher_body (void *node_p)
25215 {
25216 tree resolver;
25217 basic_block empty_bb;
25218 struct cgraph_node *node = (cgraph_node *) node_p;
25219 struct cgraph_function_version_info *ninfo = node->function_version ();
25220
25221 if (ninfo->dispatcher_resolver)
25222 return ninfo->dispatcher_resolver;
25223
25224 /* node is going to be an alias, so remove the finalized bit. */
25225 node->definition = false;
25226
25227 /* The first version in the chain corresponds to the default version. */
25228 ninfo->dispatcher_resolver = resolver
25229 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25230
25231 if (TARGET_DEBUG_TARGET)
25232 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
25233 get_decl_name (resolver));
25234
25235 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25236 auto_vec<tree, 2> fn_ver_vec;
25237
25238 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25239 vinfo;
25240 vinfo = vinfo->next)
25241 {
25242 struct cgraph_node *version = vinfo->this_node;
25243 /* Check for virtual functions here again, as by this time it should
25244 have been determined if this function needs a vtable index or
25245 not. This happens for methods in derived classes that override
25246 virtual methods in base classes but are not explicitly marked as
25247 virtual. */
25248 if (DECL_VINDEX (version->decl))
25249 sorry ("Virtual function multiversioning not supported");
25250
25251 fn_ver_vec.safe_push (version->decl);
25252 }
25253
25254 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25255 cgraph_edge::rebuild_edges ();
25256 pop_cfun ();
25257 return resolver;
25258 }
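/* For illustration, a hypothetical C++ source that exercises the hooks
   above:

     __attribute__ ((target ("cpu=power8")))  int foo () { return 8; }
     __attribute__ ((target ("cpu=power10"))) int foo () { return 10; }
     __attribute__ ((target ("default")))     int foo () { return 0; }
     int call_foo () { return foo (); }

   The call in call_foo is routed through the dispatcher, whose body is
   the resolver generated here.  */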
25259
25260 /* Hook to decide if we need to scan function gimple statements to
25261 collect target specific information for inlining, and update the
25262 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25263 to predict which ISA feature is used at this time. Return true
25264 if we need to scan, otherwise return false. */
25265
25266 static bool
25267 rs6000_need_ipa_fn_target_info (const_tree decl,
25268 unsigned int &info ATTRIBUTE_UNUSED)
25269 {
25270 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25271 if (!target)
25272 target = target_option_default_node;
25273 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25274
25275 /* See PR102059; we only handle HTM for now, so we only do the
25276 subsequent scanning when the HTM feature is enabled.  */
25277 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25278 return true;
25279
25280 return false;
25281 }
25282
25283 /* Hook to update target specific information INFO for inlining by
25284 checking the given STMT. Return false if we don't need to scan
25285 any more, otherwise return true. */
25286
25287 static bool
25288 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25289 {
25290 /* Assume inline asm can use any instruction features. */
25291 if (gimple_code (stmt) == GIMPLE_ASM)
25292 {
25293 /* We should set any bits we are concerned with here; for now
25294 OPTION_MASK_HTM is the only bit we care about.  */
25295 info |= RS6000_FN_TARGET_INFO_HTM;
25296 return false;
25297 }
25298 else if (gimple_code (stmt) == GIMPLE_CALL)
25299 {
25300 tree fndecl = gimple_call_fndecl (stmt);
25301 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25302 {
25303 enum rs6000_gen_builtins fcode
25304 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25305 /* HTM bifs definitely exploit HTM insns. */
25306 if (bif_is_htm (rs6000_builtin_info[fcode]))
25307 {
25308 info |= RS6000_FN_TARGET_INFO_HTM;
25309 return false;
25310 }
25311 }
25312 }
25313
25314 return true;
25315 }
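/* For example (illustrative): when scanning the body of

     void f (void) { __builtin_tbegin (0); }

   the GIMPLE_CALL to __builtin_tbegin is an HTM bif, so the code above
   sets RS6000_FN_TARGET_INFO_HTM and returns false to stop the scan
   early; any inline asm statement is treated the same way,
   conservatively.  */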
25316
25317 /* Hook to determine if one function can safely inline another. */
25318
25319 static bool
25320 rs6000_can_inline_p (tree caller, tree callee)
25321 {
25322 bool ret = false;
25323 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25324 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25325
25326 /* If the callee has no option attributes, then it is ok to inline. */
25327 if (!callee_tree)
25328 ret = true;
25329
25330 else
25331 {
25332 HOST_WIDE_INT caller_isa;
25333 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25334 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25335 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25336
25337 /* If the caller has option attributes, then use them.
25338 Otherwise, use the command line options. */
25339 if (caller_tree)
25340 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
25341 else
25342 caller_isa = rs6000_isa_flags;
25343
25344 cgraph_node *callee_node = cgraph_node::get (callee);
25345 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25346 {
25347 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25348 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25349 {
25350 callee_isa &= ~OPTION_MASK_HTM;
25351 explicit_isa &= ~OPTION_MASK_HTM;
25352 }
25353 }
25354
25355 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25356 purposes. */
25357 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25358 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25359
25360 /* The callee's options must be a subset of the caller's options, i.e.
25361 a vsx function may inline an altivec function, but a no-vsx function
25362 must not inline a vsx function. However, for those options that the
25363 callee has explicitly enabled or disabled, then we must enforce that
25364 the callee's and caller's options match exactly; see PR70010. */
25365 if (((caller_isa & callee_isa) == callee_isa)
25366 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25367 ret = true;
25368 }
25369
25370 if (TARGET_DEBUG_TARGET)
25371 fprintf (stderr, "rs6000_can_inline_p, caller %s, callee %s, %s inline\n",
25372 get_decl_name (caller), get_decl_name (callee),
25373 (ret ? "can" : "cannot"));
25374
25375 return ret;
25376 }
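/* Illustrative example of the subset rule above (hypothetical source):

     __attribute__ ((target ("altivec"))) static int callee (void);
     __attribute__ ((target ("vsx")))     int caller (void)
     { return callee (); }

   Here inlining is allowed, since VSX implies Altivec and so callee's
   flags are a subset of caller's.  A caller compiled with -mno-vsx,
   however, must not inline a VSX callee.  */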
25377 \f
25378 /* Allocate a stack temp and fix up the address so it meets the particular
25379 memory requirements (either offsettable or REG+REG addressing).  */
25380
25381 rtx
25382 rs6000_allocate_stack_temp (machine_mode mode,
25383 bool offsettable_p,
25384 bool reg_reg_p)
25385 {
25386 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25387 rtx addr = XEXP (stack, 0);
25388 int strict_p = reload_completed;
25389
25390 if (!legitimate_indirect_address_p (addr, strict_p))
25391 {
25392 if (offsettable_p
25393 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25394 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25395
25396 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25397 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25398 }
25399
25400 return stack;
25401 }
25402
25403 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25404 convert to such a form to deal with memory reference instructions
25405 like STFIWX and LDBRX that only take reg+reg addressing. */
25406
25407 rtx
25408 rs6000_force_indexed_or_indirect_mem (rtx x)
25409 {
25410 machine_mode mode = GET_MODE (x);
25411
25412 gcc_assert (MEM_P (x));
25413 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25414 {
25415 rtx addr = XEXP (x, 0);
25416 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25417 {
25418 rtx reg = XEXP (addr, 0);
25419 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25420 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25421 gcc_assert (REG_P (reg));
25422 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25423 addr = reg;
25424 }
25425 else if (GET_CODE (addr) == PRE_MODIFY)
25426 {
25427 rtx reg = XEXP (addr, 0);
25428 rtx expr = XEXP (addr, 1);
25429 gcc_assert (REG_P (reg));
25430 gcc_assert (GET_CODE (expr) == PLUS);
25431 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25432 addr = reg;
25433 }
25434
25435 if (GET_CODE (addr) == PLUS)
25436 {
25437 rtx op0 = XEXP (addr, 0);
25438 rtx op1 = XEXP (addr, 1);
25439 op0 = force_reg (Pmode, op0);
25440 op1 = force_reg (Pmode, op1);
25441 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25442 }
25443 else
25444 x = replace_equiv_address (x, force_reg (Pmode, addr));
25445 }
25446
25447 return x;
25448 }
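/* For illustration: given a pre-increment reference such as
   (mem:DI (pre_inc (reg R))), the code above first emits

       R = R + 8        // gen_add3_insn with the mode size

   and then rewrites the reference as the plain indirect
   (mem:DI (reg R)), so that reg or reg+reg only instructions such as
   LDBRX can be used.  */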
25449
25450 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25451
25452 On the RS/6000, all integer constants are acceptable, though most won't
25453 be valid for particular insns.  Only easy FP constants are acceptable.  */
25454
25455 static bool
25456 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25457 {
25458 if (TARGET_ELF && tls_referenced_p (x))
25459 return false;
25460
25461 if (CONST_DOUBLE_P (x))
25462 return easy_fp_constant (x, mode);
25463
25464 if (GET_CODE (x) == CONST_VECTOR)
25465 return easy_vector_constant (x, mode);
25466
25467 return true;
25468 }
25469
25470 #if TARGET_AIX_OS
25471 /* Implement TARGET_PRECOMPUTE_TLS_P.
25472
25473 On AIX, TLS symbols are in the TOC, which is maintained in the
25474 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25475 must be considered legitimate constants. */
25476
25477 static bool
25478 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25479 {
25480 return tls_referenced_p (x);
25481 }
25482 #endif
25483
25484 \f
25485 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25486
25487 static bool
25488 chain_already_loaded (rtx_insn *last)
25489 {
25490 for (; last != NULL; last = PREV_INSN (last))
25491 {
25492 if (NONJUMP_INSN_P (last))
25493 {
25494 rtx patt = PATTERN (last);
25495
25496 if (GET_CODE (patt) == SET)
25497 {
25498 rtx lhs = XEXP (patt, 0);
25499
25500 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25501 return true;
25502 }
25503 }
25504 }
25505 return false;
25506 }
25507
25508 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25509
25510 void
25511 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25512 {
25513 rtx func = func_desc;
25514 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25515 rtx toc_load = NULL_RTX;
25516 rtx toc_restore = NULL_RTX;
25517 rtx func_addr;
25518 rtx abi_reg = NULL_RTX;
25519 rtx call[5];
25520 int n_call;
25521 rtx insn;
25522 bool is_pltseq_longcall;
25523
25524 if (global_tlsarg)
25525 tlsarg = global_tlsarg;
25526
25527 /* Handle longcall attributes. */
25528 is_pltseq_longcall = false;
25529 if ((INTVAL (cookie) & CALL_LONG) != 0
25530 && GET_CODE (func_desc) == SYMBOL_REF)
25531 {
25532 func = rs6000_longcall_ref (func_desc, tlsarg);
25533 if (TARGET_PLTSEQ)
25534 is_pltseq_longcall = true;
25535 }
25536
25537 /* Handle indirect calls. */
25538 if (!SYMBOL_REF_P (func)
25539 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25540 {
25541 if (!rs6000_pcrel_p ())
25542 {
25543 /* Save the TOC into its reserved slot before the call,
25544 and prepare to restore it after the call. */
25545 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25546 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25547 gen_rtvec (1, stack_toc_offset),
25548 UNSPEC_TOCSLOT);
25549 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25550
25551 /* Can we optimize saving the TOC in the prologue or
25552 do we need to do it at every call? */
25553 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25554 cfun->machine->save_toc_in_prologue = true;
25555 else
25556 {
25557 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25558 rtx stack_toc_mem = gen_frame_mem (Pmode,
25559 gen_rtx_PLUS (Pmode, stack_ptr,
25560 stack_toc_offset));
25561 MEM_VOLATILE_P (stack_toc_mem) = 1;
25562 if (is_pltseq_longcall)
25563 {
25564 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25565 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25566 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25567 }
25568 else
25569 emit_move_insn (stack_toc_mem, toc_reg);
25570 }
25571 }
25572
25573 if (DEFAULT_ABI == ABI_ELFv2)
25574 {
25575 /* A function pointer in the ELFv2 ABI is just a plain address, but
25576 the ABI requires it to be loaded into r12 before the call. */
25577 func_addr = gen_rtx_REG (Pmode, 12);
25578 emit_move_insn (func_addr, func);
25579 abi_reg = func_addr;
25580 /* Indirect calls via CTR are strongly preferred over indirect
25581 calls via LR, so move the address there. Needed to mark
25582 this insn for linker plt sequence editing too. */
25583 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25584 if (is_pltseq_longcall)
25585 {
25586 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25587 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25588 emit_insn (gen_rtx_SET (func_addr, mark_func));
25589 v = gen_rtvec (2, func_addr, func_desc);
25590 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25591 }
25592 else
25593 emit_move_insn (func_addr, abi_reg);
25594 }
25595 else
25596 {
25597 /* A function pointer under AIX is a pointer to a data area whose
25598 first word contains the actual address of the function, whose
25599 second word contains a pointer to its TOC, and whose third word
25600 contains a value to place in the static chain register (r11).
25601 Note that if we load the static chain, our "trampoline" need
25602 not have any executable code. */
25603
25604 /* Load up address of the actual function. */
25605 func = force_reg (Pmode, func);
25606 func_addr = gen_reg_rtx (Pmode);
25607 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25608
25609 /* Indirect calls via CTR are strongly preferred over indirect
25610 calls via LR, so move the address there. */
25611 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25612 emit_move_insn (ctr_reg, func_addr);
25613 func_addr = ctr_reg;
25614
25615 /* Prepare to load the TOC of the called function. Note that the
25616 TOC load must happen immediately before the actual call so
25617 that unwinding the TOC registers works correctly. See the
25618 comment in frob_update_context. */
25619 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25620 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25621 gen_rtx_PLUS (Pmode, func,
25622 func_toc_offset));
25623 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25624
25625 /* If we have a static chain, load it up. But, if the call was
25626 originally direct, the 3rd word has not been written since no
25627 trampoline has been built, so we ought not to load it, lest we
25628 override a static chain value. */
25629 if (!(GET_CODE (func_desc) == SYMBOL_REF
25630 && SYMBOL_REF_FUNCTION_P (func_desc))
25631 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25632 && !chain_already_loaded (get_current_sequence ()->next->last))
25633 {
25634 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25635 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25636 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25637 gen_rtx_PLUS (Pmode, func,
25638 func_sc_offset));
25639 emit_move_insn (sc_reg, func_sc_mem);
25640 abi_reg = sc_reg;
25641 }
25642 }
25643 }
25644 else
25645 {
25646 /* No TOC register needed for calls from PC-relative callers. */
25647 if (!rs6000_pcrel_p ())
25648 /* Direct calls use the TOC: for local calls, the callee will
25649 assume the TOC register is set; for non-local calls, the
25650 PLT stub needs the TOC register. */
25651 abi_reg = toc_reg;
25652 func_addr = func;
25653 }
25654
25655 /* Create the call. */
25656 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25657 if (value != NULL_RTX)
25658 call[0] = gen_rtx_SET (value, call[0]);
25659 call[1] = gen_rtx_USE (VOIDmode, cookie);
25660 n_call = 2;
25661
25662 if (toc_load)
25663 call[n_call++] = toc_load;
25664 if (toc_restore)
25665 call[n_call++] = toc_restore;
25666
25667 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25668
25669 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25670 insn = emit_call_insn (insn);
25671
25672 /* Mention all registers defined by the ABI to hold information
25673 as uses in CALL_INSN_FUNCTION_USAGE. */
25674 if (abi_reg)
25675 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25676 }
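/* For illustration (schematic, not exact output): an indirect call
   through a function descriptor under the AIX/ELFv1 ABI expands to
   roughly

       std 2,<toc-slot>(1)   # save our TOC pointer on the stack
       ld 9,0(11)            # word 0: actual function address
       mtctr 9
       ld 2,8(11)            # word 1: callee's TOC pointer
       bctrl
       ld 2,<toc-slot>(1)    # restore our TOC pointer

   The register numbers are placeholders; the save slot is
   RS6000_TOC_SAVE_SLOT and the descriptor register depends on register
   allocation.  */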
25677
25678 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25679
25680 void
25681 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25682 {
25683 rtx call[2];
25684 rtx insn;
25685 rtx r12 = NULL_RTX;
25686 rtx func_addr = func_desc;
25687
25688 if (global_tlsarg)
25689 tlsarg = global_tlsarg;
25690
25691 /* Handle longcall attributes. */
25692 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
25693 {
25694 /* PCREL can do a sibling call to a longcall function
25695 because we don't need to restore the TOC register. */
25696 gcc_assert (rs6000_pcrel_p ());
25697 func_desc = rs6000_longcall_ref (func_desc, tlsarg);
25698 }
25699 else
25700 gcc_assert (INTVAL (cookie) == 0);
25701
25702 /* For ELFv2, r12 and CTR need to hold the function address
25703 for an indirect call. */
25704 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25705 {
25706 r12 = gen_rtx_REG (Pmode, 12);
25707 emit_move_insn (r12, func_desc);
25708 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25709 emit_move_insn (func_addr, r12);
25710 }
25711
25712 /* Create the call. */
25713 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25714 if (value != NULL_RTX)
25715 call[0] = gen_rtx_SET (value, call[0]);
25716
25717 call[1] = simple_return_rtx;
25718
25719 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25720 insn = emit_call_insn (insn);
25721
25722 /* Note use of the TOC register. */
25723 if (!rs6000_pcrel_p ())
25724 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25725 gen_rtx_REG (Pmode, TOC_REGNUM));
25726
25727 /* Note use of r12. */
25728 if (r12)
25729 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25730 }
25731
25732 /* Expand code to perform a call under the SYSV4 ABI. */
25733
25734 void
25735 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25736 {
25737 rtx func = func_desc;
25738 rtx func_addr;
25739 rtx call[4];
25740 rtx insn;
25741 rtx abi_reg = NULL_RTX;
25742 int n;
25743
25744 if (global_tlsarg)
25745 tlsarg = global_tlsarg;
25746
25747 /* Handle longcall attributes. */
25748 if ((INTVAL (cookie) & CALL_LONG) != 0
25749 && GET_CODE (func_desc) == SYMBOL_REF)
25750 {
25751 func = rs6000_longcall_ref (func_desc, tlsarg);
25752 /* If the longcall was implemented as an inline PLT call using
25753 PLT unspecs then func will be REG:r11. If not, func will be
25754 a pseudo reg. The inline PLT call sequence supports lazy
25755 linking (and longcalls to functions in dlopen'd libraries).
25756 The other style of longcalls don't. The lazy linking entry
25757 to the dynamic symbol resolver requires r11 be the function
25758 address (as it is for linker generated PLT stubs). Ensure
25759 r11 stays valid to the bctrl by marking r11 used by the call. */
25760 if (TARGET_PLTSEQ)
25761 abi_reg = func;
25762 }
25763
25764 /* Handle indirect calls. */
25765 if (GET_CODE (func) != SYMBOL_REF)
25766 {
25767 func = force_reg (Pmode, func);
25768
25769 /* Indirect calls via CTR are strongly preferred over indirect
25770 calls via LR, so move the address there. That can't be left
25771 to reload because we want to mark every instruction in an
25772 inline PLT call sequence with a reloc, enabling the linker to
25773 edit the sequence back to a direct call when that makes sense. */
25774 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25775 if (abi_reg)
25776 {
25777 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25778 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25779 emit_insn (gen_rtx_SET (func_addr, mark_func));
25780 v = gen_rtvec (2, func_addr, func_desc);
25781 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25782 }
25783 else
25784 emit_move_insn (func_addr, func);
25785 }
25786 else
25787 func_addr = func;
25788
25789 /* Create the call. */
25790 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25791 if (value != NULL_RTX)
25792 call[0] = gen_rtx_SET (value, call[0]);
25793
25794 call[1] = gen_rtx_USE (VOIDmode, cookie);
25795 n = 2;
25796 if (TARGET_SECURE_PLT
25797 && flag_pic
25798 && GET_CODE (func_addr) == SYMBOL_REF
25799 && !SYMBOL_REF_LOCAL_P (func_addr))
25800 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25801
25802 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25803
25804 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25805 insn = emit_call_insn (insn);
25806 if (abi_reg)
25807 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25808 }
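/* Schematic illustration of the inline PLT longcall sequence described
   above (mnemonics and relocations are approximate, not exact output):

       addis 11,2,func@...   # each insn carries a PLTSEQ-style reloc
       ld 12,func@...(11)    # load the PLT entry address
       mtctr 12
       bctrl

   Because every instruction is marked with a reloc, the linker can edit
   the whole sequence back into a direct "bl func; nop" when the callee
   turns out to be local.  */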
25809
25810 /* Expand code to perform a sibling call under the SysV4 ABI. */
25811
25812 void
25813 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25814 {
25815 rtx func = func_desc;
25816 rtx func_addr;
25817 rtx call[3];
25818 rtx insn;
25819 rtx abi_reg = NULL_RTX;
25820
25821 if (global_tlsarg)
25822 tlsarg = global_tlsarg;
25823
25824 /* Handle longcall attributes. */
25825 if ((INTVAL (cookie) & CALL_LONG) != 0
25826 && GET_CODE (func_desc) == SYMBOL_REF)
25827 {
25828 func = rs6000_longcall_ref (func_desc, tlsarg);
25829 /* If the longcall was implemented as an inline PLT call using
25830 PLT unspecs then func will be REG:r11. If not, func will be
25831 a pseudo reg. The inline PLT call sequence supports lazy
25832 linking (and longcalls to functions in dlopen'd libraries).
25833 The other style of longcalls don't. The lazy linking entry
25834 to the dynamic symbol resolver requires r11 be the function
25835 address (as it is for linker generated PLT stubs). Ensure
25836 r11 stays valid to the bctr by marking r11 used by the call. */
25837 if (TARGET_PLTSEQ)
25838 abi_reg = func;
25839 }
25840
25841 /* Handle indirect calls. */
25842 if (GET_CODE (func) != SYMBOL_REF)
25843 {
25844 func = force_reg (Pmode, func);
25845
25846 /* Indirect sibcalls must go via CTR. That can't be left to
25847 reload because we want to mark every instruction in an inline
25848 PLT call sequence with a reloc, enabling the linker to edit
25849 the sequence back to a direct call when that makes sense. */
25850 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25851 if (abi_reg)
25852 {
25853 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25854 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25855 emit_insn (gen_rtx_SET (func_addr, mark_func));
25856 v = gen_rtvec (2, func_addr, func_desc);
25857 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25858 }
25859 else
25860 emit_move_insn (func_addr, func);
25861 }
25862 else
25863 func_addr = func;
25864
25865 /* Create the call. */
25866 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25867 if (value != NULL_RTX)
25868 call[0] = gen_rtx_SET (value, call[0]);
25869
25870 call[1] = gen_rtx_USE (VOIDmode, cookie);
25871 call[2] = simple_return_rtx;
25872
25873 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25874 insn = emit_call_insn (insn);
25875 if (abi_reg)
25876 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25877 }
25878
25879 #if TARGET_MACHO
25880
25881 /* Expand code to perform a call under the Darwin ABI.
25882 Modulo handling of mlongcall, this is much the same as sysv.
25883 If/when the longcall optimisation is removed, we could drop this
25884 code and use the sysv case (taking care to avoid the tls stuff).
25885
25886 We can use this for sibcalls too, if needed. */
25887
25888 void
25889 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25890 rtx cookie, bool sibcall)
25891 {
25892 rtx func = func_desc;
25893 rtx func_addr;
25894 rtx call[3];
25895 rtx insn;
25896 int cookie_val = INTVAL (cookie);
25897 bool make_island = false;
25898
25899 /* Handle longcall attributes; there are two cases for Darwin:
25900 1) Newer linkers are capable of synthesising any branch islands needed.
25901 2) We need a helper branch island synthesised by the compiler.
25902 The second case has mostly been retired and we don't use it for m64.
25903 In fact, it is an optimisation; we could just indirect as sysv does,
25904 but we keep it for backwards compatibility for now.
25905 If we're going to use this, then we need to keep the CALL_LONG bit set,
25906 so that we can pick up the special insn form later. */
25907 if ((cookie_val & CALL_LONG) != 0
25908 && GET_CODE (func_desc) == SYMBOL_REF)
25909 {
25910 /* FIXME: the longcall opt should not hang off this flag, it is most
25911 likely incorrect for kernel-mode code-generation. */
25912 if (darwin_symbol_stubs && TARGET_32BIT)
25913 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
25914 else
25915 {
25916 /* The linker is capable of doing this, but the user explicitly
25917 asked for -mlongcall, so we'll do the 'normal' version. */
25918 func = rs6000_longcall_ref (func_desc, NULL_RTX);
25919 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
25920 }
25921 }
25922
25923 /* Handle indirect calls. */
25924 if (GET_CODE (func) != SYMBOL_REF)
25925 {
25926 func = force_reg (Pmode, func);
25927
25928 /* Indirect calls via CTR are strongly preferred over indirect
25929 calls via LR, and are required for indirect sibcalls, so move
25930 the address there. */
25931 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25932 emit_move_insn (func_addr, func);
25933 }
25934 else
25935 func_addr = func;
25936
25937 /* Create the call. */
25938 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25939 if (value != NULL_RTX)
25940 call[0] = gen_rtx_SET (value, call[0]);
25941
25942 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25943
25944 if (sibcall)
25945 call[2] = simple_return_rtx;
25946 else
25947 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25948
25949 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25950 insn = emit_call_insn (insn);
25951 /* Now we have the debug info in the insn, we can set up the branch island
25952 if we're using one. */
25953 if (make_island)
25954 {
25955 tree funname = get_identifier (XSTR (func_desc, 0));
25956
25957 if (no_previous_def (funname))
25958 {
25959 rtx label_rtx = gen_label_rtx ();
25960 char *label_buf, temp_buf[256];
25961 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25962 CODE_LABEL_NUMBER (label_rtx));
25963 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25964 tree labelname = get_identifier (label_buf);
25965 add_compiler_branch_island (labelname, funname,
25966 insn_line ((const rtx_insn*)insn));
25967 }
25968 }
25969 }
25970 #endif
25971
25972 void
25973 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25974 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25975 {
25976 #if TARGET_MACHO
25977 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25978 #else
25979 gcc_unreachable();
25980 #endif
25981 }
25982
25983
25984 void
25985 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25986 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25987 {
25988 #if TARGET_MACHO
25989 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25990 #else
25991 gcc_unreachable();
25992 #endif
25993 }
25994
25995 /* Return whether we should generate PC-relative code for FNDECL. */
25996 bool
25997 rs6000_fndecl_pcrel_p (const_tree fndecl)
25998 {
25999 if (DEFAULT_ABI != ABI_ELFv2)
26000 return false;
26001
26002 struct cl_target_option *opts = target_opts_for_fn (fndecl);
26003
26004 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26005 && TARGET_CMODEL == CMODEL_MEDIUM);
26006 }
26007
26008 /* Return whether we should generate PC-relative code for *FN. */
26009 bool
26010 rs6000_function_pcrel_p (struct function *fn)
26011 {
26012 if (DEFAULT_ABI != ABI_ELFv2)
26013 return false;
26014
26015 /* Optimize the usual case.  */
26016 if (fn == cfun)
26017 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26018 && TARGET_CMODEL == CMODEL_MEDIUM);
26019
26020 return rs6000_fndecl_pcrel_p (fn->decl);
26021 }
26022
26023 /* Return whether we should generate PC-relative code for the current
26024 function. */
26025 bool
26026 rs6000_pcrel_p ()
26027 {
26028 return (DEFAULT_ABI == ABI_ELFv2
26029 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26030 && TARGET_CMODEL == CMODEL_MEDIUM);
26031 }
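/* For example, code is typically PC-relative when compiling for ELFv2
   with -mcpu=power10 (which enables -mpcrel) and the default
   -mcmodel=medium; -mno-pcrel or a different code model turns it off.  */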
26032
26033 \f
26034 /* Given an address (ADDR), a mode (MODE), and what the format of the
26035 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26036 for the address. */
26037
26038 enum insn_form
26039 address_to_insn_form (rtx addr,
26040 machine_mode mode,
26041 enum non_prefixed_form non_prefixed_format)
26042 {
26043 /* Single register is easy. */
26044 if (REG_P (addr) || SUBREG_P (addr))
26045 return INSN_FORM_BASE_REG;
26046
26047 /* If the non-prefixed instruction format doesn't support offset addressing,
26048 make sure only indexed addressing is allowed.
26049
26050 We special case SDmode so that the register allocator does not try to move
26051 SDmode through GPR registers, but instead uses the 32-bit integer load and
26052 store instructions for the floating point registers. */
26053 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26054 {
26055 if (GET_CODE (addr) != PLUS)
26056 return INSN_FORM_BAD;
26057
26058 rtx op0 = XEXP (addr, 0);
26059 rtx op1 = XEXP (addr, 1);
26060 if (!REG_P (op0) && !SUBREG_P (op0))
26061 return INSN_FORM_BAD;
26062
26063 if (!REG_P (op1) && !SUBREG_P (op1))
26064 return INSN_FORM_BAD;
26065
26066 return INSN_FORM_X;
26067 }
26068
26069 /* Deal with update forms. */
26070 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26071 return INSN_FORM_UPDATE;
26072
26073 /* Handle PC-relative symbols and labels. Check for both local and
26074 external symbols. Assume labels are always local. TLS symbols
26075 are not PC-relative for rs6000. */
26076 if (TARGET_PCREL)
26077 {
26078 if (LABEL_REF_P (addr))
26079 return INSN_FORM_PCREL_LOCAL;
26080
26081 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26082 {
26083 if (!SYMBOL_REF_LOCAL_P (addr))
26084 return INSN_FORM_PCREL_EXTERNAL;
26085 else
26086 return INSN_FORM_PCREL_LOCAL;
26087 }
26088 }
26089
26090 if (GET_CODE (addr) == CONST)
26091 addr = XEXP (addr, 0);
26092
26093 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26094 if (GET_CODE (addr) == LO_SUM)
26095 return INSN_FORM_LO_SUM;
26096
26097 /* Everything below must be an offset address of some form. */
26098 if (GET_CODE (addr) != PLUS)
26099 return INSN_FORM_BAD;
26100
26101 rtx op0 = XEXP (addr, 0);
26102 rtx op1 = XEXP (addr, 1);
26103
26104 /* Check for indexed addresses. */
26105 if (REG_P (op1) || SUBREG_P (op1))
26106 {
26107 if (REG_P (op0) || SUBREG_P (op0))
26108 return INSN_FORM_X;
26109
26110 return INSN_FORM_BAD;
26111 }
26112
26113 if (!CONST_INT_P (op1))
26114 return INSN_FORM_BAD;
26115
26116 HOST_WIDE_INT offset = INTVAL (op1);
26117 if (!SIGNED_INTEGER_34BIT_P (offset))
26118 return INSN_FORM_BAD;
26119
26120 /* Check for local and external PC-relative addresses. Labels are always
26121 local. TLS symbols are not PC-relative for rs6000. */
26122 if (TARGET_PCREL)
26123 {
26124 if (LABEL_REF_P (op0))
26125 return INSN_FORM_PCREL_LOCAL;
26126
26127 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26128 {
26129 if (!SYMBOL_REF_LOCAL_P (op0))
26130 return INSN_FORM_PCREL_EXTERNAL;
26131 else
26132 return INSN_FORM_PCREL_LOCAL;
26133 }
26134 }
26135
26136 /* If it isn't PC-relative, the address must use a base register. */
26137 if (!REG_P (op0) && !SUBREG_P (op0))
26138 return INSN_FORM_BAD;
26139
26140 /* Large offsets must be prefixed. */
26141 if (!SIGNED_INTEGER_16BIT_P (offset))
26142 {
26143 if (TARGET_PREFIXED)
26144 return INSN_FORM_PREFIXED_NUMERIC;
26145
26146 return INSN_FORM_BAD;
26147 }
26148
26149 /* We have a 16-bit offset, see what default instruction format to use. */
26150 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26151 {
26152 unsigned size = GET_MODE_SIZE (mode);
26153
26154 /* On 64-bit systems, assume 64-bit integers need to use DS form
26155 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26156 (for LXV and STXV).  TImode is problematic in that its normal usage
26157 is expected to be GPRs, where it wants a DS instruction format, but if
26158 it goes into the vector registers, it wants a DQ instruction
26159 format.  */
26160 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26161 non_prefixed_format = NON_PREFIXED_DS;
26162
26163 else if (TARGET_VSX && size >= 16
26164 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26165 non_prefixed_format = NON_PREFIXED_DQ;
26166
26167 else
26168 non_prefixed_format = NON_PREFIXED_D;
26169 }
26170
26171 /* Classify the D/DS/DQ-form addresses. */
26172 switch (non_prefixed_format)
26173 {
26174 /* Instruction format D, all 16 bits are valid. */
26175 case NON_PREFIXED_D:
26176 return INSN_FORM_D;
26177
26178 /* Instruction format DS, bottom 2 bits must be 0. */
26179 case NON_PREFIXED_DS:
26180 if ((offset & 3) == 0)
26181 return INSN_FORM_DS;
26182
26183 else if (TARGET_PREFIXED)
26184 return INSN_FORM_PREFIXED_NUMERIC;
26185
26186 else
26187 return INSN_FORM_BAD;
26188
26189 /* Instruction format DQ, bottom 4 bits must be 0. */
26190 case NON_PREFIXED_DQ:
26191 if ((offset & 15) == 0)
26192 return INSN_FORM_DQ;
26193
26194 else if (TARGET_PREFIXED)
26195 return INSN_FORM_PREFIXED_NUMERIC;
26196
26197 else
26198 return INSN_FORM_BAD;
26199
26200 default:
26201 break;
26202 }
26203
26204 return INSN_FORM_BAD;
26205 }
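/* Illustrative classifications (assuming a 64-bit target with prefixed
   instruction support; the register numbers are arbitrary):

     (reg 3)                            -> INSN_FORM_BASE_REG
     (plus (reg 3) (reg 4))             -> INSN_FORM_X
     (plus (reg 3) (const_int 8))       -> INSN_FORM_D, _DS or _DQ,
                                           depending on the format
     (plus (reg 3) (const_int 100000))  -> INSN_FORM_PREFIXED_NUMERIC
     (symbol_ref "x"), local, -mpcrel   -> INSN_FORM_PCREL_LOCAL  */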
26206
26207 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26208 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26209 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26210 a D-form or DS-form instruction. X-form and base_reg are always
26211 allowed. */
26212 bool
26213 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26214 enum non_prefixed_form non_prefixed_format)
26215 {
26216 enum insn_form result_form;
26217
26218 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26219
26220 switch (non_prefixed_format)
26221 {
26222 case NON_PREFIXED_D:
26223 switch (result_form)
26224 {
26225 case INSN_FORM_X:
26226 case INSN_FORM_D:
26227 case INSN_FORM_DS:
26228 case INSN_FORM_BASE_REG:
26229 return true;
26230 default:
26231 return false;
26232 }
26233 break;
26234 case NON_PREFIXED_DS:
26235 switch (result_form)
26236 {
26237 case INSN_FORM_X:
26238 case INSN_FORM_DS:
26239 case INSN_FORM_BASE_REG:
26240 return true;
26241 default:
26242 return false;
26243 }
26244 break;
26245 default:
26246 break;
26247 }
26248 return false;
26249 }
26250
26251 /* Return true if a REG with a given MODE that is loaded from or stored
26252 into a MEM location uses a non-prefixed D/DS/DQ-form address.  This is
26253 used to validate the load or store for the PCREL_OPT optimization, to
26254 make sure it is an instruction that can be optimized.
26255
26256 We need to specify the MODE separately from the REG to allow for loads that
26257 include zero/sign/float extension. */
26258
26259 bool
26260 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26261 {
26262 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26263 PCREL_OPT optimization. */
26264 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26265 if (non_prefixed == NON_PREFIXED_X)
26266 return false;
26267
26268 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26269 rtx addr = XEXP (mem, 0);
26270 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26271 return (iform == INSN_FORM_BASE_REG
26272 || iform == INSN_FORM_D
26273 || iform == INSN_FORM_DS
26274 || iform == INSN_FORM_DQ);
26275 }
26276
26277 /* Helper function to see if we're potentially looking at lfs/stfs.
26278 - PARALLEL containing a SET and a CLOBBER
26279 - stfs:
26280 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26281 - CLOBBER is a V4SF
26282 - lfs:
26283 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26284 - CLOBBER is a DI
26285 */
26286
26287 static bool
26288 is_lfs_stfs_insn (rtx_insn *insn)
26289 {
26290 rtx pattern = PATTERN (insn);
26291 if (GET_CODE (pattern) != PARALLEL)
26292 return false;
26293
26294 /* This should be a parallel with exactly one set and one clobber. */
26295 if (XVECLEN (pattern, 0) != 2)
26296 return false;
26297
26298 rtx set = XVECEXP (pattern, 0, 0);
26299 if (GET_CODE (set) != SET)
26300 return false;
26301
26302 rtx clobber = XVECEXP (pattern, 0, 1);
26303 if (GET_CODE (clobber) != CLOBBER)
26304 return false;
26305
26306 /* All we care about is that the destination of the SET is a mem:SI,
26307 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26308 should be a scratch:V4SF. */
26309
26310 rtx dest = SET_DEST (set);
26311 rtx src = SET_SRC (set);
26312 rtx scratch = SET_DEST (clobber);
26313
26314 if (GET_CODE (src) != UNSPEC)
26315 return false;
26316
26317 /* stfs case. */
26318 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26319 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26320 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26321 return true;
26322
26323 /* lfs case. */
26324 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26325 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26326 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26327 return true;
26328
26329 return false;
26330 }
26331
26332 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26333 instruction format (D/DS/DQ) used for offset memory. */
26334
26335 enum non_prefixed_form
26336 reg_to_non_prefixed (rtx reg, machine_mode mode)
26337 {
26338 /* If it isn't a register, use the defaults. */
26339 if (!REG_P (reg) && !SUBREG_P (reg))
26340 return NON_PREFIXED_DEFAULT;
26341
26342 unsigned int r = reg_or_subregno (reg);
26343
26344 /* If we have a pseudo, use the default instruction format. */
26345 if (!HARD_REGISTER_NUM_P (r))
26346 return NON_PREFIXED_DEFAULT;
26347
26348 unsigned size = GET_MODE_SIZE (mode);
26349
26350 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
26351 128-bit floating point, and 128-bit integers. Before power9, only indexed
26352 addressing was available for vectors. */
26353 if (FP_REGNO_P (r))
26354 {
26355 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26356 return NON_PREFIXED_D;
26357
26358 else if (size < 8)
26359 return NON_PREFIXED_X;
26360
26361 else if (TARGET_VSX && size >= 16
26362 && (VECTOR_MODE_P (mode)
26363 || VECTOR_ALIGNMENT_P (mode)
26364 || mode == TImode || mode == CTImode))
26365 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26366
26367 else
26368 return NON_PREFIXED_DEFAULT;
26369 }
26370
26371 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
26372 128-bit floating point, and 128-bit integers. Before power9, only indexed
26373 addressing was available. */
26374 else if (ALTIVEC_REGNO_P (r))
26375 {
26376 if (!TARGET_P9_VECTOR)
26377 return NON_PREFIXED_X;
26378
26379 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26380 return NON_PREFIXED_DS;
26381
26382 else if (size < 8)
26383 return NON_PREFIXED_X;
26384
26385 else if (TARGET_VSX && size >= 16
26386 && (VECTOR_MODE_P (mode)
26387 || VECTOR_ALIGNMENT_P (mode)
26388 || mode == TImode || mode == CTImode))
26389 return NON_PREFIXED_DQ;
26390
26391 else
26392 return NON_PREFIXED_DEFAULT;
26393 }
26394
26395 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
26396 otherwise. Assume that any other register, such as LR, CRs, etc. will go
26397 through the GPR registers for memory operations. */
26398 else if (TARGET_POWERPC64 && size >= 8)
26399 return NON_PREFIXED_DS;
26400
26401 return NON_PREFIXED_D;
26402 }
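/* Some illustrative results of the mapping above: DFmode in an FPR gives
   NON_PREFIXED_D (LFD/STFD accept a full 16-bit offset); DImode in a GPR
   on a 64-bit system gives NON_PREFIXED_DS (LD/STD need the low 2 offset
   bits clear); V2DImode in a VSX register with power9 support gives
   NON_PREFIXED_DQ (LXV/STXV need the low 4 bits clear).  */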
26403
26404 \f
26405 /* Whether a load instruction is a prefixed instruction. This is called from
26406 the prefixed attribute processing. */
26407
26408 bool
26409 prefixed_load_p (rtx_insn *insn)
26410 {
26411 /* Validate the insn to make sure it is a normal load insn. */
26412 extract_insn_cached (insn);
26413 if (recog_data.n_operands < 2)
26414 return false;
26415
26416 rtx reg = recog_data.operand[0];
26417 rtx mem = recog_data.operand[1];
26418
26419 if (!REG_P (reg) && !SUBREG_P (reg))
26420 return false;
26421
26422 if (!MEM_P (mem))
26423 return false;
26424
26425 /* Prefixed load instructions do not support update or indexed forms. */
26426 if (get_attr_indexed (insn) == INDEXED_YES
26427 || get_attr_update (insn) == UPDATE_YES)
26428 return false;
26429
26430 /* LWA uses the DS format instead of the D format that LWZ uses. */
26431 enum non_prefixed_form non_prefixed;
26432 machine_mode reg_mode = GET_MODE (reg);
26433 machine_mode mem_mode = GET_MODE (mem);
26434
26435 if (mem_mode == SImode && reg_mode == DImode
26436 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26437 non_prefixed = NON_PREFIXED_DS;
26438
26439 else
26440 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26441
26442 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26443 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26444 else
26445 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26446 }
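/* For example (illustrative): "lwz 3,100000(4)" cannot be encoded because
   the offset does not fit in 16 bits, so the compiler emits the prefixed
   form "plwz 3,100000(4)" instead, and this function reports such a load
   as prefixed.  */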
26447
26448 /* Whether a store instruction is a prefixed instruction. This is called from
26449 the prefixed attribute processing. */
26450
26451 bool
26452 prefixed_store_p (rtx_insn *insn)
26453 {
26454 /* Validate the insn to make sure it is a normal store insn. */
26455 extract_insn_cached (insn);
26456 if (recog_data.n_operands < 2)
26457 return false;
26458
26459 rtx mem = recog_data.operand[0];
26460 rtx reg = recog_data.operand[1];
26461
26462 if (!REG_P (reg) && !SUBREG_P (reg))
26463 return false;
26464
26465 if (!MEM_P (mem))
26466 return false;
26467
26468 /* Prefixed store instructions do not support update or indexed forms. */
26469 if (get_attr_indexed (insn) == INDEXED_YES
26470 || get_attr_update (insn) == UPDATE_YES)
26471 return false;
26472
26473 machine_mode mem_mode = GET_MODE (mem);
26474 rtx addr = XEXP (mem, 0);
26475 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26476
26477 /* Need to make sure we aren't looking at a stfs which doesn't look
26478 like the other things reg_to_non_prefixed/address_is_prefixed
26479 looks for. */
26480 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26481 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26482 else
26483 return address_is_prefixed (addr, mem_mode, non_prefixed);
26484 }
26485
26486 /* Whether a load immediate or add instruction is a prefixed instruction. This
26487 is called from the prefixed attribute processing. */
26488
26489 bool
26490 prefixed_paddi_p (rtx_insn *insn)
26491 {
26492 rtx set = single_set (insn);
26493 if (!set)
26494 return false;
26495
26496 rtx dest = SET_DEST (set);
26497 rtx src = SET_SRC (set);
26498
26499 if (!REG_P (dest) && !SUBREG_P (dest))
26500 return false;
26501
26502 /* Is this a load immediate that can't be done with a simple ADDI or
26503 ADDIS? */
26504 if (CONST_INT_P (src))
26505 return (satisfies_constraint_eI (src)
26506 && !satisfies_constraint_I (src)
26507 && !satisfies_constraint_L (src));
26508
26509 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26510 ADDIS? */
26511 if (GET_CODE (src) == PLUS)
26512 {
26513 rtx op1 = XEXP (src, 1);
26514
26515 return (CONST_INT_P (op1)
26516 && satisfies_constraint_eI (op1)
26517 && !satisfies_constraint_I (op1)
26518 && !satisfies_constraint_L (op1));
26519 }
26520
26521 /* If not, is it a load of a PC-relative address? */
26522 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26523 return false;
26524
26525 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26526 return false;
26527
26528 enum insn_form iform = address_to_insn_form (src, Pmode,
26529 NON_PREFIXED_DEFAULT);
26530
26531 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26532 }
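/* Illustrative cases for the tests above: "addi 3,4,100" and
   "addis 3,4,10" cover 16-bit (possibly shifted) constants and are not
   prefixed; a constant such as 0x1234567 fits neither form and needs
   "paddi 3,4,19088743", which is; likewise "pla 3,sym@pcrel" for loading
   a PC-relative address.  */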
26533
26534 /* Whether the next instruction needs a 'p' prefix issued before the
26535 instruction is printed out. */
26536 static bool prepend_p_to_next_insn;
26537
26538 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26539 outputting the assembler code. On the PowerPC, we remember if the current
26540 insn is a prefixed insn where we need to emit a 'p' before the insn.
26541
26542 In addition, if the insn is part of the optimization of a PC-relative
26543 reference to an external label, this is recorded as well.  */
26544 void
26545 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26546 {
26547 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26548 == MAYBE_PREFIXED_YES
26549 && get_attr_prefixed (insn) == PREFIXED_YES);
26550 return;
26551 }
26552
26553 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26554 We use it to emit a 'p' for prefixed insns that is set in
26555 FINAL_PRESCAN_INSN. */
26556 void
26557 rs6000_asm_output_opcode (FILE *stream)
26558 {
26559 if (prepend_p_to_next_insn)
26560 {
26561 fprintf (stream, "p");
26562
26563 /* Reset the flag in the case where there are separate insn lines in the
26564 sequence, so the 'p' is only emitted for the first line. This shows up
26565 when we are doing the PCREL_OPT optimization, where otherwise the label
26566 created with %r<n> would get a leading 'p' printed.  */
26567 prepend_p_to_next_insn = false;
26568 }
26569
26570 return;
26571 }
26572
26573 /* Emit the relocation to tie the next instruction to a previous instruction
26574 that loads up an external address. This is used to do the PCREL_OPT
26575 optimization. Note, the label is generated after the PLD of the got
26576 pc-relative address to allow for the assembler to insert NOPs before the PLD
26577 instruction. The operand is a constant integer that is the label
26578 number. */
26579
26580 void
26581 output_pcrel_opt_reloc (rtx label_num)
26582 {
26583 rtx operands[1] = { label_num };
26584 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26585 operands);
26586 }
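/* For illustration, the assembly around this reloc looks roughly like:

        pld 9,x@got@pcrel
     .Lpcrel5:
        ...
        .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
        lwz 3,0(9)

   which lets the linker collapse the GOT-indirect pair into a single
   direct PC-relative access (e.g. "plwz 3,x@pcrel") when x turns out to
   be locally defined.  The label number 5 is arbitrary.  */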
26587
26588 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26589 should be adjusted to reflect any required changes. This macro is used when
26590 there is some systematic length adjustment required that would be difficult
26591 to express in the length attribute.
26592
26593 On the PowerPC, we use this to adjust the length of an instruction if one or
26594 more prefixed instructions are generated, using the attribute
26595 num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
26596 hardware requires that a prefixed instruction does not cross a 64-byte
26597 boundary.  This means the compiler has to assume the length of the first
26598 prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
26599 already set for the non-prefixed instruction, we just need to update for the
26600 difference.  */
26601
26602 int
26603 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26604 {
26605 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26606 {
26607 rtx pattern = PATTERN (insn);
26608 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26609 && get_attr_prefixed (insn) == PREFIXED_YES)
26610 {
26611 int num_prefixed = get_attr_max_prefixed_insns (insn);
26612 length += 4 * (num_prefixed + 1);
26613 }
26614 }
26615
26616 return length;
26617 }
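/* Worked example: an insn whose base length is 4 and whose
   max_prefixed_insns attribute is 1 gets 4 * (1 + 1) = 8 bytes added,
   for a total of 12: 8 bytes for the prefixed instruction itself plus 4
   bytes of slack in case the assembler inserts a NOP to keep the
   prefixed instruction from crossing a 64-byte boundary.  */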
26618
26619 \f
26620 #ifdef HAVE_GAS_HIDDEN
26621 # define USE_HIDDEN_LINKONCE 1
26622 #else
26623 # define USE_HIDDEN_LINKONCE 0
26624 #endif
26625
26626 /* Fills in the label name that should be used for a 476 link stack thunk. */
26627
26628 void
26629 get_ppc476_thunk_name (char name[32])
26630 {
26631 gcc_assert (TARGET_LINK_STACK);
26632
26633 if (USE_HIDDEN_LINKONCE)
26634 sprintf (name, "__ppc476.get_thunk");
26635 else
26636 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26637 }
26638
26639 /* This function emits the simple thunk routine that is used to preserve
26640 the link stack on the 476 cpu. */
26641
26642 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26643 static void
26644 rs6000_code_end (void)
26645 {
26646 char name[32];
26647 tree decl;
26648
26649 if (!TARGET_LINK_STACK)
26650 return;
26651
26652 get_ppc476_thunk_name (name);
26653
26654 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26655 build_function_type_list (void_type_node, NULL_TREE));
26656 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26657 NULL_TREE, void_type_node);
26658 TREE_PUBLIC (decl) = 1;
26659 TREE_STATIC (decl) = 1;
26660
26661 #if RS6000_WEAK
26662 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26663 {
26664 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26665 targetm.asm_out.unique_section (decl, 0);
26666 switch_to_section (get_named_section (decl, NULL, 0));
26667 DECL_WEAK (decl) = 1;
26668 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26669 targetm.asm_out.globalize_label (asm_out_file, name);
26670 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26671 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26672 }
26673 else
26674 #endif
26675 {
26676 switch_to_section (text_section);
26677 ASM_OUTPUT_LABEL (asm_out_file, name);
26678 }
26679
26680 DECL_INITIAL (decl) = make_node (BLOCK);
26681 current_function_decl = decl;
26682 allocate_struct_function (decl, false);
26683 init_function_start (decl);
26684 first_function_block_is_cold = false;
26685 /* Make sure unwind info is emitted for the thunk if needed. */
26686 final_start_function (emit_barrier (), asm_out_file, 1);
26687
26688 fputs ("\tblr\n", asm_out_file);
26689
26690 final_end_function ();
26691 init_insn_lengths ();
26692 free_after_compilation (cfun);
26693 set_cfun (NULL);
26694 current_function_decl = NULL;
26695 }
26696
26697 /* Add r30 to hard reg set if the prologue sets it up and it is not
26698 pic_offset_table_rtx. */
26699
26700 static void
26701 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26702 {
26703 if (!TARGET_SINGLE_PIC_BASE
26704 && TARGET_TOC
26705 && TARGET_MINIMAL_TOC
26706 && !constant_pool_empty_p ())
26707 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26708 if (cfun->machine->split_stack_argp_used)
26709 add_to_hard_reg_set (&set->set, Pmode, 12);
26710
26711 /* Make sure the hard reg set doesn't include r2, which was possibly added
26712 via PIC_OFFSET_TABLE_REGNUM. */
26713 if (TARGET_TOC)
26714 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26715 }
26716
26717 \f
26718 /* Helper function for rs6000_split_logical to emit a logical instruction after
26719 splitting the operation into single GPR registers.
26720
26721 DEST is the destination register.
26722 OP1 and OP2 are the input source registers.
26723 CODE is the base operation (AND, IOR, XOR, NOT).
26724 MODE is the machine mode.
26725 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26726 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26727 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26728
26729 static void
26730 rs6000_split_logical_inner (rtx dest,
26731 rtx op1,
26732 rtx op2,
26733 enum rtx_code code,
26734 machine_mode mode,
26735 bool complement_final_p,
26736 bool complement_op1_p,
26737 bool complement_op2_p)
26738 {
26739 rtx bool_rtx;
26740
26741 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26742 if (op2 && CONST_INT_P (op2)
26743 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26744 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26745 {
26746 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26747 HOST_WIDE_INT value = INTVAL (op2) & mask;
26748
26749 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26750 if (code == AND)
26751 {
26752 if (value == 0)
26753 {
26754 emit_insn (gen_rtx_SET (dest, const0_rtx));
26755 return;
26756 }
26757
26758 else if (value == mask)
26759 {
26760 if (!rtx_equal_p (dest, op1))
26761 emit_insn (gen_rtx_SET (dest, op1));
26762 return;
26763 }
26764 }
26765
26766 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26767 into separate ORI/ORIS or XORI/XORIS instructions.  */
26768 else if (code == IOR || code == XOR)
26769 {
26770 if (value == 0)
26771 {
26772 if (!rtx_equal_p (dest, op1))
26773 emit_insn (gen_rtx_SET (dest, op1));
26774 return;
26775 }
26776 }
26777 }
26778
26779 if (code == AND && mode == SImode
26780 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26781 {
26782 emit_insn (gen_andsi3 (dest, op1, op2));
26783 return;
26784 }
26785
26786 if (complement_op1_p)
26787 op1 = gen_rtx_NOT (mode, op1);
26788
26789 if (complement_op2_p)
26790 op2 = gen_rtx_NOT (mode, op2);
26791
26792 /* For canonical RTL, if only one arm is inverted it is the first. */
26793 if (!complement_op1_p && complement_op2_p)
26794 std::swap (op1, op2);
26795
26796 bool_rtx = ((code == NOT)
26797 ? gen_rtx_NOT (mode, op1)
26798 : gen_rtx_fmt_ee (code, mode, op1, op2));
26799
26800 if (complement_final_p)
26801 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26802
26803 emit_insn (gen_rtx_SET (dest, bool_rtx));
26804 }
26805
26806 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26807 operations are split immediately during RTL generation to allow for more
26808 optimizations of the AND/IOR/XOR.
26809
26810 OPERANDS is an array containing the destination and two input operands.
26811 CODE is the base operation (AND, IOR, XOR, NOT).
26812 MODE is the machine mode.
26813 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26814 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26815 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
26816 CLOBBER_REG is either NULL or a scratch register of type CC to allow
26817 formation of the AND instructions. */
26818
26819 static void
26820 rs6000_split_logical_di (rtx operands[3],
26821 enum rtx_code code,
26822 bool complement_final_p,
26823 bool complement_op1_p,
26824 bool complement_op2_p)
26825 {
26826 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C (0xffffffff);
26827 const HOST_WIDE_INT upper_32bits = ~lower_32bits;
26828 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C (0x80000000);
26829 enum hi_lo { hi = 0, lo = 1 };
26830 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26831 size_t i;
26832
26833 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26834 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26835 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26836 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26837
26838 if (code == NOT)
26839 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26840 else
26841 {
26842 if (!CONST_INT_P (operands[2]))
26843 {
26844 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26845 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26846 }
26847 else
26848 {
26849 HOST_WIDE_INT value = INTVAL (operands[2]);
26850 HOST_WIDE_INT value_hi_lo[2];
26851
26852 gcc_assert (!complement_final_p);
26853 gcc_assert (!complement_op1_p);
26854 gcc_assert (!complement_op2_p);
26855
26856 value_hi_lo[hi] = value >> 32;
26857 value_hi_lo[lo] = value & lower_32bits;
26858
26859 for (i = 0; i < 2; i++)
26860 {
26861 HOST_WIDE_INT sub_value = value_hi_lo[i];
26862
26863 if (sub_value & sign_bit)
26864 sub_value |= upper_32bits;
26865
26866 op2_hi_lo[i] = GEN_INT (sub_value);
26867
26868 /* If this is an AND instruction, check whether we need to load
26869 the value into a register. */
26870 if (code == AND && sub_value != -1 && sub_value != 0
26871 && !and_operand (op2_hi_lo[i], SImode))
26872 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26873 }
26874 }
26875 }
26876
26877 for (i = 0; i < 2; i++)
26878 {
26879 /* Split large IOR/XOR operations. */
26880 if ((code == IOR || code == XOR)
26881 && CONST_INT_P (op2_hi_lo[i])
26882 && !complement_final_p
26883 && !complement_op1_p
26884 && !complement_op2_p
26885 && !logical_const_operand (op2_hi_lo[i], SImode))
26886 {
26887 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
26888 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C (0xffff0000);
26889 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C (0x0000ffff);
26890 rtx tmp = gen_reg_rtx (SImode);
26891
26892 /* Make sure the constant is sign extended. */
26893 if ((hi_16bits & sign_bit) != 0)
26894 hi_16bits |= upper_32bits;
26895
26896 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
26897 code, SImode, false, false, false);
26898
26899 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
26900 code, SImode, false, false, false);
26901 }
26902 else
26903 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
26904 code, SImode, complement_final_p,
26905 complement_op1_p, complement_op2_p);
26906 }
26907
26908 return;
26909 }
26910
26911 /* Split the insns that make up boolean operations operating on multiple GPR
26912 registers. The boolean MD patterns ensure that the inputs either are
26913 exactly the same as the output registers, or there is no overlap.
26914
26915 OPERANDS is an array containing the destination and two input operands.
26916 CODE is the base operation (AND, IOR, XOR, NOT).
26917 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26918 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26919 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
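/* For example, after reload a TImode AND on a 64-bit target is split into two
   DImode ANDs, one for each GPR making up the 128-bit value.  */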
26920
26921 void
26922 rs6000_split_logical (rtx operands[3],
26923 enum rtx_code code,
26924 bool complement_final_p,
26925 bool complement_op1_p,
26926 bool complement_op2_p)
26927 {
26928 machine_mode mode = GET_MODE (operands[0]);
26929 machine_mode sub_mode;
26930 rtx op0, op1, op2;
26931 int sub_size, regno0, regno1, nregs, i;
26932
26933 /* If this is DImode, use the specialized version that can run before
26934 register allocation. */
26935 if (mode == DImode && !TARGET_POWERPC64)
26936 {
26937 rs6000_split_logical_di (operands, code, complement_final_p,
26938 complement_op1_p, complement_op2_p);
26939 return;
26940 }
26941
26942 op0 = operands[0];
26943 op1 = operands[1];
26944 op2 = (code == NOT) ? NULL_RTX : operands[2];
26945 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
26946 sub_size = GET_MODE_SIZE (sub_mode);
26947 regno0 = REGNO (op0);
26948 regno1 = REGNO (op1);
26949
26950 gcc_assert (reload_completed);
26951 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26952 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26953
26954 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
26955 gcc_assert (nregs > 1);
26956
26957 if (op2 && REG_P (op2))
26958 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
26959
26960 for (i = 0; i < nregs; i++)
26961 {
26962 int offset = i * sub_size;
26963 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
26964 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
26965 rtx sub_op2 = ((code == NOT)
26966 ? NULL_RTX
26967 : simplify_subreg (sub_mode, op2, mode, offset));
26968
26969 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
26970 complement_final_p, complement_op1_p,
26971 complement_op2_p);
26972 }
26973
26974 return;
26975 }
26976
26977 /* Emit instructions to move SRC to DST. Called by splitters for
26978 multi-register moves. It will emit at most one instruction for
26979 each register that is accessed; that is, it won't emit li/lis pairs
26980 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26981 register. */
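/* For example, a TImode register-to-register move on a 64-bit target becomes
   two DImode moves; when the source register number is below the destination
   register number, the moves are emitted highest register first so that no
   input is clobbered before it is read.  */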
26982
26983 void
26984 rs6000_split_multireg_move (rtx dst, rtx src)
26985 {
26986 /* The register number of the first register being moved. */
26987 int reg;
26988 /* The mode that is to be moved. */
26989 machine_mode mode;
26990 /* The mode that the move is being done in, and its size. */
26991 machine_mode reg_mode;
26992 int reg_mode_size;
26993 /* The number of registers that will be moved. */
26994 int nregs;
26995
26996 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26997 mode = GET_MODE (dst);
26998 nregs = hard_regno_nregs (reg, mode);
26999
27000 /* If we have a vector quad register for MMA, and this is a load or store,
27001 see if we can use vector paired load/stores. */
27002 if (mode == XOmode && TARGET_MMA
27003 && (MEM_P (dst) || MEM_P (src)))
27004 {
27005 reg_mode = OOmode;
27006 nregs /= 2;
27007 }
27008 /* If we have a vector pair/quad mode, split it into two/four separate
27009 vectors. */
27010 else if (mode == OOmode || mode == XOmode)
27011 reg_mode = V1TImode;
27012 else if (FP_REGNO_P (reg))
27013 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
27014 (TARGET_HARD_FLOAT ? DFmode : SFmode);
27015 else if (ALTIVEC_REGNO_P (reg))
27016 reg_mode = V16QImode;
27017 else
27018 reg_mode = word_mode;
27019 reg_mode_size = GET_MODE_SIZE (reg_mode);
27020
27021 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
27022
27023 /* TDmode residing in FP registers is special, since the ISA requires that
27024 the lower-numbered register of a register pair always holds the most
27025 significant word, even in little-endian mode. This does not match the usual
27026 subreg semantics, so we cannot use simplify_gen_subreg in those cases. Access
27027 the appropriate constituent registers "by hand" in little-endian mode.
27028
27029 Note we do not need to check for destructive overlap here since TDmode
27030 can only reside in even/odd register pairs. */
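/* For example, a little-endian TDmode value in the FP register pair 10/11
   still keeps its most significant word in register 10, so the loop below
   pairs the lowest memory/subreg offset with the highest-numbered register
   of the pair.  */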
27031 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
27032 {
27033 rtx p_src, p_dst;
27034 int i;
27035
27036 for (i = 0; i < nregs; i++)
27037 {
27038 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27039 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27040 else
27041 p_src = simplify_gen_subreg (reg_mode, src, mode,
27042 i * reg_mode_size);
27043
27044 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27045 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27046 else
27047 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27048 i * reg_mode_size);
27049
27050 emit_insn (gen_rtx_SET (p_dst, p_src));
27051 }
27052
27053 return;
27054 }
27055
27056 /* The __vector_pair and __vector_quad modes are multi-register
27057 modes, so if we have to load or store the registers, we have to be
27058 careful to properly swap them if we're in little endian mode
27059 below. This means the last register gets the first memory
27060 location. We also need to be careful of using the right register
27061 numbers if we are splitting XO to OO. */
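/* For example, when storing an OOmode value from a VSX register pair on a
   little-endian system, the loop below stores the higher-numbered register
   at the lower memory offset and the lower-numbered register at the higher
   offset.  */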
27062 if (mode == OOmode || mode == XOmode)
27063 {
27064 nregs = hard_regno_nregs (reg, mode);
27065 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27066 if (MEM_P (dst))
27067 {
27068 unsigned offset = 0;
27069 unsigned size = GET_MODE_SIZE (reg_mode);
27070
27071 /* If we are reading an accumulator register, we have to
27072 deprime it before we can access it. */
27073 if (TARGET_MMA
27074 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27075 emit_insn (gen_mma_xxmfacc (src, src));
27076
27077 for (int i = 0; i < nregs; i += reg_mode_nregs)
27078 {
27079 unsigned subreg
27080 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27081 rtx dst2 = adjust_address (dst, reg_mode, offset);
27082 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27083 offset += size;
27084 emit_insn (gen_rtx_SET (dst2, src2));
27085 }
27086
27087 return;
27088 }
27089
27090 if (MEM_P (src))
27091 {
27092 unsigned offset = 0;
27093 unsigned size = GET_MODE_SIZE (reg_mode);
27094
27095 for (int i = 0; i < nregs; i += reg_mode_nregs)
27096 {
27097 unsigned subreg
27098 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27099 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27100 rtx src2 = adjust_address (src, reg_mode, offset);
27101 offset += size;
27102 emit_insn (gen_rtx_SET (dst2, src2));
27103 }
27104
27105 /* If we are writing an accumulator register, we have to
27106 prime it after we've written it. */
27107 if (TARGET_MMA
27108 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27109 emit_insn (gen_mma_xxmtacc (dst, dst));
27110
27111 return;
27112 }
27113
27114 if (GET_CODE (src) == UNSPEC
27115 || GET_CODE (src) == UNSPEC_VOLATILE)
27116 {
27117 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27118 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27119 gcc_assert (REG_P (dst));
27120 if (GET_MODE (src) == XOmode)
27121 gcc_assert (FP_REGNO_P (REGNO (dst)));
27122 if (GET_MODE (src) == OOmode)
27123 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27124
27125 int nvecs = XVECLEN (src, 0);
27126 for (int i = 0; i < nvecs; i++)
27127 {
27128 rtx op;
27129 int regno = reg + i;
27130
27131 if (WORDS_BIG_ENDIAN)
27132 {
27133 op = XVECEXP (src, 0, i);
27134
27135 /* If we are loading an even VSX register and the memory location
27136 is adjacent to the next register's memory location (if any),
27137 then we can load them both with one LXVP instruction. */
27138 if ((regno & 1) == 0)
27139 {
27140 rtx op2 = XVECEXP (src, 0, i + 1);
27141 if (adjacent_mem_locations (op, op2) == op)
27142 {
27143 op = adjust_address (op, OOmode, 0);
27144 /* Skip the next register, since we're going to
27145 load it together with this register. */
27146 i++;
27147 }
27148 }
27149 }
27150 else
27151 {
27152 op = XVECEXP (src, 0, nvecs - i - 1);
27153
27154 /* If we are loading an even VSX register and the memory location
27155 is adjacent to the next register's memory location (if any),
27156 then we can load them both with one LXVP instruction. */
27157 if ((regno & 1) == 0)
27158 {
27159 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27160 if (adjacent_mem_locations (op2, op) == op2)
27161 {
27162 op = adjust_address (op2, OOmode, 0);
27163 /* Skip the next register, since we're going to
27164 load it together with this register. */
27165 i++;
27166 }
27167 }
27168 }
27169
27170 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27171 emit_insn (gen_rtx_SET (dst_i, op));
27172 }
27173
27174 /* We are writing an accumulator register, so we have to
27175 prime it after we've written it. */
27176 if (GET_MODE (src) == XOmode)
27177 emit_insn (gen_mma_xxmtacc (dst, dst));
27178
27179 return;
27180 }
27181
27182 /* Register -> register moves can use common code. */
27183 }
27184
27185 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27186 {
27187 /* If we are reading an accumulator register, we have to
27188 deprime it before we can access it. */
27189 if (TARGET_MMA
27190 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27191 emit_insn (gen_mma_xxmfacc (src, src));
27192
27193 /* Move register range backwards, if we might have destructive
27194 overlap. */
27195 int i;
27196 /* XO/OO are opaque so cannot use subregs. */
27197 if (mode == OOmode || mode == XOmode)
27198 {
27199 for (i = nregs - 1; i >= 0; i--)
27200 {
27201 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27202 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27203 emit_insn (gen_rtx_SET (dst_i, src_i));
27204 }
27205 }
27206 else
27207 {
27208 for (i = nregs - 1; i >= 0; i--)
27209 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27210 i * reg_mode_size),
27211 simplify_gen_subreg (reg_mode, src, mode,
27212 i * reg_mode_size)));
27213 }
27214
27215 /* If we are writing an accumulator register, we have to
27216 prime it after we've written it. */
27217 if (TARGET_MMA
27218 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27219 emit_insn (gen_mma_xxmtacc (dst, dst));
27220 }
27221 else
27222 {
27223 int i;
27224 int j = -1;
27225 bool used_update = false;
27226 rtx restore_basereg = NULL_RTX;
27227
27228 if (MEM_P (src) && INT_REGNO_P (reg))
27229 {
27230 rtx breg;
27231
27232 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27233 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27234 {
27235 rtx delta_rtx;
27236 breg = XEXP (XEXP (src, 0), 0);
27237 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27238 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27239 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27240 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27241 src = replace_equiv_address (src, breg);
27242 }
27243 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27244 {
27245 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27246 {
27247 rtx basereg = XEXP (XEXP (src, 0), 0);
27248 if (TARGET_UPDATE)
27249 {
27250 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27251 emit_insn (gen_rtx_SET (ndst,
27252 gen_rtx_MEM (reg_mode,
27253 XEXP (src, 0))));
27254 used_update = true;
27255 }
27256 else
27257 emit_insn (gen_rtx_SET (basereg,
27258 XEXP (XEXP (src, 0), 1)));
27259 src = replace_equiv_address (src, basereg);
27260 }
27261 else
27262 {
27263 rtx basereg = gen_rtx_REG (Pmode, reg);
27264 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27265 src = replace_equiv_address (src, basereg);
27266 }
27267 }
27268
27269 breg = XEXP (src, 0);
27270 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27271 breg = XEXP (breg, 0);
27272
27273 /* If the base register we are using to address memory is
27274 also a destination reg, then change that register last. */
27275 if (REG_P (breg)
27276 && REGNO (breg) >= REGNO (dst)
27277 && REGNO (breg) < REGNO (dst) + nregs)
27278 j = REGNO (breg) - REGNO (dst);
27279 }
27280 else if (MEM_P (dst) && INT_REGNO_P (reg))
27281 {
27282 rtx breg;
27283
27284 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27285 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27286 {
27287 rtx delta_rtx;
27288 breg = XEXP (XEXP (dst, 0), 0);
27289 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27290 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27291 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27292
27293 /* We have to update the breg before doing the store.
27294 Use store with update, if available. */
27295
27296 if (TARGET_UPDATE)
27297 {
27298 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27299 emit_insn (TARGET_32BIT
27300 ? (TARGET_POWERPC64
27301 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27302 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27303 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27304 used_update = true;
27305 }
27306 else
27307 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27308 dst = replace_equiv_address (dst, breg);
27309 }
27310 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27311 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27312 {
27313 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27314 {
27315 rtx basereg = XEXP (XEXP (dst, 0), 0);
27316 if (TARGET_UPDATE)
27317 {
27318 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27319 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27320 XEXP (dst, 0)),
27321 nsrc));
27322 used_update = true;
27323 }
27324 else
27325 emit_insn (gen_rtx_SET (basereg,
27326 XEXP (XEXP (dst, 0), 1)));
27327 dst = replace_equiv_address (dst, basereg);
27328 }
27329 else
27330 {
27331 rtx basereg = XEXP (XEXP (dst, 0), 0);
27332 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27333 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27334 && REG_P (basereg)
27335 && REG_P (offsetreg)
27336 && REGNO (basereg) != REGNO (offsetreg));
27337 if (REGNO (basereg) == 0)
27338 {
27339 rtx tmp = offsetreg;
27340 offsetreg = basereg;
27341 basereg = tmp;
27342 }
27343 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27344 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27345 dst = replace_equiv_address (dst, basereg);
27346 }
27347 }
27348 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27349 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27350 }
27351
27352 /* If we are reading an accumulator register, we have to
27353 deprime it before we can access it. */
27354 if (TARGET_MMA && REG_P (src)
27355 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27356 emit_insn (gen_mma_xxmfacc (src, src));
27357
27358 for (i = 0; i < nregs; i++)
27359 {
27360 /* Calculate index to next subword. */
27361 ++j;
27362 if (j == nregs)
27363 j = 0;
27364
27365 /* If compiler already emitted move of first word by
27366 store with update, no need to do anything. */
27367 if (j == 0 && used_update)
27368 continue;
27369
27370 /* XO/OO are opaque so cannot use subregs. */
27371 if (mode == OOmode || mode == XOmode)
27372 {
27373 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27374 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27375 emit_insn (gen_rtx_SET (dst_i, src_i));
27376 }
27377 else
27378 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27379 j * reg_mode_size),
27380 simplify_gen_subreg (reg_mode, src, mode,
27381 j * reg_mode_size)));
27382 }
27383
27384 /* If we are writing an accumulator register, we have to
27385 prime it after we've written it. */
27386 if (TARGET_MMA && REG_P (dst)
27387 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27388 emit_insn (gen_mma_xxmtacc (dst, dst));
27389
27390 if (restore_basereg != NULL_RTX)
27391 emit_insn (restore_basereg);
27392 }
27393 }
27394 \f
27395 /* Return true if the peephole2 can combine a load involving a combination of
27396 an addis instruction and a load with an offset that can be fused together on
27397 a power8. */
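/* For example, this allows the peephole2 pass to recognize a sequence such as
   "addis 9,2,sym@toc@ha" followed by "lwz 9,sym@toc@l(9)" (register number
   and symbol are illustrative) so the two insns can be kept adjacent for
   power8 fusion.  */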
27398
27399 bool
27400 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27401 rtx addis_value, /* addis value. */
27402 rtx target, /* target register that is loaded. */
27403 rtx mem) /* bottom part of the memory addr. */
27404 {
27405 rtx addr;
27406 rtx base_reg;
27407
27408 /* Validate arguments. */
27409 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27410 return false;
27411
27412 if (!base_reg_operand (target, GET_MODE (target)))
27413 return false;
27414
27415 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27416 return false;
27417
27418 /* Allow sign/zero extension. */
27419 if (GET_CODE (mem) == ZERO_EXTEND
27420 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27421 mem = XEXP (mem, 0);
27422
27423 if (!MEM_P (mem))
27424 return false;
27425
27426 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27427 return false;
27428
27429 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27430 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27431 return false;
27432
27433 /* Validate that the register used to load the high value is either the
27434 register being loaded, or one whose use we can safely replace.
27435
27436 This function is only called from the peephole2 pass, and we assume that
27437 the peephole contains two instructions (the addis and the load). We
27438 therefore check that the target register is not used in the memory address
27439 and that the register holding the addis result is dead after the peephole. */
27440 if (REGNO (addis_reg) != REGNO (target))
27441 {
27442 if (reg_mentioned_p (target, mem))
27443 return false;
27444
27445 if (!peep2_reg_dead_p (2, addis_reg))
27446 return false;
27447
27448 /* If the target register being loaded is the stack pointer, we must
27449 avoid loading any other value into it, even temporarily. */
27450 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27451 return false;
27452 }
27453
27454 base_reg = XEXP (addr, 0);
27455 return REGNO (addis_reg) == REGNO (base_reg);
27456 }
27457
27458 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27459 sequence. We adjust the addis register to use the target register. If the
27460 load sign extends, we rewrite it as a zero extending load followed by an
27461 explicit sign extension, since the fusion only covers zero extending
27462 loads.
27463
27464 The operands are:
27465 operands[0] register set with addis (to be replaced with target)
27466 operands[1] value set via addis
27467 operands[2] target register being loaded
27468 operands[3] D-form memory reference using operands[0]. */
27469
27470 void
27471 expand_fusion_gpr_load (rtx *operands)
27472 {
27473 rtx addis_value = operands[1];
27474 rtx target = operands[2];
27475 rtx orig_mem = operands[3];
27476 rtx new_addr, new_mem, orig_addr, offset;
27477 enum rtx_code plus_or_lo_sum;
27478 machine_mode target_mode = GET_MODE (target);
27479 machine_mode extend_mode = target_mode;
27480 machine_mode ptr_mode = Pmode;
27481 enum rtx_code extend = UNKNOWN;
27482
27483 if (GET_CODE (orig_mem) == ZERO_EXTEND
27484 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27485 {
27486 extend = GET_CODE (orig_mem);
27487 orig_mem = XEXP (orig_mem, 0);
27488 target_mode = GET_MODE (orig_mem);
27489 }
27490
27491 gcc_assert (MEM_P (orig_mem));
27492
27493 orig_addr = XEXP (orig_mem, 0);
27494 plus_or_lo_sum = GET_CODE (orig_addr);
27495 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27496
27497 offset = XEXP (orig_addr, 1);
27498 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27499 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27500
27501 if (extend != UNKNOWN)
27502 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27503
27504 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27505 UNSPEC_FUSION_GPR);
27506 emit_insn (gen_rtx_SET (target, new_mem));
27507
27508 if (extend == SIGN_EXTEND)
27509 {
27510 int sub_off = ((BYTES_BIG_ENDIAN)
27511 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27512 : 0);
27513 rtx sign_reg
27514 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27515
27516 emit_insn (gen_rtx_SET (target,
27517 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27518 }
27519
27520 return;
27521 }
27522
27523 /* Emit the addis instruction that will be part of a fused instruction
27524 sequence. */
27525
27526 void
27527 emit_fusion_addis (rtx target, rtx addis_value)
27528 {
27529 rtx fuse_ops[10];
27530 const char *addis_str = NULL;
27531
27532 /* Emit the addis instruction. */
27533 fuse_ops[0] = target;
27534 if (satisfies_constraint_L (addis_value))
27535 {
27536 fuse_ops[1] = addis_value;
27537 addis_str = "lis %0,%v1";
27538 }
27539
27540 else if (GET_CODE (addis_value) == PLUS)
27541 {
27542 rtx op0 = XEXP (addis_value, 0);
27543 rtx op1 = XEXP (addis_value, 1);
27544
27545 if (REG_P (op0) && CONST_INT_P (op1)
27546 && satisfies_constraint_L (op1))
27547 {
27548 fuse_ops[1] = op0;
27549 fuse_ops[2] = op1;
27550 addis_str = "addis %0,%1,%v2";
27551 }
27552 }
27553
27554 else if (GET_CODE (addis_value) == HIGH)
27555 {
27556 rtx value = XEXP (addis_value, 0);
27557 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27558 {
27559 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27560 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27561 if (TARGET_ELF)
27562 addis_str = "addis %0,%2,%1@toc@ha";
27563
27564 else if (TARGET_XCOFF)
27565 addis_str = "addis %0,%1@u(%2)";
27566
27567 else
27568 gcc_unreachable ();
27569 }
27570
27571 else if (GET_CODE (value) == PLUS)
27572 {
27573 rtx op0 = XEXP (value, 0);
27574 rtx op1 = XEXP (value, 1);
27575
27576 if (GET_CODE (op0) == UNSPEC
27577 && XINT (op0, 1) == UNSPEC_TOCREL
27578 && CONST_INT_P (op1))
27579 {
27580 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
27581 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
27582 fuse_ops[3] = op1;
27583 if (TARGET_ELF)
27584 addis_str = "addis %0,%2,%1+%3@toc@ha";
27585
27586 else if (TARGET_XCOFF)
27587 addis_str = "addis %0,%1+%3@u(%2)";
27588
27589 else
27590 gcc_unreachable ();
27591 }
27592 }
27593
27594 else if (satisfies_constraint_L (value))
27595 {
27596 fuse_ops[1] = value;
27597 addis_str = "lis %0,%v1";
27598 }
27599
27600 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
27601 {
27602 fuse_ops[1] = value;
27603 addis_str = "lis %0,%1@ha";
27604 }
27605 }
27606
27607 if (!addis_str)
27608 fatal_insn ("Could not generate addis value for fusion", addis_value);
27609
27610 output_asm_insn (addis_str, fuse_ops);
27611 }
27612
27613 /* Emit a D-form load or store instruction that is the second instruction
27614 of a fusion sequence. */
27615
27616 static void
27617 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
27618 {
27619 rtx fuse_ops[10];
27620 char insn_template[80];
27621
27622 fuse_ops[0] = load_reg;
27623 fuse_ops[1] = addis_reg;
27624
27625 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
27626 {
27627 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
27628 fuse_ops[2] = offset;
27629 output_asm_insn (insn_template, fuse_ops);
27630 }
27631
27632 else if (GET_CODE (offset) == UNSPEC
27633 && XINT (offset, 1) == UNSPEC_TOCREL)
27634 {
27635 if (TARGET_ELF)
27636 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
27637
27638 else if (TARGET_XCOFF)
27639 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27640
27641 else
27642 gcc_unreachable ();
27643
27644 fuse_ops[2] = XVECEXP (offset, 0, 0);
27645 output_asm_insn (insn_template, fuse_ops);
27646 }
27647
27648 else if (GET_CODE (offset) == PLUS
27649 && GET_CODE (XEXP (offset, 0)) == UNSPEC
27650 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
27651 && CONST_INT_P (XEXP (offset, 1)))
27652 {
27653 rtx tocrel_unspec = XEXP (offset, 0);
27654 if (TARGET_ELF)
27655 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
27656
27657 else if (TARGET_XCOFF)
27658 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
27659
27660 else
27661 gcc_unreachable ();
27662
27663 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
27664 fuse_ops[3] = XEXP (offset, 1);
27665 output_asm_insn (insn_template, fuse_ops);
27666 }
27667
27668 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
27669 {
27670 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27671
27672 fuse_ops[2] = offset;
27673 output_asm_insn (insn_template, fuse_ops);
27674 }
27675
27676 else
27677 fatal_insn ("Unable to generate load/store offset for fusion", offset);
27678
27679 return;
27680 }
27681
27682 /* Given an address, convert it into the addis and load offset parts. Addresses
27683 created during the peephole2 process look like:
27684 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27685 (unspec [(...)] UNSPEC_TOCREL)) */
27686
27687 static void
27688 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
27689 {
27690 rtx hi, lo;
27691
27692 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
27693 {
27694 hi = XEXP (addr, 0);
27695 lo = XEXP (addr, 1);
27696 }
27697 else
27698 gcc_unreachable ();
27699
27700 *p_hi = hi;
27701 *p_lo = lo;
27702 }
27703
27704 /* Return a string to fuse an addis instruction with a GPR load into the
27705 same register that the addis instruction set. The address that is used
27706 is the logical address that was formed during peephole2:
27707 (lo_sum (high) (low-part))
27708
27709 The code is complicated, so we call output_asm_insn directly, and just
27710 return "". */
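/* For example, for an SImode load from a TOC-relative address on an ELF
   target, this outputs "addis %0,%2,%1@toc@ha" via emit_fusion_addis and
   then "lwz %0,%2@toc@l(%1)" via emit_fusion_load, both targeting the same
   register.  */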
27711
27712 const char *
27713 emit_fusion_gpr_load (rtx target, rtx mem)
27714 {
27715 rtx addis_value;
27716 rtx addr;
27717 rtx load_offset;
27718 const char *load_str = NULL;
27719 machine_mode mode;
27720
27721 if (GET_CODE (mem) == ZERO_EXTEND)
27722 mem = XEXP (mem, 0);
27723
27724 gcc_assert (REG_P (target) && MEM_P (mem));
27725
27726 addr = XEXP (mem, 0);
27727 fusion_split_address (addr, &addis_value, &load_offset);
27728
27729 /* Now emit the load instruction to the same register. */
27730 mode = GET_MODE (mem);
27731 switch (mode)
27732 {
27733 case E_QImode:
27734 load_str = "lbz";
27735 break;
27736
27737 case E_HImode:
27738 load_str = "lhz";
27739 break;
27740
27741 case E_SImode:
27742 case E_SFmode:
27743 load_str = "lwz";
27744 break;
27745
27746 case E_DImode:
27747 case E_DFmode:
27748 gcc_assert (TARGET_POWERPC64);
27749 load_str = "ld";
27750 break;
27751
27752 default:
27753 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
27754 }
27755
27756 /* Emit the addis instruction. */
27757 emit_fusion_addis (target, addis_value);
27758
27759 /* Emit the D-form load instruction. */
27760 emit_fusion_load (target, target, load_offset, load_str);
27761
27762 return "";
27763 }
27764 \f
27765 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
27766 would otherwise ignore it. */
27767 static GTY(()) tree atomic_hold_decl;
27768 static GTY(()) tree atomic_clear_decl;
27769 static GTY(()) tree atomic_update_decl;
27770
27771 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
27772 static void
27773 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27774 {
27775 if (!TARGET_HARD_FLOAT)
27776 {
27777 #ifdef RS6000_GLIBC_ATOMIC_FENV
27778 if (atomic_hold_decl == NULL_TREE)
27779 {
27780 atomic_hold_decl
27781 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27782 get_identifier ("__atomic_feholdexcept"),
27783 build_function_type_list (void_type_node,
27784 double_ptr_type_node,
27785 NULL_TREE));
27786 TREE_PUBLIC (atomic_hold_decl) = 1;
27787 DECL_EXTERNAL (atomic_hold_decl) = 1;
27788 }
27789
27790 if (atomic_clear_decl == NULL_TREE)
27791 {
27792 atomic_clear_decl
27793 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27794 get_identifier ("__atomic_feclearexcept"),
27795 build_function_type_list (void_type_node,
27796 NULL_TREE));
27797 TREE_PUBLIC (atomic_clear_decl) = 1;
27798 DECL_EXTERNAL (atomic_clear_decl) = 1;
27799 }
27800
27801 tree const_double = build_qualified_type (double_type_node,
27802 TYPE_QUAL_CONST);
27803 tree const_double_ptr = build_pointer_type (const_double);
27804 if (atomic_update_decl == NULL_TREE)
27805 {
27806 atomic_update_decl
27807 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27808 get_identifier ("__atomic_feupdateenv"),
27809 build_function_type_list (void_type_node,
27810 const_double_ptr,
27811 NULL_TREE));
27812 TREE_PUBLIC (atomic_update_decl) = 1;
27813 DECL_EXTERNAL (atomic_update_decl) = 1;
27814 }
27815
27816 tree fenv_var = create_tmp_var_raw (double_type_node);
27817 TREE_ADDRESSABLE (fenv_var) = 1;
27818 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
27819 build4 (TARGET_EXPR, double_type_node, fenv_var,
27820 void_node, NULL_TREE, NULL_TREE));
27821
27822 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
27823 *clear = build_call_expr (atomic_clear_decl, 0);
27824 *update = build_call_expr (atomic_update_decl, 1,
27825 fold_convert (const_double_ptr, fenv_addr));
27826 #endif
27827 return;
27828 }
27829
27830 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
27831 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
27832 tree call_mffs = build_call_expr (mffs, 0);
27833
27834 /* Generates the equivalent of feholdexcept (&fenv_var)
27835
27836 *fenv_var = __builtin_mffs ();
27837 double fenv_hold;
27838 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
27839 __builtin_mtfsf (0xff, fenv_hold); */
27840
27841 /* Mask to clear everything except for the rounding modes and non-IEEE
27842 arithmetic flag. */
27843 const unsigned HOST_WIDE_INT hold_exception_mask
27844 = HOST_WIDE_INT_C (0xffffffff00000007);
27845
27846 tree fenv_var = create_tmp_var_raw (double_type_node);
27847
27848 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
27849 NULL_TREE, NULL_TREE);
27850
27851 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
27852 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27853 build_int_cst (uint64_type_node,
27854 hold_exception_mask));
27855
27856 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27857 fenv_llu_and);
27858
27859 tree hold_mtfsf = build_call_expr (mtfsf, 2,
27860 build_int_cst (unsigned_type_node, 0xff),
27861 fenv_hold_mtfsf);
27862
27863 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
27864
27865 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
27866
27867 double fenv_clear = __builtin_mffs ();
27868 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
27869 __builtin_mtfsf (0xff, fenv_clear); */
27870
27871 /* Mask that clears the entire lower word of the FPSCR image, i.e. all of
27872 the exception, enable, and control bits. */
27873 const unsigned HOST_WIDE_INT clear_exception_mask
27874 = HOST_WIDE_INT_C (0xffffffff00000000);
27875
27876 tree fenv_clear = create_tmp_var_raw (double_type_node);
27877
27878 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
27879 call_mffs, NULL_TREE, NULL_TREE);
27880
27881 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
27882 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
27883 fenv_clean_llu,
27884 build_int_cst (uint64_type_node,
27885 clear_exception_mask));
27886
27887 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27888 fenv_clear_llu_and);
27889
27890 tree clear_mtfsf = build_call_expr (mtfsf, 2,
27891 build_int_cst (unsigned_type_node, 0xff),
27892 fenv_clear_mtfsf);
27893
27894 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
27895
27896 /* Generates the equivalent of feupdateenv (&fenv_var)
27897
27898 double old_fenv = __builtin_mffs ();
27899 double fenv_update;
27900 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
27901 (*(uint64_t*)fenv_var & 0x1ff80fff);
27902 __builtin_mtfsf (0xff, fenv_update); */
27903
27904 const unsigned HOST_WIDE_INT update_exception_mask
27905 = HOST_WIDE_INT_C (0xffffffff1fffff00);
27906 const unsigned HOST_WIDE_INT new_exception_mask
27907 = HOST_WIDE_INT_C (0x1ff80fff);
27908
27909 tree old_fenv = create_tmp_var_raw (double_type_node);
27910 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
27911 call_mffs, NULL_TREE, NULL_TREE);
27912
27913 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
27914 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
27915 build_int_cst (uint64_type_node,
27916 update_exception_mask));
27917
27918 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27919 build_int_cst (uint64_type_node,
27920 new_exception_mask));
27921
27922 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
27923 old_llu_and, new_llu_and);
27924
27925 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27926 new_llu_mask);
27927
27928 tree update_mtfsf = build_call_expr (mtfsf, 2,
27929 build_int_cst (unsigned_type_node, 0xff),
27930 fenv_update_mtfsf);
27931
27932 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
27933 }
27934
27935 void
27936 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
27937 {
27938 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27939
27940 rtx_tmp0 = gen_reg_rtx (V2DFmode);
27941 rtx_tmp1 = gen_reg_rtx (V2DFmode);
27942
27943 /* The destination of the vmrgew instruction layout is:
27944 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
27945 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
27946 vmrgew instruction will be correct. */
27947 if (BYTES_BIG_ENDIAN)
27948 {
27949 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
27950 GEN_INT (0)));
27951 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
27952 GEN_INT (3)));
27953 }
27954 else
27955 {
27956 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
27957 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
27958 }
27959
27960 rtx_tmp2 = gen_reg_rtx (V4SFmode);
27961 rtx_tmp3 = gen_reg_rtx (V4SFmode);
27962
27963 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
27964 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
27965
27966 if (BYTES_BIG_ENDIAN)
27967 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27968 else
27969 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
27970 }
27971
27972 void
27973 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
27974 {
27975 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27976
27977 rtx_tmp0 = gen_reg_rtx (V2DImode);
27978 rtx_tmp1 = gen_reg_rtx (V2DImode);
27979
27980 /* The destination of the vmrgew instruction layout is:
27981 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
27982 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
27983 vmrgew instruction will be correct. */
27984 if (BYTES_BIG_ENDIAN)
27985 {
27986 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
27987 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
27988 }
27989 else
27990 {
27991 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
27992 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
27993 }
27994
27995 rtx_tmp2 = gen_reg_rtx (V4SFmode);
27996 rtx_tmp3 = gen_reg_rtx (V4SFmode);
27997
27998 if (signed_convert)
27999 {
28000 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
28001 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
28002 }
28003 else
28004 {
28005 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
28006 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
28007 }
28008
28009 if (BYTES_BIG_ENDIAN)
28010 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28011 else
28012 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28013 }
28014
28015 void
28016 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
28017 rtx src2)
28018 {
28019 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28020
28021 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28022 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28023
28024 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
28025 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
28026
28027 rtx_tmp2 = gen_reg_rtx (V4SImode);
28028 rtx_tmp3 = gen_reg_rtx (V4SImode);
28029
28030 if (signed_convert)
28031 {
28032 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28033 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28034 }
28035 else
28036 {
28037 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28038 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28039 }
28040
28041 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
28042 }
28043
28044 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28045
28046 static bool
28047 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28048 optimization_type opt_type)
28049 {
28050 switch (op)
28051 {
28052 case rsqrt_optab:
28053 return (opt_type == OPTIMIZE_FOR_SPEED
28054 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28055
28056 default:
28057 return true;
28058 }
28059 }
28060
28061 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28062
28063 static HOST_WIDE_INT
28064 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28065 {
28066 if (TREE_CODE (exp) == STRING_CST
28067 && (STRICT_ALIGNMENT || !optimize_size))
28068 return MAX (align, BITS_PER_WORD);
28069 return align;
28070 }
28071
28072 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28073
28074 static HOST_WIDE_INT
28075 rs6000_starting_frame_offset (void)
28076 {
28077 if (FRAME_GROWS_DOWNWARD)
28078 return 0;
28079 return RS6000_STARTING_FRAME_OFFSET;
28080 }
28081 \f
28082
28083 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
28084 function names from <foo>l to <foo>f128 if the default long double type is
28085 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28086 include file switches the names on systems that support long double as IEEE
28087 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28088 In the future, glibc will export names like __ieee128_sinf128 and we can
28089 switch to using those instead of using sinf128, which pollutes the user's
28090 namespace.
28091
28092 This will switch the names for Fortran math functions as well (which doesn't
28093 use math.h). However, Fortran needs other changes to the compiler and
28094 library before you can switch the real*16 type at compile time.
28095
28096 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28097 only do this transformation if the __float128 type is enabled. This
28098 prevents us from doing the transformation on older 32-bit ports that might
28099 have enabled using IEEE 128-bit floating point as the default long double
28100 type. */
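/* For example, when long double is IEEE 128-bit, __builtin_sinl is emitted as
   __sinieee128 instead of sinl, printf becomes __printfieee128, and scanf
   becomes __isoc99_scanfieee128.  */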
28101
28102 static tree
28103 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28104 {
28105 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28106 && TREE_CODE (decl) == FUNCTION_DECL
28107 && DECL_IS_UNDECLARED_BUILTIN (decl)
28108 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28109 {
28110 size_t len = IDENTIFIER_LENGTH (id);
28111 const char *name = IDENTIFIER_POINTER (id);
28112 char *newname = NULL;
28113
28114 /* See if it is one of the built-in functions with an unusual name. */
28115 switch (DECL_FUNCTION_CODE (decl))
28116 {
28117 case BUILT_IN_DREML:
28118 newname = xstrdup ("__remainderieee128");
28119 break;
28120
28121 case BUILT_IN_GAMMAL:
28122 newname = xstrdup ("__lgammaieee128");
28123 break;
28124
28125 case BUILT_IN_GAMMAL_R:
28126 case BUILT_IN_LGAMMAL_R:
28127 newname = xstrdup ("__lgammaieee128_r");
28128 break;
28129
28130 case BUILT_IN_NEXTTOWARD:
28131 newname = xstrdup ("__nexttoward_to_ieee128");
28132 break;
28133
28134 case BUILT_IN_NEXTTOWARDF:
28135 newname = xstrdup ("__nexttowardf_to_ieee128");
28136 break;
28137
28138 case BUILT_IN_NEXTTOWARDL:
28139 newname = xstrdup ("__nexttowardieee128");
28140 break;
28141
28142 case BUILT_IN_POW10L:
28143 newname = xstrdup ("__exp10ieee128");
28144 break;
28145
28146 case BUILT_IN_SCALBL:
28147 newname = xstrdup ("__scalbieee128");
28148 break;
28149
28150 case BUILT_IN_SIGNIFICANDL:
28151 newname = xstrdup ("__significandieee128");
28152 break;
28153
28154 case BUILT_IN_SINCOSL:
28155 newname = xstrdup ("__sincosieee128");
28156 break;
28157
28158 default:
28159 break;
28160 }
28161
28162 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28163 if (!newname)
28164 {
28165 size_t printf_len = strlen ("printf");
28166 size_t scanf_len = strlen ("scanf");
28167 size_t printf_chk_len = strlen ("printf_chk");
28168
28169 if (len >= printf_len
28170 && strcmp (name + len - printf_len, "printf") == 0)
28171 newname = xasprintf ("__%sieee128", name);
28172
28173 else if (len >= scanf_len
28174 && strcmp (name + len - scanf_len, "scanf") == 0)
28175 newname = xasprintf ("__isoc99_%sieee128", name);
28176
28177 else if (len >= printf_chk_len
28178 && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28179 newname = xasprintf ("%sieee128", name);
28180
28181 else if (name[len - 1] == 'l')
28182 {
28183 bool uses_ieee128_p = false;
28184 tree type = TREE_TYPE (decl);
28185 machine_mode ret_mode = TYPE_MODE (type);
28186
28187 /* See if the function returns an IEEE 128-bit floating point type or
28188 complex type. */
28189 if (ret_mode == TFmode || ret_mode == TCmode)
28190 uses_ieee128_p = true;
28191 else
28192 {
28193 function_args_iterator args_iter;
28194 tree arg;
28195
28196 /* See if the function passes an IEEE 128-bit floating point type
28197 or complex type. */
28198 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28199 {
28200 machine_mode arg_mode = TYPE_MODE (arg);
28201 if (arg_mode == TFmode || arg_mode == TCmode)
28202 {
28203 uses_ieee128_p = true;
28204 break;
28205 }
28206 }
28207 }
28208
28209 /* If we passed or returned an IEEE 128-bit floating point type,
28210 change the name. Use __<name>ieee128 instead of <name>l. */
28211 if (uses_ieee128_p)
28212 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28213 }
28214 }
28215
28216 if (newname)
28217 {
28218 if (TARGET_DEBUG_BUILTIN)
28219 fprintf (stderr, "Map %s => %s\n", name, newname);
28220
28221 id = get_identifier (newname);
28222 free (newname);
28223 }
28224 }
28225
28226 return id;
28227 }
28228
28229 /* Predict whether the given loop in gimple will be transformed in the RTL
28230 doloop_optimize pass. */
28231
28232 static bool
28233 rs6000_predict_doloop_p (struct loop *loop)
28234 {
28235 gcc_assert (loop);
28236
28237 /* On rs6000, targetm.can_use_doloop_p is actually
28238 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28239 if (loop->inner != NULL)
28240 {
28241 if (dump_file && (dump_flags & TDF_DETAILS))
28242 fprintf (dump_file, "Predict doloop failure due to"
28243 " loop nesting.\n");
28244 return false;
28245 }
28246
28247 return true;
28248 }
28249
28250 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28251
28252 static machine_mode
28253 rs6000_preferred_doloop_mode (machine_mode)
28254 {
28255 return word_mode;
28256 }
28257
28258 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28259
28260 static bool
28261 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28262 {
28263 gcc_assert (MEM_P (mem));
28264
28265 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
28266 type addresses, so don't allow MEMs with those address types to be
28267 substituted as an equivalent expression. See PR93974 for details. */
28268 if (GET_CODE (XEXP (mem, 0)) == AND)
28269 return true;
28270
28271 return false;
28272 }
28273
28274 /* Implement TARGET_INVALID_CONVERSION. */
28275
28276 static const char *
28277 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28278 {
28279 /* Make sure we're working with the canonical types. */
28280 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28281 fromtype = TYPE_CANONICAL (fromtype);
28282 if (TYPE_CANONICAL (totype) != NULL_TREE)
28283 totype = TYPE_CANONICAL (totype);
28284
28285 machine_mode frommode = TYPE_MODE (fromtype);
28286 machine_mode tomode = TYPE_MODE (totype);
28287
28288 if (frommode != tomode)
28289 {
28290 /* Do not allow conversions to/from XOmode and OOmode types. */
28291 if (frommode == XOmode)
28292 return N_("invalid conversion from type %<__vector_quad%>");
28293 if (tomode == XOmode)
28294 return N_("invalid conversion to type %<__vector_quad%>");
28295 if (frommode == OOmode)
28296 return N_("invalid conversion from type %<__vector_pair%>");
28297 if (tomode == OOmode)
28298 return N_("invalid conversion to type %<__vector_pair%>");
28299 }
28300 else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
28301 {
28302 /* We really care about the modes of the base types. */
28303 frommode = TYPE_MODE (TREE_TYPE (fromtype));
28304 tomode = TYPE_MODE (TREE_TYPE (totype));
28305
28306 /* Do not allow conversions to/from XOmode and OOmode pointer
28307 types, except to/from void pointers. */
28308 if (frommode != tomode
28309 && frommode != VOIDmode
28310 && tomode != VOIDmode)
28311 {
28312 if (frommode == XOmode)
28313 return N_("invalid conversion from type %<__vector_quad *%>");
28314 if (tomode == XOmode)
28315 return N_("invalid conversion to type %<__vector_quad *%>");
28316 if (frommode == OOmode)
28317 return N_("invalid conversion from type %<__vector_pair *%>");
28318 if (tomode == OOmode)
28319 return N_("invalid conversion to type %<__vector_pair *%>");
28320 }
28321 }
28322
28323 /* Conversion allowed. */
28324 return NULL;
28325 }
28326
28327 /* Convert an SFmode constant to its integer bit pattern. */
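/* For example, the SFmode constant 1.0 yields the IEEE single precision bit
   pattern 0x3f800000.  */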
28328
28329 long
28330 rs6000_const_f32_to_i32 (rtx operand)
28331 {
28332 long value;
28333 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28334
28335 gcc_assert (GET_MODE (operand) == SFmode);
28336 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28337 return value;
28338 }
28339
28340 void
28341 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28342 {
28343 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28344 inform (input_location,
28345 "the result for the xxspltidp instruction "
28346 "is undefined for subnormal input values");
28347 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28348 }
28349
28350 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28351
28352 static bool
28353 rs6000_gen_pic_addr_diff_vec (void)
28354 {
28355 return rs6000_relative_jumptables;
28356 }
28357
28358 void
28359 rs6000_output_addr_vec_elt (FILE *file, int value)
28360 {
28361 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28362 char buf[100];
28363
28364 fprintf (file, "%s", directive);
28365 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28366 assemble_name (file, buf);
28367 fprintf (file, "\n");
28368 }
28369
28370 \f
28371 /* Copy an integer constant to the vector constant structure. */
28372
28373 static void
28374 constant_int_to_128bit_vector (rtx op,
28375 machine_mode mode,
28376 size_t byte_num,
28377 vec_const_128bit_type *info)
28378 {
28379 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28380 unsigned bitsize = GET_MODE_BITSIZE (mode);
28381
28382 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28383 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28384 }
28385
28386 /* Copy a floating point constant to the vector constant structure. */
28387
28388 static void
28389 constant_fp_to_128bit_vector (rtx op,
28390 machine_mode mode,
28391 size_t byte_num,
28392 vec_const_128bit_type *info)
28393 {
28394 unsigned bitsize = GET_MODE_BITSIZE (mode);
28395 unsigned num_words = bitsize / 32;
28396 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28397 long real_words[VECTOR_128BIT_WORDS];
28398
28399 /* Make sure we don't overflow the real_words array and that it is
28400 filled completely. */
28401 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28402
28403 real_to_target (real_words, rtype, mode);
28404
28405 /* Iterate over each 32-bit word in the floating point constant. The
28406 real_to_target function writes the words out in target endian fashion. We
28407 need to rearrange them so that the bytes are written in big endian order. */
28408 for (unsigned num = 0; num < num_words; num++)
28409 {
28410 unsigned endian_num = (BYTES_BIG_ENDIAN
28411 ? num
28412 : num_words - 1 - num);
28413
28414 unsigned uvalue = real_words[endian_num];
28415 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28416 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28417 }
28418
28419 /* Mark that this constant involves floating point. */
28420 info->fp_constant_p = true;
28421 }
28422
28423 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28424 structure INFO.
28425
28426 Break the constant out into bytes, half words, words, and double words.
28427 Return true if we have successfully converted the constant.
28428
28429 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28430 constants. Integer and floating point scalar constants are splatted to fill
28431 out the vector. */
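/* For example, the DImode constant 0x0102030405060708 fills bytes[0] through
   bytes[7] in big endian order and is then splatted into bytes[8] through
   bytes[15], so all_double_words_same will be true.  */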
28432
28433 bool
28434 vec_const_128bit_to_bytes (rtx op,
28435 machine_mode mode,
28436 vec_const_128bit_type *info)
28437 {
28438 /* Initialize the constant structure. */
28439 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28440
28441 /* Assume CONST_INTs are DImode. */
28442 if (mode == VOIDmode)
28443 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28444
28445 if (mode == VOIDmode)
28446 return false;
28447
28448 unsigned size = GET_MODE_SIZE (mode);
28449 bool splat_p = false;
28450
28451 if (size > VECTOR_128BIT_BYTES)
28452 return false;
28453
28454 /* Set up the bits. */
28455 switch (GET_CODE (op))
28456 {
28457 /* Integer constants, default to double word. */
28458 case CONST_INT:
28459 {
28460 constant_int_to_128bit_vector (op, mode, 0, info);
28461 splat_p = true;
28462 break;
28463 }
28464
28465 /* Floating point constants. */
28466 case CONST_DOUBLE:
28467 {
28468 /* Fail if the floating point constant is the wrong mode. */
28469 if (GET_MODE (op) != mode)
28470 return false;
28471
28472 /* SFmode scalars are stored in DFmode format. */
28473 if (mode == SFmode)
28474 {
28475 mode = DFmode;
28476 size = GET_MODE_SIZE (DFmode);
28477 }
28478
28479 constant_fp_to_128bit_vector (op, mode, 0, info);
28480 splat_p = true;
28481 break;
28482 }
28483
28484 /* Vector constants, iterate over each element. On little endian
28485 systems, we have to reverse the element numbers. */
28486 case CONST_VECTOR:
28487 {
28488 /* Fail if the vector constant is the wrong mode or size. */
28489 if (GET_MODE (op) != mode
28490 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28491 return false;
28492
28493 machine_mode ele_mode = GET_MODE_INNER (mode);
28494 size_t ele_size = GET_MODE_SIZE (ele_mode);
28495 size_t nunits = GET_MODE_NUNITS (mode);
28496
28497 for (size_t num = 0; num < nunits; num++)
28498 {
28499 rtx ele = CONST_VECTOR_ELT (op, num);
28500 size_t byte_num = (BYTES_BIG_ENDIAN
28501 ? num
28502 : nunits - 1 - num) * ele_size;
28503
28504 if (CONST_INT_P (ele))
28505 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28506 else if (CONST_DOUBLE_P (ele))
28507 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28508 else
28509 return false;
28510 }
28511
28512 break;
28513 }
28514
28515 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28516 Since we are duplicating the element, we don't have to worry about
28517 endian issues. */
28518 case VEC_DUPLICATE:
28519 {
28520 /* Fail if the vector duplicate is the wrong mode or size. */
28521 if (GET_MODE (op) != mode
28522 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28523 return false;
28524
28525 machine_mode ele_mode = GET_MODE_INNER (mode);
28526 size_t ele_size = GET_MODE_SIZE (ele_mode);
28527 rtx ele = XEXP (op, 0);
28528 size_t nunits = GET_MODE_NUNITS (mode);
28529
28530 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
28531 return false;
28532
28533 for (size_t num = 0; num < nunits; num++)
28534 {
28535 size_t byte_num = num * ele_size;
28536
28537 if (CONST_INT_P (ele))
28538 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28539 else
28540 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28541 }
28542
28543 break;
28544 }
28545
28546 /* Anything else, just return failure. */
28547 default:
28548 return false;
28549 }
28550
28551 /* Splat the constant to fill 128 bits if desired. */
28552 if (splat_p && size < VECTOR_128BIT_BYTES)
28553 {
28554 if ((VECTOR_128BIT_BYTES % size) != 0)
28555 return false;
28556
28557 for (size_t offset = size;
28558 offset < VECTOR_128BIT_BYTES;
28559 offset += size)
28560 memcpy ((void *) &info->bytes[offset],
28561 (void *) &info->bytes[0],
28562 size);
28563 }
28564
28565 /* Remember original size. */
28566 info->original_size = size;
28567
28568 /* Determine if the bytes are all the same. */
28569 unsigned char first_byte = info->bytes[0];
28570 info->all_bytes_same = true;
28571 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
28572 if (first_byte != info->bytes[i])
28573 {
28574 info->all_bytes_same = false;
28575 break;
28576 }
28577
28578 /* Pack half words together & determine if all of the half words are the
28579 same. */
28580 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
28581 info->half_words[i] = ((info->bytes[i * 2] << 8)
28582 | info->bytes[(i * 2) + 1]);
28583
28584 unsigned short first_hword = info->half_words[0];
28585 info->all_half_words_same = true;
28586 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
28587 if (first_hword != info->half_words[i])
28588 {
28589 info->all_half_words_same = false;
28590 break;
28591 }
28592
28593 /* Pack words together & determine if all of the words are the same. */
28594 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
28595 info->words[i] = ((info->bytes[i * 4] << 24)
28596 | (info->bytes[(i * 4) + 1] << 16)
28597 | (info->bytes[(i * 4) + 2] << 8)
28598 | info->bytes[(i * 4) + 3]);
28599
28600 info->all_words_same
28601 = (info->words[0] == info->words[1]
28602 && info->words[0] == info->words[2]
28603 && info->words[0] == info->words[3]);
28605
28606 /* Pack double words together & determine if all of the double words are the
28607 same. */
28608 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
28609 {
28610 unsigned HOST_WIDE_INT d_word = 0;
28611 for (size_t j = 0; j < 8; j++)
28612 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
28613
28614 info->double_words[i] = d_word;
28615 }
28616
28617 info->all_double_words_same
28618 = (info->double_words[0] == info->double_words[1]);
28619
28620 return true;
28621 }
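
/* Worked example (illustrative): the V4SImode constant { 7, 7, 7, 7 }
   yields the byte image 00 00 00 07 repeated four times, so
   all_bytes_same and all_half_words_same are false while
   all_words_same and all_double_words_same are true. */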
28622
28623 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
28624 if the LXVKQ instruction cannot be used. Otherwise return the immediate
28625 value to be used with the LXVKQ instruction. */
28626
28627 unsigned
28628 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
28629 {
28630 /* The instruction is only supported when power10 code generation, IEEE
28631 128-bit floating point hardware, and VSX registers are all available. */
28632 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
28633 || !TARGET_VSX)
28634 return 0;
28635
28636 /* All of the constants that can be generated by LXVKQ have the bottom
28637 3 words equal to 0. */
28638 if (vsx_const->words[1] != 0
28639 || vsx_const->words[2] != 0
28640 || vsx_const->words[3] != 0)
28641 return 0;
28642
28643 /* See if we have a match for the first word. */
28644 switch (vsx_const->words[0])
28645 {
28646 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
28647 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
28648 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
28649 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
28650 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
28651 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
28652 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
28653 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
28654 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
28655 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
28656 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
28657 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
28658 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
28659 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
28660 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
28661 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
28662 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
28663 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
28664
28665 /* Anything else cannot be loaded. */
28666 default:
28667 break;
28668 }
28669
28670 return 0;
28671 }
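
/* Illustrative check of the table above: IEEE 128-bit +1.0 has sign 0,
   biased exponent 0x3fff (bias 16383), and an all-zero mantissa, so its
   most significant word is 0x3fff0000 with the remaining words zero,
   which maps to the LXVKQ immediate 1. */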
28672
28673 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
28674 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
28675 value to be used with the XXSPLTIW instruction. */
28676
28677 unsigned
28678 constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
28679 {
28680 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
28681 return 0;
28682
28683 if (!vsx_const->all_words_same)
28684 return 0;
28685
28686 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
28687 if (vsx_const->all_bytes_same)
28688 return 0;
28689
28690 /* See if we can use VSPLTISH or VSPLTISW. */
28691 if (vsx_const->all_half_words_same)
28692 {
28693 unsigned short h_word = vsx_const->half_words[0];
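/* Portably sign extend the 16-bit value: the XOR flips the sign bit and
   the subtraction restores it with sign propagation. */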
28694 short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
28695 if (EASY_VECTOR_15 (sign_h_word))
28696 return 0;
28697 }
28698
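/* Likewise, portably sign extend the 32-bit word for the VSPLTISW test. */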
28699 unsigned int word = vsx_const->words[0];
28700 int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
28701 if (EASY_VECTOR_15 (sign_word))
28702 return 0;
28703
28704 return vsx_const->words[0];
28705 }
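
/* Illustrative check: a V4SImode splat of 0x12345678 has four identical
   words but differing bytes and half words, and 0x12345678 is far outside
   the VSPLTISW range, so 0x12345678 itself is returned as the XXSPLTIW
   immediate. */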
28706
28707 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
28708 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
28709 value to be used with the XXSPLTIDP instruction. */
28710
28711 unsigned
28712 constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
28713 {
28714 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
28715 return 0;
28716
28717 /* Reject if the two 64-bit segments are not the same. */
28718 if (!vsx_const->all_double_words_same)
28719 return 0;
28720
28721 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
28722 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
28723 if (vsx_const->all_bytes_same
28724 || vsx_const->all_half_words_same
28725 || vsx_const->all_words_same)
28726 return 0;
28727
28728 unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
28729
28730 /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
28731 pattern and the signaling NaN bit pattern. Recognize infinity and
28732 negative infinity. */
28733
28734 /* Bit representation of DFmode normal quiet NaN. */
28735 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
28736
28737 /* Bit representation of DFmode normal signaling NaN. */
28738 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
28739
28740 /* Bit representation of DFmode positive infinity. */
28741 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
28742
28743 /* Bit representation of DFmode negative infinity. */
28744 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
28745
28746 if (value != RS6000_CONST_DF_NAN
28747 && value != RS6000_CONST_DF_NANS
28748 && value != RS6000_CONST_DF_INF
28749 && value != RS6000_CONST_DF_NEG_INF)
28750 {
28751 /* The IEEE 754 64-bit floating-point format has 1 bit for the sign, 11
28752 bits for the exponent, and 52 bits for the mantissa (not counting the
28753 hidden bit used for normal numbers). NaN values have the exponent set
28754 to all 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
28755
28756 int df_exponent = (value >> 52) & 0x7ff;
28757 unsigned HOST_WIDE_INT
28758 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
28759
28760 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */
28761 return 0;
28762
28763 /* Avoid DFmode subnormal values. Subnormal numbers have the exponent
28764 all 0 bits and the mantissa non-zero. If the value is subnormal,
28765 then the hidden bit in the mantissa is not set. */
28766 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */
28767 return 0;
28768 }
28769
28770 /* Change the representation to DFmode constant. */
28771 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
28772
28773 /* real_from_target takes the target words in target order. */
28774 if (!BYTES_BIG_ENDIAN)
28775 std::swap (df_words[0], df_words[1]);
28776
28777 REAL_VALUE_TYPE rv_type;
28778 real_from_target (&rv_type, df_words, DFmode);
28779
28780 const REAL_VALUE_TYPE *rv = &rv_type;
28781
28782 /* Validate that the number can be stored as a SFmode value. */
28783 if (!exact_real_truncate (SFmode, rv))
28784 return 0;
28785
28786 /* Validate that the number is not a SFmode subnormal value (exponent is 0,
28787 mantissa field is non-zero) which is undefined for the XXSPLTIDP
28788 instruction. */
28789 long sf_value;
28790 real_to_target (&sf_value, rv, SFmode);
28791
28792 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
28793 and 23 bits for the mantissa. Subnormal numbers have the exponent all
28794 0 bits, and the mantissa non-zero. */
28795 long sf_exponent = (sf_value >> 23) & 0xFF;
28796 long sf_mantissa = sf_value & 0x7FFFFF;
28797
28798 if (sf_exponent == 0 && sf_mantissa != 0)
28799 return 0;
28800
28801 /* Return the immediate to be used. */
28802 return sf_value;
28803 }
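
/* Illustrative check: a V2DFmode splat of 1.0 has both double words equal
   to 0x3ff0000000000000, which truncates exactly to the SFmode pattern
   0x3f800000; that pattern is returned as the XXSPLTIDP immediate. */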
28804
28805 \f
28806 struct gcc_target targetm = TARGET_INITIALIZER;
28807
28808 #include "gt-rs6000.h"