1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2022 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "ira.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
45 #include "flags.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "attribs.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "print-tree.h"
52 #include "varasm.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "output.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "gimple-fold.h"
62 #include "gimple-iterator.h"
63 #include "gimple-walk.h"
64 #include "ssa.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
67 #include "intl.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
70 #include "builtins.h"
71 #include "tree-vector-builder.h"
72 #include "context.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
75 #include "ipa-prop.h"
76 #include "ipa-fnsummary.h"
77 #include "except.h"
78 #if TARGET_XCOFF
79 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
80 #endif
81 #include "case-cfn-macros.h"
82 #include "ppc-auxv.h"
83 #include "rs6000-internal.h"
84 #include "opts.h"
85
86 /* This file should be included last. */
87 #include "target-def.h"
88
89 extern tree rs6000_builtin_mask_for_load (void);
90 extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
91 extern tree rs6000_builtin_reciprocal (tree);
92
93 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
94 systems will also set long double to be IEEE 128-bit. AIX and Darwin
95 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
96 those systems will not pick up this default. This needs to be after all
97 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
98 properly defined. */
99 #ifndef TARGET_IEEEQUAD_DEFAULT
100 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
101 #define TARGET_IEEEQUAD_DEFAULT 1
102 #else
103 #define TARGET_IEEEQUAD_DEFAULT 0
104 #endif
105 #endif
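/* An illustrative sketch (not taken from any real OS header): a target
   configuration file included above can pin the default itself, e.g.

     #define TARGET_IEEEQUAD_DEFAULT 1

   in which case the fallback logic above never fires.  */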
106
107 /* Don't enable PC-relative addressing if the target does not support it. */
108 #ifndef PCREL_SUPPORTED_BY_OS
109 #define PCREL_SUPPORTED_BY_OS 0
110 #endif
111
112 #ifdef USING_ELFOS_H
113 /* Counter for labels which are to be placed in .fixup. */
114 int fixuplabelno = 0;
115 #endif
116
117 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
118 int dot_symbols;
119
120 /* Specify the machine mode that pointers have. After generation of rtl, the
121 compiler makes no further distinction between pointers and any other objects
122 of this machine mode. */
123 scalar_int_mode rs6000_pmode;
124
125 /* Track use of r13 in 64-bit AIX TLS. */
126 static bool xcoff_tls_exec_model_detected = false;
127
128 /* Width in bits of a pointer. */
129 unsigned rs6000_pointer_size;
130
131 #ifdef HAVE_AS_GNU_ATTRIBUTE
132 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
133 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
134 # endif
135 /* Flag whether floating point values have been passed/returned.
136 Note that this doesn't say whether fprs are used, since the
137 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
138 should be set for soft-float values passed in gprs and ieee128
139 values passed in vsx registers. */
140 bool rs6000_passes_float = false;
141 bool rs6000_passes_long_double = false;
142 /* Flag whether vector values have been passed/returned. */
143 bool rs6000_passes_vector = false;
144 /* Flag whether small (<= 8 byte) structures have been returned. */
145 bool rs6000_returns_struct = false;
146 #endif
147
148 /* Value is TRUE if register/mode pair is acceptable. */
149 static bool rs6000_hard_regno_mode_ok_p
150 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
151
152 /* Maximum number of registers needed for a given register class and mode. */
153 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
154
155 /* How many registers are needed for a given register and mode. */
156 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
157
158 /* Map register number to register class. */
159 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
160
161 static int dbg_cost_ctrl;
162
163 /* Flag to say the TOC is initialized. */
164 int toc_initialized, need_toc_init;
165 char toc_label_name[10];
166
167 /* Cached value of rs6000_variable_issue. This is cached in
168 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
169 static short cached_can_issue_more;
170
171 static GTY(()) section *read_only_data_section;
172 static GTY(()) section *private_data_section;
173 static GTY(()) section *tls_data_section;
174 static GTY(()) section *tls_private_data_section;
175 static GTY(()) section *read_only_private_data_section;
176 static GTY(()) section *sdata2_section;
177
178 section *toc_section = 0;
179
180 /* Describe the vector unit used for modes. */
181 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
182 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
183
184 /* Register classes for various constraints that are based on the target
185 switches. */
186 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
187
188 /* Describe the alignment of a vector. */
189 int rs6000_vector_align[NUM_MACHINE_MODES];
190
191 /* What modes to automatically generate reciprocal divide estimate (fre) and
192 reciprocal sqrt (frsqrte) for. */
193 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
194
195 /* Masks to determine which reciprocal estimate instructions to generate
196 automatically. */
197 enum rs6000_recip_mask {
198 RECIP_SF_DIV = 0x001, /* Use divide estimate */
199 RECIP_DF_DIV = 0x002,
200 RECIP_V4SF_DIV = 0x004,
201 RECIP_V2DF_DIV = 0x008,
202
203 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
204 RECIP_DF_RSQRT = 0x020,
205 RECIP_V4SF_RSQRT = 0x040,
206 RECIP_V2DF_RSQRT = 0x080,
207
208 /* Various combination of flags for -mrecip=xxx. */
209 RECIP_NONE = 0,
210 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
211 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
212 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
213
214 RECIP_HIGH_PRECISION = RECIP_ALL,
215
216 /* On low precision machines like the power5, don't enable double precision
217 reciprocal square root estimate, since it isn't accurate enough. */
218 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
219 };
220
221 /* -mrecip options. */
222 static struct
223 {
224 const char *string; /* option name */
225 unsigned int mask; /* mask bits to set */
226 } recip_options[] = {
227 { "all", RECIP_ALL },
228 { "none", RECIP_NONE },
229 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
230 | RECIP_V2DF_DIV) },
231 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
232 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
233 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
234 | RECIP_V2DF_RSQRT) },
235 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
236 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
237 };
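/* A minimal sketch of how the table above is meant to be consumed; the
   real parsing happens when the options are overridden, and
   parse_recip_option here is a hypothetical helper.  A "!" prefix on a
   token inverts it, clearing the bits instead of setting them:

     static unsigned int
     parse_recip_option (const char *opt, unsigned int mask)
     {
       bool invert = (*opt == '!');
       if (invert)
         opt++;
       for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
         if (strcmp (opt, recip_options[i].string) == 0)
           return (invert
                   ? mask & ~recip_options[i].mask
                   : mask | recip_options[i].mask);
       return mask;
     }

   An unknown token leaves the mask unchanged, so e.g. "-mrecip=all,!rsqrtd"
   yields RECIP_ALL with RECIP_DF_RSQRT and RECIP_V2DF_RSQRT cleared.  */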
238
239 /* On PowerPC, we have a limited number of target clones that we care about
240 which means we can use an array to hold the options, rather than having more
241 elaborate data structures to identify each possible variation. Order the
242 clones from the default to the highest ISA. */
243 enum {
244 CLONE_DEFAULT = 0, /* default clone. */
245 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
246 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
247 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
248 CLONE_ISA_3_00, /* ISA 3.0 (power9). */
249 CLONE_ISA_3_1, /* ISA 3.1 (power10). */
250 CLONE_MAX
251 };
252
253 /* Map compiler ISA bits into HWCAP names. */
254 struct clone_map {
255 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
256 const char *name; /* name to use in __builtin_cpu_supports. */
257 };
258
259 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
260 { 0, "" }, /* Default options. */
261 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
262 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
263 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
264 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
265 { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
266 };
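/* For reference, these are the names users write in target_clones
   attributes and runtime dispatch checks, e.g. (illustrative only):

     __attribute__ ((target_clones ("default,arch_2_07,arch_3_1")))
     long sum (const long *p, size_t n);

     if (__builtin_cpu_supports ("arch_3_00"))
       use_power9_path ();
*/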
267
268
269 /* Newer LIBCs explicitly export this symbol to declare that they provide
270 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
271 reference to this symbol whenever we expand a CPU builtin, so that
272 we never link against an old LIBC. */
273 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
274
275 /* True if we have expanded a CPU builtin. */
276 bool cpu_builtin_p = false;
277
278 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
279 macros that have changed. Languages that don't support the preprocessor
280 don't link in rs6000-c.cc, so we can't call it directly. */
281 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
282
283 /* Simplify register classes into simpler classifications. We assume
284 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
285 check for standard register classes (gpr/floating/altivec/vsx) and
286 floating/vector classes (float/altivec/vsx). */
287
288 enum rs6000_reg_type {
289 NO_REG_TYPE,
290 PSEUDO_REG_TYPE,
291 GPR_REG_TYPE,
292 VSX_REG_TYPE,
293 ALTIVEC_REG_TYPE,
294 FPR_REG_TYPE,
295 SPR_REG_TYPE,
296 CR_REG_TYPE
297 };
298
299 /* Map register class to register type. */
300 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
301
302 /* First/last register type for the 'normal' register types (i.e. general
303 purpose, floating point, altivec, and VSX registers). */
304 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
305
306 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
307
308
309 /* Register classes we care about in secondary reload or when checking for a
310 legitimate address. We only need to worry about GPR, FPR, and Altivec
311 registers here, along with an ANY field that is the OR of the 3 classes. */
312
313 enum rs6000_reload_reg_type {
314 RELOAD_REG_GPR, /* General purpose registers. */
315 RELOAD_REG_FPR, /* Traditional floating point regs. */
316 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
317 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
318 N_RELOAD_REG
319 };
320
321 /* For setting up register classes, loop through the 3 register classes mapping
322 into real registers, and skip the ANY class, which is just an OR of the
323 bits. */
324 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
325 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
326
327 /* Map reload register type to a register in the register class. */
328 struct reload_reg_map_type {
329 const char *name; /* Register class name. */
330 int reg; /* Register in the register class. */
331 };
332
333 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
334 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
335 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
336 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
337 { "Any", -1 }, /* RELOAD_REG_ANY. */
338 };
339
340 /* Mask bits for each register class, indexed per mode. Historically the
341 compiler has been more restrictive about which types can do PRE_MODIFY vs.
342 PRE_INC and PRE_DEC, so keep track of separate bits for the two cases. */
343 typedef unsigned char addr_mask_type;
344
345 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
346 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
347 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
348 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
349 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
350 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
351 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
352 #define RELOAD_REG_QUAD_OFFSET 0x80 /* Quad offset is limited. */
353
354 /* Valid addressing mode masks, based on register type, and reload insns. */
355 struct rs6000_reg_addr {
356 enum insn_code reload_load; /* INSN to reload for loading. */
357 enum insn_code reload_store; /* INSN to reload for storing. */
358 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
359 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
360 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
361 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
362 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
363 };
364
365 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
366
367 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
368 static inline bool
369 mode_supports_pre_incdec_p (machine_mode mode)
370 {
371 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
372 != 0);
373 }
374
375 /* Helper function to say whether a mode supports PRE_MODIFY. */
376 static inline bool
377 mode_supports_pre_modify_p (machine_mode mode)
378 {
379 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
380 != 0);
381 }
382
383 /* Return true if we have D-form addressing in altivec registers. */
384 static inline bool
385 mode_supports_vmx_dform (machine_mode mode)
386 {
387 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
388 }
389
390 /* Return true if we have D-form addressing in VSX registers. This addressing
391 is more limited than normal d-form addressing in that the offset must be
392 aligned on a 16-byte boundary. */
393 static inline bool
394 mode_supports_dq_form (machine_mode mode)
395 {
396 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
397 != 0);
398 }
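/* An illustrative use of the predicates above (not actual logic from
   this file): auto-increment address generation can be gated on the
   mask recorded for the mode, e.g.

     if (mode_supports_pre_incdec_p (mode))
       mem = gen_rtx_MEM (mode, gen_rtx_PRE_INC (Pmode, base_reg));

   where base_reg is assumed to be a Pmode register.  */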
399
400 /* Given that there exists at least one variable that is set (produced)
401 by OUT_INSN and read (consumed) by IN_INSN, return true iff
402 IN_INSN represents one or more memory store operations and none of
403 the variables set by OUT_INSN is used by IN_INSN as the address of a
404 store operation. If either IN_INSN or OUT_INSN does not represent
405 a "single" RTL SET expression (as loosely defined by the
406 implementation of the single_set function) or a PARALLEL with only
407 SETs, CLOBBERs, and USEs inside, this function returns false.
408
409 This rs6000-specific version of store_data_bypass_p checks for
410 certain conditions that result in assertion failures (and internal
411 compiler errors) in the generic store_data_bypass_p function and
412 returns false rather than calling store_data_bypass_p if one of the
413 problematic conditions is detected. */
414
415 int
416 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
417 {
418 rtx out_set, in_set;
419 rtx out_pat, in_pat;
420 rtx out_exp, in_exp;
421 int i, j;
422
423 in_set = single_set (in_insn);
424 if (in_set)
425 {
426 if (MEM_P (SET_DEST (in_set)))
427 {
428 out_set = single_set (out_insn);
429 if (!out_set)
430 {
431 out_pat = PATTERN (out_insn);
432 if (GET_CODE (out_pat) == PARALLEL)
433 {
434 for (i = 0; i < XVECLEN (out_pat, 0); i++)
435 {
436 out_exp = XVECEXP (out_pat, 0, i);
437 if ((GET_CODE (out_exp) == CLOBBER)
438 || (GET_CODE (out_exp) == USE))
439 continue;
440 else if (GET_CODE (out_exp) != SET)
441 return false;
442 }
443 }
444 }
445 }
446 }
447 else
448 {
449 in_pat = PATTERN (in_insn);
450 if (GET_CODE (in_pat) != PARALLEL)
451 return false;
452
453 for (i = 0; i < XVECLEN (in_pat, 0); i++)
454 {
455 in_exp = XVECEXP (in_pat, 0, i);
456 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
457 continue;
458 else if (GET_CODE (in_exp) != SET)
459 return false;
460
461 if (MEM_P (SET_DEST (in_exp)))
462 {
463 out_set = single_set (out_insn);
464 if (!out_set)
465 {
466 out_pat = PATTERN (out_insn);
467 if (GET_CODE (out_pat) != PARALLEL)
468 return false;
469 for (j = 0; j < XVECLEN (out_pat, 0); j++)
470 {
471 out_exp = XVECEXP (out_pat, 0, j);
472 if ((GET_CODE (out_exp) == CLOBBER)
473 || (GET_CODE (out_exp) == USE))
474 continue;
475 else if (GET_CODE (out_exp) != SET)
476 return false;
477 }
478 }
479 }
480 }
481 }
482 return store_data_bypass_p (out_insn, in_insn);
483 }
484
485 \f
486 /* Processor costs (relative to an add). */
487
488 const struct processor_costs *rs6000_cost;
489
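/* COSTS_N_INSNS is the common scale in the tables below: rtl.h defines
   it as ((N) * 4), so an entry of COSTS_N_INSNS (18) is 72 units
   relative to a 4-unit add.  rs6000_cost is pointed at one of these
   tables once the -mcpu= (or optimize-for-size) selection is known.  */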
490 /* Instruction size costs on 32-bit processors. */
491 static const
492 struct processor_costs size32_cost = {
493 COSTS_N_INSNS (1), /* mulsi */
494 COSTS_N_INSNS (1), /* mulsi_const */
495 COSTS_N_INSNS (1), /* mulsi_const9 */
496 COSTS_N_INSNS (1), /* muldi */
497 COSTS_N_INSNS (1), /* divsi */
498 COSTS_N_INSNS (1), /* divdi */
499 COSTS_N_INSNS (1), /* fp */
500 COSTS_N_INSNS (1), /* dmul */
501 COSTS_N_INSNS (1), /* sdiv */
502 COSTS_N_INSNS (1), /* ddiv */
503 32, /* cache line size */
504 0, /* l1 cache */
505 0, /* l2 cache */
506 0, /* streams */
507 0, /* SF->DF convert */
508 };
509
510 /* Instruction size costs on 64-bit processors. */
511 static const
512 struct processor_costs size64_cost = {
513 COSTS_N_INSNS (1), /* mulsi */
514 COSTS_N_INSNS (1), /* mulsi_const */
515 COSTS_N_INSNS (1), /* mulsi_const9 */
516 COSTS_N_INSNS (1), /* muldi */
517 COSTS_N_INSNS (1), /* divsi */
518 COSTS_N_INSNS (1), /* divdi */
519 COSTS_N_INSNS (1), /* fp */
520 COSTS_N_INSNS (1), /* dmul */
521 COSTS_N_INSNS (1), /* sdiv */
522 COSTS_N_INSNS (1), /* ddiv */
523 128, /* cache line size */
524 0, /* l1 cache */
525 0, /* l2 cache */
526 0, /* streams */
527 0, /* SF->DF convert */
528 };
529
530 /* Instruction costs on RS64A processors. */
531 static const
532 struct processor_costs rs64a_cost = {
533 COSTS_N_INSNS (20), /* mulsi */
534 COSTS_N_INSNS (12), /* mulsi_const */
535 COSTS_N_INSNS (8), /* mulsi_const9 */
536 COSTS_N_INSNS (34), /* muldi */
537 COSTS_N_INSNS (65), /* divsi */
538 COSTS_N_INSNS (67), /* divdi */
539 COSTS_N_INSNS (4), /* fp */
540 COSTS_N_INSNS (4), /* dmul */
541 COSTS_N_INSNS (31), /* sdiv */
542 COSTS_N_INSNS (31), /* ddiv */
543 128, /* cache line size */
544 128, /* l1 cache */
545 2048, /* l2 cache */
546 1, /* streams */
547 0, /* SF->DF convert */
548 };
549
550 /* Instruction costs on MPCCORE processors. */
551 static const
552 struct processor_costs mpccore_cost = {
553 COSTS_N_INSNS (2), /* mulsi */
554 COSTS_N_INSNS (2), /* mulsi_const */
555 COSTS_N_INSNS (2), /* mulsi_const9 */
556 COSTS_N_INSNS (2), /* muldi */
557 COSTS_N_INSNS (6), /* divsi */
558 COSTS_N_INSNS (6), /* divdi */
559 COSTS_N_INSNS (4), /* fp */
560 COSTS_N_INSNS (5), /* dmul */
561 COSTS_N_INSNS (10), /* sdiv */
562 COSTS_N_INSNS (17), /* ddiv */
563 32, /* cache line size */
564 4, /* l1 cache */
565 16, /* l2 cache */
566 1, /* streams */
567 0, /* SF->DF convert */
568 };
569
570 /* Instruction costs on PPC403 processors. */
571 static const
572 struct processor_costs ppc403_cost = {
573 COSTS_N_INSNS (4), /* mulsi */
574 COSTS_N_INSNS (4), /* mulsi_const */
575 COSTS_N_INSNS (4), /* mulsi_const9 */
576 COSTS_N_INSNS (4), /* muldi */
577 COSTS_N_INSNS (33), /* divsi */
578 COSTS_N_INSNS (33), /* divdi */
579 COSTS_N_INSNS (11), /* fp */
580 COSTS_N_INSNS (11), /* dmul */
581 COSTS_N_INSNS (11), /* sdiv */
582 COSTS_N_INSNS (11), /* ddiv */
583 32, /* cache line size */
584 4, /* l1 cache */
585 16, /* l2 cache */
586 1, /* streams */
587 0, /* SF->DF convert */
588 };
589
590 /* Instruction costs on PPC405 processors. */
591 static const
592 struct processor_costs ppc405_cost = {
593 COSTS_N_INSNS (5), /* mulsi */
594 COSTS_N_INSNS (4), /* mulsi_const */
595 COSTS_N_INSNS (3), /* mulsi_const9 */
596 COSTS_N_INSNS (5), /* muldi */
597 COSTS_N_INSNS (35), /* divsi */
598 COSTS_N_INSNS (35), /* divdi */
599 COSTS_N_INSNS (11), /* fp */
600 COSTS_N_INSNS (11), /* dmul */
601 COSTS_N_INSNS (11), /* sdiv */
602 COSTS_N_INSNS (11), /* ddiv */
603 32, /* cache line size */
604 16, /* l1 cache */
605 128, /* l2 cache */
606 1, /* streams */
607 0, /* SF->DF convert */
608 };
609
610 /* Instruction costs on PPC440 processors. */
611 static const
612 struct processor_costs ppc440_cost = {
613 COSTS_N_INSNS (3), /* mulsi */
614 COSTS_N_INSNS (2), /* mulsi_const */
615 COSTS_N_INSNS (2), /* mulsi_const9 */
616 COSTS_N_INSNS (3), /* muldi */
617 COSTS_N_INSNS (34), /* divsi */
618 COSTS_N_INSNS (34), /* divdi */
619 COSTS_N_INSNS (5), /* fp */
620 COSTS_N_INSNS (5), /* dmul */
621 COSTS_N_INSNS (19), /* sdiv */
622 COSTS_N_INSNS (33), /* ddiv */
623 32, /* cache line size */
624 32, /* l1 cache */
625 256, /* l2 cache */
626 1, /* streams */
627 0, /* SF->DF convert */
628 };
629
630 /* Instruction costs on PPC476 processors. */
631 static const
632 struct processor_costs ppc476_cost = {
633 COSTS_N_INSNS (4), /* mulsi */
634 COSTS_N_INSNS (4), /* mulsi_const */
635 COSTS_N_INSNS (4), /* mulsi_const9 */
636 COSTS_N_INSNS (4), /* muldi */
637 COSTS_N_INSNS (11), /* divsi */
638 COSTS_N_INSNS (11), /* divdi */
639 COSTS_N_INSNS (6), /* fp */
640 COSTS_N_INSNS (6), /* dmul */
641 COSTS_N_INSNS (19), /* sdiv */
642 COSTS_N_INSNS (33), /* ddiv */
643 32, /* cache line size */
644 32, /* l1 cache */
645 512, /* l2 cache */
646 1, /* streams */
647 0, /* SF->DF convert */
648 };
649
650 /* Instruction costs on PPC601 processors. */
651 static const
652 struct processor_costs ppc601_cost = {
653 COSTS_N_INSNS (5), /* mulsi */
654 COSTS_N_INSNS (5), /* mulsi_const */
655 COSTS_N_INSNS (5), /* mulsi_const9 */
656 COSTS_N_INSNS (5), /* muldi */
657 COSTS_N_INSNS (36), /* divsi */
658 COSTS_N_INSNS (36), /* divdi */
659 COSTS_N_INSNS (4), /* fp */
660 COSTS_N_INSNS (5), /* dmul */
661 COSTS_N_INSNS (17), /* sdiv */
662 COSTS_N_INSNS (31), /* ddiv */
663 32, /* cache line size */
664 32, /* l1 cache */
665 256, /* l2 cache */
666 1, /* streams */
667 0, /* SF->DF convert */
668 };
669
670 /* Instruction costs on PPC603 processors. */
671 static const
672 struct processor_costs ppc603_cost = {
673 COSTS_N_INSNS (5), /* mulsi */
674 COSTS_N_INSNS (3), /* mulsi_const */
675 COSTS_N_INSNS (2), /* mulsi_const9 */
676 COSTS_N_INSNS (5), /* muldi */
677 COSTS_N_INSNS (37), /* divsi */
678 COSTS_N_INSNS (37), /* divdi */
679 COSTS_N_INSNS (3), /* fp */
680 COSTS_N_INSNS (4), /* dmul */
681 COSTS_N_INSNS (18), /* sdiv */
682 COSTS_N_INSNS (33), /* ddiv */
683 32, /* cache line size */
684 8, /* l1 cache */
685 64, /* l2 cache */
686 1, /* streams */
687 0, /* SF->DF convert */
688 };
689
690 /* Instruction costs on PPC604 processors. */
691 static const
692 struct processor_costs ppc604_cost = {
693 COSTS_N_INSNS (4), /* mulsi */
694 COSTS_N_INSNS (4), /* mulsi_const */
695 COSTS_N_INSNS (4), /* mulsi_const9 */
696 COSTS_N_INSNS (4), /* muldi */
697 COSTS_N_INSNS (20), /* divsi */
698 COSTS_N_INSNS (20), /* divdi */
699 COSTS_N_INSNS (3), /* fp */
700 COSTS_N_INSNS (3), /* dmul */
701 COSTS_N_INSNS (18), /* sdiv */
702 COSTS_N_INSNS (32), /* ddiv */
703 32, /* cache line size */
704 16, /* l1 cache */
705 512, /* l2 cache */
706 1, /* streams */
707 0, /* SF->DF convert */
708 };
709
710 /* Instruction costs on PPC604e processors. */
711 static const
712 struct processor_costs ppc604e_cost = {
713 COSTS_N_INSNS (2), /* mulsi */
714 COSTS_N_INSNS (2), /* mulsi_const */
715 COSTS_N_INSNS (2), /* mulsi_const9 */
716 COSTS_N_INSNS (2), /* muldi */
717 COSTS_N_INSNS (20), /* divsi */
718 COSTS_N_INSNS (20), /* divdi */
719 COSTS_N_INSNS (3), /* fp */
720 COSTS_N_INSNS (3), /* dmul */
721 COSTS_N_INSNS (18), /* sdiv */
722 COSTS_N_INSNS (32), /* ddiv */
723 32, /* cache line size */
724 32, /* l1 cache */
725 1024, /* l2 cache */
726 1, /* streams */
727 0, /* SF->DF convert */
728 };
729
730 /* Instruction costs on PPC620 processors. */
731 static const
732 struct processor_costs ppc620_cost = {
733 COSTS_N_INSNS (5), /* mulsi */
734 COSTS_N_INSNS (4), /* mulsi_const */
735 COSTS_N_INSNS (3), /* mulsi_const9 */
736 COSTS_N_INSNS (7), /* muldi */
737 COSTS_N_INSNS (21), /* divsi */
738 COSTS_N_INSNS (37), /* divdi */
739 COSTS_N_INSNS (3), /* fp */
740 COSTS_N_INSNS (3), /* dmul */
741 COSTS_N_INSNS (18), /* sdiv */
742 COSTS_N_INSNS (32), /* ddiv */
743 128, /* cache line size */
744 32, /* l1 cache */
745 1024, /* l2 cache */
746 1, /* streams */
747 0, /* SF->DF convert */
748 };
749
750 /* Instruction costs on PPC630 processors. */
751 static const
752 struct processor_costs ppc630_cost = {
753 COSTS_N_INSNS (5), /* mulsi */
754 COSTS_N_INSNS (4), /* mulsi_const */
755 COSTS_N_INSNS (3), /* mulsi_const9 */
756 COSTS_N_INSNS (7), /* muldi */
757 COSTS_N_INSNS (21), /* divsi */
758 COSTS_N_INSNS (37), /* divdi */
759 COSTS_N_INSNS (3), /* fp */
760 COSTS_N_INSNS (3), /* dmul */
761 COSTS_N_INSNS (17), /* sdiv */
762 COSTS_N_INSNS (21), /* ddiv */
763 128, /* cache line size */
764 64, /* l1 cache */
765 1024, /* l2 cache */
766 1, /* streams */
767 0, /* SF->DF convert */
768 };
769
770 /* Instruction costs on Cell processor. */
771 /* COSTS_N_INSNS (1) ~ one add. */
772 static const
773 struct processor_costs ppccell_cost = {
774 COSTS_N_INSNS (9/2)+2, /* mulsi */
775 COSTS_N_INSNS (6/2), /* mulsi_const */
776 COSTS_N_INSNS (6/2), /* mulsi_const9 */
777 COSTS_N_INSNS (15/2)+2, /* muldi */
778 COSTS_N_INSNS (38/2), /* divsi */
779 COSTS_N_INSNS (70/2), /* divdi */
780 COSTS_N_INSNS (10/2), /* fp */
781 COSTS_N_INSNS (10/2), /* dmul */
782 COSTS_N_INSNS (74/2), /* sdiv */
783 COSTS_N_INSNS (74/2), /* ddiv */
784 128, /* cache line size */
785 32, /* l1 cache */
786 512, /* l2 cache */
787 6, /* streams */
788 0, /* SF->DF convert */
789 };
790
791 /* Instruction costs on PPC750 and PPC7400 processors. */
792 static const
793 struct processor_costs ppc750_cost = {
794 COSTS_N_INSNS (5), /* mulsi */
795 COSTS_N_INSNS (3), /* mulsi_const */
796 COSTS_N_INSNS (2), /* mulsi_const9 */
797 COSTS_N_INSNS (5), /* muldi */
798 COSTS_N_INSNS (17), /* divsi */
799 COSTS_N_INSNS (17), /* divdi */
800 COSTS_N_INSNS (3), /* fp */
801 COSTS_N_INSNS (3), /* dmul */
802 COSTS_N_INSNS (17), /* sdiv */
803 COSTS_N_INSNS (31), /* ddiv */
804 32, /* cache line size */
805 32, /* l1 cache */
806 512, /* l2 cache */
807 1, /* streams */
808 0, /* SF->DF convert */
809 };
810
811 /* Instruction costs on PPC7450 processors. */
812 static const
813 struct processor_costs ppc7450_cost = {
814 COSTS_N_INSNS (4), /* mulsi */
815 COSTS_N_INSNS (3), /* mulsi_const */
816 COSTS_N_INSNS (3), /* mulsi_const9 */
817 COSTS_N_INSNS (4), /* muldi */
818 COSTS_N_INSNS (23), /* divsi */
819 COSTS_N_INSNS (23), /* divdi */
820 COSTS_N_INSNS (5), /* fp */
821 COSTS_N_INSNS (5), /* dmul */
822 COSTS_N_INSNS (21), /* sdiv */
823 COSTS_N_INSNS (35), /* ddiv */
824 32, /* cache line size */
825 32, /* l1 cache */
826 1024, /* l2 cache */
827 1, /* streams */
828 0, /* SF->DF convert */
829 };
830
831 /* Instruction costs on PPC8540 processors. */
832 static const
833 struct processor_costs ppc8540_cost = {
834 COSTS_N_INSNS (4), /* mulsi */
835 COSTS_N_INSNS (4), /* mulsi_const */
836 COSTS_N_INSNS (4), /* mulsi_const9 */
837 COSTS_N_INSNS (4), /* muldi */
838 COSTS_N_INSNS (19), /* divsi */
839 COSTS_N_INSNS (19), /* divdi */
840 COSTS_N_INSNS (4), /* fp */
841 COSTS_N_INSNS (4), /* dmul */
842 COSTS_N_INSNS (29), /* sdiv */
843 COSTS_N_INSNS (29), /* ddiv */
844 32, /* cache line size */
845 32, /* l1 cache */
846 256, /* l2 cache */
847 1, /* prefetch streams */
848 0, /* SF->DF convert */
849 };
850
851 /* Instruction costs on E300C2 and E300C3 cores. */
852 static const
853 struct processor_costs ppce300c2c3_cost = {
854 COSTS_N_INSNS (4), /* mulsi */
855 COSTS_N_INSNS (4), /* mulsi_const */
856 COSTS_N_INSNS (4), /* mulsi_const9 */
857 COSTS_N_INSNS (4), /* muldi */
858 COSTS_N_INSNS (19), /* divsi */
859 COSTS_N_INSNS (19), /* divdi */
860 COSTS_N_INSNS (3), /* fp */
861 COSTS_N_INSNS (4), /* dmul */
862 COSTS_N_INSNS (18), /* sdiv */
863 COSTS_N_INSNS (33), /* ddiv */
864 32, /* cache line size */
865 16, /* l1 cache */
866 16, /* l2 cache */
867 1, /* prefetch streams */
868 0, /* SF->DF convert */
869 };
870
871 /* Instruction costs on PPCE500MC processors. */
872 static const
873 struct processor_costs ppce500mc_cost = {
874 COSTS_N_INSNS (4), /* mulsi */
875 COSTS_N_INSNS (4), /* mulsi_const */
876 COSTS_N_INSNS (4), /* mulsi_const9 */
877 COSTS_N_INSNS (4), /* muldi */
878 COSTS_N_INSNS (14), /* divsi */
879 COSTS_N_INSNS (14), /* divdi */
880 COSTS_N_INSNS (8), /* fp */
881 COSTS_N_INSNS (10), /* dmul */
882 COSTS_N_INSNS (36), /* sdiv */
883 COSTS_N_INSNS (66), /* ddiv */
884 64, /* cache line size */
885 32, /* l1 cache */
886 128, /* l2 cache */
887 1, /* prefetch streams */
888 0, /* SF->DF convert */
889 };
890
891 /* Instruction costs on PPCE500MC64 processors. */
892 static const
893 struct processor_costs ppce500mc64_cost = {
894 COSTS_N_INSNS (4), /* mulsi */
895 COSTS_N_INSNS (4), /* mulsi_const */
896 COSTS_N_INSNS (4), /* mulsi_const9 */
897 COSTS_N_INSNS (4), /* muldi */
898 COSTS_N_INSNS (14), /* divsi */
899 COSTS_N_INSNS (14), /* divdi */
900 COSTS_N_INSNS (4), /* fp */
901 COSTS_N_INSNS (10), /* dmul */
902 COSTS_N_INSNS (36), /* sdiv */
903 COSTS_N_INSNS (66), /* ddiv */
904 64, /* cache line size */
905 32, /* l1 cache */
906 128, /* l2 cache */
907 1, /* prefetch streams */
908 0, /* SF->DF convert */
909 };
910
911 /* Instruction costs on PPCE5500 processors. */
912 static const
913 struct processor_costs ppce5500_cost = {
914 COSTS_N_INSNS (5), /* mulsi */
915 COSTS_N_INSNS (5), /* mulsi_const */
916 COSTS_N_INSNS (4), /* mulsi_const9 */
917 COSTS_N_INSNS (5), /* muldi */
918 COSTS_N_INSNS (14), /* divsi */
919 COSTS_N_INSNS (14), /* divdi */
920 COSTS_N_INSNS (7), /* fp */
921 COSTS_N_INSNS (10), /* dmul */
922 COSTS_N_INSNS (36), /* sdiv */
923 COSTS_N_INSNS (66), /* ddiv */
924 64, /* cache line size */
925 32, /* l1 cache */
926 128, /* l2 cache */
927 1, /* prefetch streams */
928 0, /* SF->DF convert */
929 };
930
931 /* Instruction costs on PPCE6500 processors. */
932 static const
933 struct processor_costs ppce6500_cost = {
934 COSTS_N_INSNS (5), /* mulsi */
935 COSTS_N_INSNS (5), /* mulsi_const */
936 COSTS_N_INSNS (4), /* mulsi_const9 */
937 COSTS_N_INSNS (5), /* muldi */
938 COSTS_N_INSNS (14), /* divsi */
939 COSTS_N_INSNS (14), /* divdi */
940 COSTS_N_INSNS (7), /* fp */
941 COSTS_N_INSNS (10), /* dmul */
942 COSTS_N_INSNS (36), /* sdiv */
943 COSTS_N_INSNS (66), /* ddiv */
944 64, /* cache line size */
945 32, /* l1 cache */
946 128, /* l2 cache */
947 1, /* prefetch streams */
948 0, /* SF->DF convert */
949 };
950
951 /* Instruction costs on AppliedMicro Titan processors. */
952 static const
953 struct processor_costs titan_cost = {
954 COSTS_N_INSNS (5), /* mulsi */
955 COSTS_N_INSNS (5), /* mulsi_const */
956 COSTS_N_INSNS (5), /* mulsi_const9 */
957 COSTS_N_INSNS (5), /* muldi */
958 COSTS_N_INSNS (18), /* divsi */
959 COSTS_N_INSNS (18), /* divdi */
960 COSTS_N_INSNS (10), /* fp */
961 COSTS_N_INSNS (10), /* dmul */
962 COSTS_N_INSNS (46), /* sdiv */
963 COSTS_N_INSNS (72), /* ddiv */
964 32, /* cache line size */
965 32, /* l1 cache */
966 512, /* l2 cache */
967 1, /* prefetch streams */
968 0, /* SF->DF convert */
969 };
970
971 /* Instruction costs on POWER4 and POWER5 processors. */
972 static const
973 struct processor_costs power4_cost = {
974 COSTS_N_INSNS (3), /* mulsi */
975 COSTS_N_INSNS (2), /* mulsi_const */
976 COSTS_N_INSNS (2), /* mulsi_const9 */
977 COSTS_N_INSNS (4), /* muldi */
978 COSTS_N_INSNS (18), /* divsi */
979 COSTS_N_INSNS (34), /* divdi */
980 COSTS_N_INSNS (3), /* fp */
981 COSTS_N_INSNS (3), /* dmul */
982 COSTS_N_INSNS (17), /* sdiv */
983 COSTS_N_INSNS (17), /* ddiv */
984 128, /* cache line size */
985 32, /* l1 cache */
986 1024, /* l2 cache */
987 8, /* prefetch streams */
988 0, /* SF->DF convert */
989 };
990
991 /* Instruction costs on POWER6 processors. */
992 static const
993 struct processor_costs power6_cost = {
994 COSTS_N_INSNS (8), /* mulsi */
995 COSTS_N_INSNS (8), /* mulsi_const */
996 COSTS_N_INSNS (8), /* mulsi_const9 */
997 COSTS_N_INSNS (8), /* muldi */
998 COSTS_N_INSNS (22), /* divsi */
999 COSTS_N_INSNS (28), /* divdi */
1000 COSTS_N_INSNS (3), /* fp */
1001 COSTS_N_INSNS (3), /* dmul */
1002 COSTS_N_INSNS (13), /* sdiv */
1003 COSTS_N_INSNS (16), /* ddiv */
1004 128, /* cache line size */
1005 64, /* l1 cache */
1006 2048, /* l2 cache */
1007 16, /* prefetch streams */
1008 0, /* SF->DF convert */
1009 };
1010
1011 /* Instruction costs on POWER7 processors. */
1012 static const
1013 struct processor_costs power7_cost = {
1014 COSTS_N_INSNS (2), /* mulsi */
1015 COSTS_N_INSNS (2), /* mulsi_const */
1016 COSTS_N_INSNS (2), /* mulsi_const9 */
1017 COSTS_N_INSNS (2), /* muldi */
1018 COSTS_N_INSNS (18), /* divsi */
1019 COSTS_N_INSNS (34), /* divdi */
1020 COSTS_N_INSNS (3), /* fp */
1021 COSTS_N_INSNS (3), /* dmul */
1022 COSTS_N_INSNS (13), /* sdiv */
1023 COSTS_N_INSNS (16), /* ddiv */
1024 128, /* cache line size */
1025 32, /* l1 cache */
1026 256, /* l2 cache */
1027 12, /* prefetch streams */
1028 COSTS_N_INSNS (3), /* SF->DF convert */
1029 };
1030
1031 /* Instruction costs on POWER8 processors. */
1032 static const
1033 struct processor_costs power8_cost = {
1034 COSTS_N_INSNS (3), /* mulsi */
1035 COSTS_N_INSNS (3), /* mulsi_const */
1036 COSTS_N_INSNS (3), /* mulsi_const9 */
1037 COSTS_N_INSNS (3), /* muldi */
1038 COSTS_N_INSNS (19), /* divsi */
1039 COSTS_N_INSNS (35), /* divdi */
1040 COSTS_N_INSNS (3), /* fp */
1041 COSTS_N_INSNS (3), /* dmul */
1042 COSTS_N_INSNS (14), /* sdiv */
1043 COSTS_N_INSNS (17), /* ddiv */
1044 128, /* cache line size */
1045 32, /* l1 cache */
1046 512, /* l2 cache */
1047 12, /* prefetch streams */
1048 COSTS_N_INSNS (3), /* SF->DF convert */
1049 };
1050
1051 /* Instruction costs on POWER9 processors. */
1052 static const
1053 struct processor_costs power9_cost = {
1054 COSTS_N_INSNS (3), /* mulsi */
1055 COSTS_N_INSNS (3), /* mulsi_const */
1056 COSTS_N_INSNS (3), /* mulsi_const9 */
1057 COSTS_N_INSNS (3), /* muldi */
1058 COSTS_N_INSNS (8), /* divsi */
1059 COSTS_N_INSNS (12), /* divdi */
1060 COSTS_N_INSNS (3), /* fp */
1061 COSTS_N_INSNS (3), /* dmul */
1062 COSTS_N_INSNS (13), /* sdiv */
1063 COSTS_N_INSNS (18), /* ddiv */
1064 128, /* cache line size */
1065 32, /* l1 cache */
1066 512, /* l2 cache */
1067 8, /* prefetch streams */
1068 COSTS_N_INSNS (3), /* SF->DF convert */
1069 };
1070
1071 /* Instruction costs on POWER10 processors. */
1072 static const
1073 struct processor_costs power10_cost = {
1074 COSTS_N_INSNS (2), /* mulsi */
1075 COSTS_N_INSNS (2), /* mulsi_const */
1076 COSTS_N_INSNS (2), /* mulsi_const9 */
1077 COSTS_N_INSNS (2), /* muldi */
1078 COSTS_N_INSNS (6), /* divsi */
1079 COSTS_N_INSNS (6), /* divdi */
1080 COSTS_N_INSNS (2), /* fp */
1081 COSTS_N_INSNS (2), /* dmul */
1082 COSTS_N_INSNS (11), /* sdiv */
1083 COSTS_N_INSNS (13), /* ddiv */
1084 128, /* cache line size */
1085 32, /* l1 cache */
1086 512, /* l2 cache */
1087 16, /* prefetch streams */
1088 COSTS_N_INSNS (2), /* SF->DF convert */
1089 };
1090
1091 /* Instruction costs on POWER A2 processors. */
1092 static const
1093 struct processor_costs ppca2_cost = {
1094 COSTS_N_INSNS (16), /* mulsi */
1095 COSTS_N_INSNS (16), /* mulsi_const */
1096 COSTS_N_INSNS (16), /* mulsi_const9 */
1097 COSTS_N_INSNS (16), /* muldi */
1098 COSTS_N_INSNS (22), /* divsi */
1099 COSTS_N_INSNS (28), /* divdi */
1100 COSTS_N_INSNS (3), /* fp */
1101 COSTS_N_INSNS (3), /* dmul */
1102 COSTS_N_INSNS (59), /* sdiv */
1103 COSTS_N_INSNS (72), /* ddiv */
1104 64, /* cache line size */
1105 16, /* l1 cache */
1106 2048, /* l2 cache */
1107 16, /* prefetch streams */
1108 0, /* SF->DF convert */
1109 };
1110
1111 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1112 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
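/* Usage note: this hook is installed in response to the command line,
   e.g.

     gcc -O3 -mveclibabi=mass ...

   which lets the vectorizer route calls such as sin and cos to the IBM
   MASS vector library rather than keeping them scalar.  */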
1113
1114 \f
1115 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1116 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1117 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1118 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1119 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1120 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1121 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1122 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1123 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1124 bool);
1125 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1126 unsigned int);
1127 static bool is_microcoded_insn (rtx_insn *);
1128 static bool is_nonpipeline_insn (rtx_insn *);
1129 static bool is_cracked_insn (rtx_insn *);
1130 static bool is_load_insn (rtx, rtx *);
1131 static bool is_store_insn (rtx, rtx *);
1132 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1133 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1134 static bool insn_must_be_first_in_group (rtx_insn *);
1135 static bool insn_must_be_last_in_group (rtx_insn *);
1136 bool easy_vector_constant (rtx, machine_mode);
1137 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1138 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1139 #if TARGET_MACHO
1140 static tree get_prev_label (tree);
1141 #endif
1142 static bool rs6000_mode_dependent_address (const_rtx);
1143 static bool rs6000_debug_mode_dependent_address (const_rtx);
1144 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1145 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1146 machine_mode, rtx);
1147 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1148 machine_mode,
1149 rtx);
1150 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1151 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1152 enum reg_class);
1153 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1154 reg_class_t,
1155 reg_class_t);
1156 static bool rs6000_debug_can_change_mode_class (machine_mode,
1157 machine_mode,
1158 reg_class_t);
1159
1160 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1161 = rs6000_mode_dependent_address;
1162
1163 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1164 machine_mode, rtx)
1165 = rs6000_secondary_reload_class;
1166
1167 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1168 = rs6000_preferred_reload_class;
1169
1170 const int INSN_NOT_AVAILABLE = -1;
1171
1172 static void rs6000_print_isa_options (FILE *, int, const char *,
1173 HOST_WIDE_INT);
1174 static void rs6000_print_builtin_options (FILE *, int, const char *,
1175 HOST_WIDE_INT);
1176 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1177
1178 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1179 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1180 enum rs6000_reg_type,
1181 machine_mode,
1182 secondary_reload_info *,
1183 bool);
1184 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1185
1186 /* Hash table stuff for keeping track of TOC entries. */
1187
1188 struct GTY((for_user)) toc_hash_struct
1189 {
1190 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1191 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1192 rtx key;
1193 machine_mode key_mode;
1194 int labelno;
1195 };
1196
1197 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1198 {
1199 static hashval_t hash (toc_hash_struct *);
1200 static bool equal (toc_hash_struct *, toc_hash_struct *);
1201 };
1202
1203 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
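/* A sketch (hypothetical variable names, real hash_table API) of the
   lookup performed when a constant is forced into the TOC:

     toc_hash_struct *h = ggc_alloc<toc_hash_struct> ();
     h->key = x;
     h->key_mode = mode;
     h->labelno = labelno;
     toc_hash_struct **slot = toc_hash_table->find_slot (h, INSERT);

   so identical constants share a single TOC entry and label.  */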
1204
1205
1206 \f
1207 /* Default register names. */
1208 char rs6000_reg_names[][8] =
1209 {
1210 /* GPRs */
1211 "0", "1", "2", "3", "4", "5", "6", "7",
1212 "8", "9", "10", "11", "12", "13", "14", "15",
1213 "16", "17", "18", "19", "20", "21", "22", "23",
1214 "24", "25", "26", "27", "28", "29", "30", "31",
1215 /* FPRs */
1216 "0", "1", "2", "3", "4", "5", "6", "7",
1217 "8", "9", "10", "11", "12", "13", "14", "15",
1218 "16", "17", "18", "19", "20", "21", "22", "23",
1219 "24", "25", "26", "27", "28", "29", "30", "31",
1220 /* VRs */
1221 "0", "1", "2", "3", "4", "5", "6", "7",
1222 "8", "9", "10", "11", "12", "13", "14", "15",
1223 "16", "17", "18", "19", "20", "21", "22", "23",
1224 "24", "25", "26", "27", "28", "29", "30", "31",
1225 /* lr ctr ca ap */
1226 "lr", "ctr", "ca", "ap",
1227 /* cr0..cr7 */
1228 "0", "1", "2", "3", "4", "5", "6", "7",
1229 /* vrsave vscr sfp */
1230 "vrsave", "vscr", "sfp",
1231 };
1232
1233 #ifdef TARGET_REGNAMES
1234 static const char alt_reg_names[][8] =
1235 {
1236 /* GPRs */
1237 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1238 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1239 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1240 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1241 /* FPRs */
1242 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1243 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1244 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1245 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1246 /* VRs */
1247 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1248 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1249 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1250 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1251 /* lr ctr ca ap */
1252 "lr", "ctr", "ca", "ap",
1253 /* cr0..cr7 */
1254 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1255 /* vrsave vscr sfp */
1256 "vrsave", "vscr", "sfp",
1257 };
1258 #endif
1259
1260 /* Table of valid machine attributes. */
1261
1262 static const struct attribute_spec rs6000_attribute_table[] =
1263 {
1264 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1265 affects_type_identity, handler, exclude } */
1266 { "altivec", 1, 1, false, true, false, false,
1267 rs6000_handle_altivec_attribute, NULL },
1268 { "longcall", 0, 0, false, true, true, false,
1269 rs6000_handle_longcall_attribute, NULL },
1270 { "shortcall", 0, 0, false, true, true, false,
1271 rs6000_handle_longcall_attribute, NULL },
1272 { "ms_struct", 0, 0, false, false, false, false,
1273 rs6000_handle_struct_attribute, NULL },
1274 { "gcc_struct", 0, 0, false, false, false, false,
1275 rs6000_handle_struct_attribute, NULL },
1276 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1277 SUBTARGET_ATTRIBUTE_TABLE,
1278 #endif
1279 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1280 };
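/* For reference, these correspond to source-level attributes such as
   (illustrative declarations, not code from this file):

     void far_func (void) __attribute__ ((longcall));
     typedef int v4si __attribute__ ((altivec (vector__)));
     struct s { char c; int i; } __attribute__ ((ms_struct));
*/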
1281 \f
1282 #ifndef TARGET_PROFILE_KERNEL
1283 #define TARGET_PROFILE_KERNEL 0
1284 #endif
1285 \f
1286 /* Initialize the GCC target structure. */
1287 #undef TARGET_ATTRIBUTE_TABLE
1288 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1289 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1290 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1291 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1292 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1293
1294 #undef TARGET_ASM_ALIGNED_DI_OP
1295 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1296
1297 /* Default unaligned ops are only provided for ELF. Find the ops needed
1298 for non-ELF systems. */
1299 #ifndef OBJECT_FORMAT_ELF
1300 #if TARGET_XCOFF
1301 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1302 64-bit targets. */
1303 #undef TARGET_ASM_UNALIGNED_HI_OP
1304 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1305 #undef TARGET_ASM_UNALIGNED_SI_OP
1306 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1307 #undef TARGET_ASM_UNALIGNED_DI_OP
1308 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1309 #else
1310 /* For Darwin. */
1311 #undef TARGET_ASM_UNALIGNED_HI_OP
1312 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1313 #undef TARGET_ASM_UNALIGNED_SI_OP
1314 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1315 #undef TARGET_ASM_UNALIGNED_DI_OP
1316 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1317 #undef TARGET_ASM_ALIGNED_DI_OP
1318 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1319 #endif
1320 #endif
1321
1322 /* This hook deals with fixups for relocatable code and DI-mode objects
1323 in 64-bit code. */
1324 #undef TARGET_ASM_INTEGER
1325 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1326
1327 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1328 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1329 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1330 #endif
1331
1332 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1333 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1334 rs6000_print_patchable_function_entry
1335
1336 #undef TARGET_SET_UP_BY_PROLOGUE
1337 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1338
1339 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1340 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1341 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1342 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1343 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1344 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1345 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1346 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1347 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1348 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1349 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1350 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1351
1352 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1353 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1354
1355 #undef TARGET_INTERNAL_ARG_POINTER
1356 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1357
1358 #undef TARGET_HAVE_TLS
1359 #define TARGET_HAVE_TLS HAVE_AS_TLS
1360
1361 #undef TARGET_CANNOT_FORCE_CONST_MEM
1362 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1363
1364 #undef TARGET_DELEGITIMIZE_ADDRESS
1365 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1366
1367 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1368 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1369
1370 #undef TARGET_LEGITIMATE_COMBINED_INSN
1371 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1372
1373 #undef TARGET_ASM_FUNCTION_PROLOGUE
1374 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1375 #undef TARGET_ASM_FUNCTION_EPILOGUE
1376 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1377
1378 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1379 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1380
1381 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1382 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1383
1384 #undef TARGET_LEGITIMIZE_ADDRESS
1385 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1386
1387 #undef TARGET_SCHED_VARIABLE_ISSUE
1388 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1389
1390 #undef TARGET_SCHED_ISSUE_RATE
1391 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1392 #undef TARGET_SCHED_ADJUST_COST
1393 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1394 #undef TARGET_SCHED_ADJUST_PRIORITY
1395 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1396 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1397 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1398 #undef TARGET_SCHED_INIT
1399 #define TARGET_SCHED_INIT rs6000_sched_init
1400 #undef TARGET_SCHED_FINISH
1401 #define TARGET_SCHED_FINISH rs6000_sched_finish
1402 #undef TARGET_SCHED_REORDER
1403 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1404 #undef TARGET_SCHED_REORDER2
1405 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1406
1407 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1408 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1409
1410 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1411 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1412
1413 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1414 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1415 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1416 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1417 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1418 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1419 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1420 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1421
1422 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1423 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1424
1425 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1426 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1427 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1428 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1429 rs6000_builtin_support_vector_misalignment
1430 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1431 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1432 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1433 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1434 rs6000_builtin_vectorization_cost
1435 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1436 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1437 rs6000_preferred_simd_mode
1438 #undef TARGET_VECTORIZE_CREATE_COSTS
1439 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1440
1441 #undef TARGET_LOOP_UNROLL_ADJUST
1442 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1443
1444 #undef TARGET_INIT_BUILTINS
1445 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1446 #undef TARGET_BUILTIN_DECL
1447 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1448
1449 #undef TARGET_FOLD_BUILTIN
1450 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1451 #undef TARGET_GIMPLE_FOLD_BUILTIN
1452 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1453
1454 #undef TARGET_EXPAND_BUILTIN
1455 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1456
1457 #undef TARGET_MANGLE_TYPE
1458 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1459
1460 #undef TARGET_INIT_LIBFUNCS
1461 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1462
1463 #if TARGET_MACHO
1464 #undef TARGET_BINDS_LOCAL_P
1465 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1466 #endif
1467
1468 #undef TARGET_MS_BITFIELD_LAYOUT_P
1469 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1470
1471 #undef TARGET_ASM_OUTPUT_MI_THUNK
1472 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1473
1474 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1475 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1476
1477 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1478 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1479
1480 #undef TARGET_REGISTER_MOVE_COST
1481 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1482 #undef TARGET_MEMORY_MOVE_COST
1483 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1484 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1485 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1486 rs6000_ira_change_pseudo_allocno_class
1487 #undef TARGET_CANNOT_COPY_INSN_P
1488 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1489 #undef TARGET_RTX_COSTS
1490 #define TARGET_RTX_COSTS rs6000_rtx_costs
1491 #undef TARGET_ADDRESS_COST
1492 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1493 #undef TARGET_INSN_COST
1494 #define TARGET_INSN_COST rs6000_insn_cost
1495
1496 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1497 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1498
1499 #undef TARGET_PROMOTE_FUNCTION_MODE
1500 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1501
1502 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1503 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1504
1505 #undef TARGET_RETURN_IN_MEMORY
1506 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1507
1508 #undef TARGET_RETURN_IN_MSB
1509 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1510
1511 #undef TARGET_SETUP_INCOMING_VARARGS
1512 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1513
1514 /* Always strict argument naming on rs6000. */
1515 #undef TARGET_STRICT_ARGUMENT_NAMING
1516 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1517 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1518 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1519 #undef TARGET_SPLIT_COMPLEX_ARG
1520 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1521 #undef TARGET_MUST_PASS_IN_STACK
1522 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1523 #undef TARGET_PASS_BY_REFERENCE
1524 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1525 #undef TARGET_ARG_PARTIAL_BYTES
1526 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1527 #undef TARGET_FUNCTION_ARG_ADVANCE
1528 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1529 #undef TARGET_FUNCTION_ARG
1530 #define TARGET_FUNCTION_ARG rs6000_function_arg
1531 #undef TARGET_FUNCTION_ARG_PADDING
1532 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1533 #undef TARGET_FUNCTION_ARG_BOUNDARY
1534 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1535
1536 #undef TARGET_BUILD_BUILTIN_VA_LIST
1537 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1538
1539 #undef TARGET_EXPAND_BUILTIN_VA_START
1540 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1541
1542 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1543 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1544
1545 #undef TARGET_EH_RETURN_FILTER_MODE
1546 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1547
1548 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1549 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1550
1551 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1552 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1553
1554 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1555 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1556 rs6000_libgcc_floating_mode_supported_p
1557
1558 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1559 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1560
1561 #undef TARGET_FLOATN_MODE
1562 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1563
1564 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1565 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1566
1567 #undef TARGET_MD_ASM_ADJUST
1568 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1569
1570 #undef TARGET_OPTION_OVERRIDE
1571 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1572
1573 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1574 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1575 rs6000_builtin_vectorized_function
1576
1577 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1578 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1579 rs6000_builtin_md_vectorized_function
1580
1581 #undef TARGET_STACK_PROTECT_GUARD
1582 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1583
1584 #if !TARGET_MACHO
1585 #undef TARGET_STACK_PROTECT_FAIL
1586 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1587 #endif
1588
1589 #ifdef HAVE_AS_TLS
1590 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1591 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1592 #endif
1593
1594 /* Use a 32-bit anchor range. This leads to sequences like:
1595
1596 addis tmp,anchor,high
1597 add dest,tmp,low
1598
1599 where tmp itself acts as an anchor, and can be shared between
1600 accesses to the same 64k page. */
1601 #undef TARGET_MIN_ANCHOR_OFFSET
1602 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1603 #undef TARGET_MAX_ANCHOR_OFFSET
1604 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1605 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1606 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1607 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1608 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1609
1610 #undef TARGET_BUILTIN_RECIPROCAL
1611 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1612
1613 #undef TARGET_SECONDARY_RELOAD
1614 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1615 #undef TARGET_SECONDARY_MEMORY_NEEDED
1616 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1617 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1618 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1619
1620 #undef TARGET_LEGITIMATE_ADDRESS_P
1621 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1622
1623 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1624 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1625
1626 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1627 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1628
1629 #undef TARGET_CAN_ELIMINATE
1630 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1631
1632 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1633 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1634
1635 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1636 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1637
1638 #undef TARGET_TRAMPOLINE_INIT
1639 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1640
1641 #undef TARGET_FUNCTION_VALUE
1642 #define TARGET_FUNCTION_VALUE rs6000_function_value
1643
1644 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1645 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1646
1647 #undef TARGET_OPTION_SAVE
1648 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1649
1650 #undef TARGET_OPTION_RESTORE
1651 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1652
1653 #undef TARGET_OPTION_PRINT
1654 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1655
1656 #undef TARGET_CAN_INLINE_P
1657 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1658
1659 #undef TARGET_SET_CURRENT_FUNCTION
1660 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1661
1662 #undef TARGET_LEGITIMATE_CONSTANT_P
1663 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1664
1665 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1666 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1667
1668 #undef TARGET_CAN_USE_DOLOOP_P
1669 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1670
1671 #undef TARGET_PREDICT_DOLOOP_P
1672 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1673
1674 #undef TARGET_HAVE_COUNT_REG_DECR_P
1675 #define TARGET_HAVE_COUNT_REG_DECR_P true
1676
1677 /* 1000000000 is infinite cost in IVOPTs. */
1678 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1679 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1680
1681 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1682 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1683
1684 #undef TARGET_PREFERRED_DOLOOP_MODE
1685 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1686
1687 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1688 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1689
1690 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1691 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1692 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1693 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1694 #undef TARGET_UNWIND_WORD_MODE
1695 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1696
1697 #undef TARGET_OFFLOAD_OPTIONS
1698 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1699
1700 #undef TARGET_C_MODE_FOR_SUFFIX
1701 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1702
1703 #undef TARGET_INVALID_BINARY_OP
1704 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1705
1706 #undef TARGET_OPTAB_SUPPORTED_P
1707 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1708
1709 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1710 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1711
1712 #undef TARGET_COMPARE_VERSION_PRIORITY
1713 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1714
1715 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1716 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1717 rs6000_generate_version_dispatcher_body
1718
1719 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1720 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1721 rs6000_get_function_versions_dispatcher
1722
1723 #undef TARGET_OPTION_FUNCTION_VERSIONS
1724 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1725
1726 #undef TARGET_HARD_REGNO_NREGS
1727 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1728 #undef TARGET_HARD_REGNO_MODE_OK
1729 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1730
1731 #undef TARGET_MODES_TIEABLE_P
1732 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1733
1734 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1735 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1736 rs6000_hard_regno_call_part_clobbered
1737
1738 #undef TARGET_SLOW_UNALIGNED_ACCESS
1739 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1740
1741 #undef TARGET_CAN_CHANGE_MODE_CLASS
1742 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1743
1744 #undef TARGET_CONSTANT_ALIGNMENT
1745 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1746
1747 #undef TARGET_STARTING_FRAME_OFFSET
1748 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1749
1750 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1751 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1752
1753 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1754 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1755
1756 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1757 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1758 rs6000_cannot_substitute_mem_equiv_p
1759
1760 #undef TARGET_INVALID_CONVERSION
1761 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1762
1763 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1764 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1765
1766 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1767 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
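/* Illustrative sketch (not part of this file's logic): the #undef/#define
   pairs above override the defaults supplied by target-def.h, and the
   TARGET_INITIALIZER macro then collects the final definitions into the
   targetm hook vector, e.g.

     struct gcc_target targetm = TARGET_INITIALIZER;

   so a middle-end call such as targetm.can_eliminate (from, to)
   dispatches to rs6000_can_eliminate.  */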
1768 \f
1769
1770 /* Processor table. */
1771 struct rs6000_ptt
1772 {
1773 const char *const name; /* Canonical processor name. */
1774 const enum processor_type processor; /* Processor type enum value. */
1775 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1776 };
1777
1778 static struct rs6000_ptt const processor_target_table[] =
1779 {
1780 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1781 #include "rs6000-cpus.def"
1782 #undef RS6000_CPU
1783 };
1784
1785 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1786 name is invalid. */
1787
1788 static int
1789 rs6000_cpu_name_lookup (const char *name)
1790 {
1791 size_t i;
1792
1793 if (name != NULL)
1794 {
1795 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1796 if (! strcmp (name, processor_target_table[i].name))
1797 return (int)i;
1798 }
1799
1800 return -1;
1801 }
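/* Hypothetical usage sketch (not compiled): option handling validates a
   -mcpu=/-mtune= argument with the lookup above; the function name and
   the diagnostic wording here are illustrative only.  */
#if 0
static void
example_set_cpu_index (const char *arg)
{
  int idx = rs6000_cpu_name_lookup (arg);  /* -1 if ARG is unknown.  */
  if (idx >= 0)
    rs6000_cpu_index = idx;
  else
    error ("unknown value %qs for %<-mcpu%>", arg);
}
#endif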
1802
1803 \f
1804 /* Return number of consecutive hard regs needed starting at reg REGNO
1805 to hold something of mode MODE.
1806 This is ordinarily the length in words of a value of mode MODE
1807 but can be less for certain modes in special long registers.
1808
1809 POWER and PowerPC GPRs hold 32 bits worth;
1810 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1811
1812 static int
1813 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1814 {
1815 unsigned HOST_WIDE_INT reg_size;
1816
1817 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1818 128-bit floating point that can go in vector registers, which has VSX
1819 memory addressing. */
1820 if (FP_REGNO_P (regno))
1821 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1822 ? UNITS_PER_VSX_WORD
1823 : UNITS_PER_FP_WORD);
1824
1825 else if (ALTIVEC_REGNO_P (regno))
1826 reg_size = UNITS_PER_ALTIVEC_WORD;
1827
1828 else
1829 reg_size = UNITS_PER_WORD;
1830
1831 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1832 }
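/* Worked examples of the round-up above, assuming a 64-bit target where
   UNITS_PER_WORD == 8, UNITS_PER_FP_WORD == 8 and
   UNITS_PER_ALTIVEC_WORD == 16:

     V4SImode (16 bytes) in a GPR:          (16 + 8 - 1) / 8   == 2 regs
     V4SImode (16 bytes) in an Altivec reg: (16 + 16 - 1) / 16 == 1 reg
     DFmode (8 bytes) in an FPR:            (8 + 8 - 1) / 8    == 1 reg  */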
1833
1834 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1835 MODE. */
1836 static int
1837 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1838 {
1839 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1840
1841 if (COMPLEX_MODE_P (mode))
1842 mode = GET_MODE_INNER (mode);
1843
1844 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1845 registers. */
1846 if (mode == OOmode)
1847 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1848
1849 /* MMA accumulator modes need FPR registers divisible by 4. */
1850 if (mode == XOmode)
1851 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1852
1853 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1854 register pairs, and PTImode is the mode we use for them. Don't allow
1855 quad words in the argument or frame pointer registers, just registers
1856 0..31. */
1857 if (mode == PTImode)
1858 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1859 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1860 && ((regno & 1) == 0));
1861
1862 /* VSX registers that overlap the FPR registers are wider than the FPRs on
1863 non-VSX implementations. Don't allow an item to be split between an FP
1864 register and an Altivec register. Allow TImode in all VSX registers if
1865 the user asked for it. */
1866 if (TARGET_VSX && VSX_REGNO_P (regno)
1867 && (VECTOR_MEM_VSX_P (mode)
1868 || VECTOR_ALIGNMENT_P (mode)
1869 || reg_addr[mode].scalar_in_vmx_p
1870 || mode == TImode
1871 || (TARGET_VADDUQM && mode == V1TImode)))
1872 {
1873 if (FP_REGNO_P (regno))
1874 return FP_REGNO_P (last_regno);
1875
1876 if (ALTIVEC_REGNO_P (regno))
1877 {
1878 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1879 return 0;
1880
1881 return ALTIVEC_REGNO_P (last_regno);
1882 }
1883 }
1884
1885 /* The GPRs can hold any mode, but values bigger than one register
1886 cannot go past R31. */
1887 if (INT_REGNO_P (regno))
1888 return INT_REGNO_P (last_regno);
1889
1890 /* The float registers (except for VSX vector modes) can only hold floating
1891 modes and DImode. */
1892 if (FP_REGNO_P (regno))
1893 {
1894 if (VECTOR_ALIGNMENT_P (mode))
1895 return false;
1896
1897 if (SCALAR_FLOAT_MODE_P (mode)
1898 && (mode != TDmode || (regno % 2) == 0)
1899 && FP_REGNO_P (last_regno))
1900 return 1;
1901
1902 if (GET_MODE_CLASS (mode) == MODE_INT)
1903 {
1904 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1905 return 1;
1906
1907 if (TARGET_P8_VECTOR && (mode == SImode))
1908 return 1;
1909
1910 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1911 return 1;
1912 }
1913
1914 return 0;
1915 }
1916
1917 /* The CR register can only hold CC modes. */
1918 if (CR_REGNO_P (regno))
1919 return GET_MODE_CLASS (mode) == MODE_CC;
1920
1921 if (CA_REGNO_P (regno))
1922 return mode == Pmode || mode == SImode;
1923
1924 /* AltiVec modes only in AltiVec registers. */
1925 if (ALTIVEC_REGNO_P (regno))
1926 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1927 || mode == V1TImode);
1928
1929 /* We cannot put non-VSX TImode or PTImode anywhere except in the general
1930 registers, and the value must fit within the register set. */
1931
1932 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1933 }
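/* A few illustrative outcomes of the checks above (hard register
   numbers follow the usual rs6000 layout where the FPRs start at 32):

     XOmode in FPR 32 (regno divisible by 4, TARGET_MMA): allowed
     XOmode in FPR 34 (regno % 4 != 0):                   rejected
     TDmode in FPR 33 (odd regno):                        rejected
     PTImode starting in an odd-numbered GPR:             rejected  */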
1934
1935 /* Implement TARGET_HARD_REGNO_NREGS. */
1936
1937 static unsigned int
1938 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1939 {
1940 return rs6000_hard_regno_nregs[mode][regno];
1941 }
1942
1943 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1944
1945 static bool
1946 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1947 {
1948 return rs6000_hard_regno_mode_ok_p[mode][regno];
1949 }
1950
1951 /* Implement TARGET_MODES_TIEABLE_P.
1952
1953 PTImode cannot tie with other modes because PTImode is restricted to even
1954 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1955 57744).
1956
1957 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1958 registers) or XOmode (vector quad, restricted to FPR registers divisible
1959 by 4) to tie with other modes.
1960
1961 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1962 128-bit floating point on VSX systems ties with other vectors. */
1963
1964 static bool
1965 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1966 {
1967 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1968 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1969 return mode1 == mode2;
1970
1971 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1972 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1973 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1974 return false;
1975
1976 if (SCALAR_FLOAT_MODE_P (mode1))
1977 return SCALAR_FLOAT_MODE_P (mode2);
1978 if (SCALAR_FLOAT_MODE_P (mode2))
1979 return false;
1980
1981 if (GET_MODE_CLASS (mode1) == MODE_CC)
1982 return GET_MODE_CLASS (mode2) == MODE_CC;
1983 if (GET_MODE_CLASS (mode2) == MODE_CC)
1984 return false;
1985
1986 return true;
1987 }
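/* Illustrative results given the ordering above:

     rs6000_modes_tieable_p (V4SImode, V2DFmode) -> true  (both vectors)
     rs6000_modes_tieable_p (V4SImode, DFmode)   -> false (vector vs. float)
     rs6000_modes_tieable_p (DFmode, SFmode)     -> true  (both scalar float)
     rs6000_modes_tieable_p (TImode, PTImode)    -> false (PTImode only
                                                    ties with itself)  */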
1988
1989 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1990
1991 static bool
1992 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1993 machine_mode mode)
1994 {
1995 if (TARGET_32BIT
1996 && TARGET_POWERPC64
1997 && GET_MODE_SIZE (mode) > 4
1998 && INT_REGNO_P (regno))
1999 return true;
2000
2001 if (TARGET_VSX
2002 && FP_REGNO_P (regno)
2003 && GET_MODE_SIZE (mode) > 8
2004 && !FLOAT128_2REG_P (mode))
2005 return true;
2006
2007 return false;
2008 }
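/* Concrete cases (illustrative): with -m32 -mpowerpc64, only the low
   32 bits of a nonvolatile GPR are saved across calls, so a DImode
   value in a GPR is partially clobbered.  Likewise the ABIs only
   preserve the low 64 bits of the FPRs, so under VSX a 16-byte value
   such as V2DFmode living in a register that overlaps an FPR loses
   its upper half across a call.  */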
2009
2010 /* Print interesting facts about registers. */
2011 static void
2012 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2013 {
2014 int r, m;
2015
2016 for (r = first_regno; r <= last_regno; ++r)
2017 {
2018 const char *comma = "";
2019 int len;
2020
2021 if (first_regno == last_regno)
2022 fprintf (stderr, "%s:\t", reg_name);
2023 else
2024 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2025
2026 len = 8;
2027 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2028 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2029 {
2030 if (len > 70)
2031 {
2032 fprintf (stderr, ",\n\t");
2033 len = 8;
2034 comma = "";
2035 }
2036
2037 if (rs6000_hard_regno_nregs[m][r] > 1)
2038 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2039 rs6000_hard_regno_nregs[m][r]);
2040 else
2041 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2042
2043 comma = ", ";
2044 }
2045
2046 if (call_used_or_fixed_reg_p (r))
2047 {
2048 if (len > 70)
2049 {
2050 fprintf (stderr, ",\n\t");
2051 len = 8;
2052 comma = "";
2053 }
2054
2055 len += fprintf (stderr, "%s%s", comma, "call-used");
2056 comma = ", ";
2057 }
2058
2059 if (fixed_regs[r])
2060 {
2061 if (len > 70)
2062 {
2063 fprintf (stderr, ",\n\t");
2064 len = 8;
2065 comma = "";
2066 }
2067
2068 len += fprintf (stderr, "%s%s", comma, "fixed");
2069 comma = ", ";
2070 }
2071
2072 if (len > 70)
2073 {
2074 fprintf (stderr, ",\n\t");
2075 comma = "";
2076 }
2077
2078 len += fprintf (stderr, "%sreg-class = %s", comma,
2079 reg_class_names[(int)rs6000_regno_regclass[r]]);
2080 comma = ", ";
2081
2082 if (len > 70)
2083 {
2084 fprintf (stderr, ",\n\t");
2085 comma = "";
2086 }
2087
2088 fprintf (stderr, "%sregno = %d\n", comma, r);
2089 }
2090 }
2091
2092 static const char *
2093 rs6000_debug_vector_unit (enum rs6000_vector v)
2094 {
2095 const char *ret;
2096
2097 switch (v)
2098 {
2099 case VECTOR_NONE: ret = "none"; break;
2100 case VECTOR_ALTIVEC: ret = "altivec"; break;
2101 case VECTOR_VSX: ret = "vsx"; break;
2102 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2103 default: ret = "unknown"; break;
2104 }
2105
2106 return ret;
2107 }
2108
2109 /* Inner function printing just the address mask for a particular reload
2110 register class. */
2111 DEBUG_FUNCTION char *
2112 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2113 {
2114 static char ret[8];
2115 char *p = ret;
2116
2117 if ((mask & RELOAD_REG_VALID) != 0)
2118 *p++ = 'v';
2119 else if (keep_spaces)
2120 *p++ = ' ';
2121
2122 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2123 *p++ = 'm';
2124 else if (keep_spaces)
2125 *p++ = ' ';
2126
2127 if ((mask & RELOAD_REG_INDEXED) != 0)
2128 *p++ = 'i';
2129 else if (keep_spaces)
2130 *p++ = ' ';
2131
2132 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2133 *p++ = 'O';
2134 else if ((mask & RELOAD_REG_OFFSET) != 0)
2135 *p++ = 'o';
2136 else if (keep_spaces)
2137 *p++ = ' ';
2138
2139 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2140 *p++ = '+';
2141 else if (keep_spaces)
2142 *p++ = ' ';
2143
2144 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2145 *p++ = '+';
2146 else if (keep_spaces)
2147 *p++ = ' ';
2148
2149 if ((mask & RELOAD_REG_AND_M16) != 0)
2150 *p++ = '&';
2151 else if (keep_spaces)
2152 *p++ = ' ';
2153
2154 *p = '\0';
2155
2156 return ret;
2157 }
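/* For example, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET set prints as "v io   " with KEEP_SPACES (one
   column per flag, blank when the flag is unset) and as "vio"
   without it.  */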
2158
2159 /* Print the address masks in a human readable fashion. */
2160 DEBUG_FUNCTION void
2161 rs6000_debug_print_mode (ssize_t m)
2162 {
2163 ssize_t rc;
2164 int spaces = 0;
2165
2166 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2167 for (rc = 0; rc < N_RELOAD_REG; rc++)
2168 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2169 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2170
2171 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2172 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2173 {
2174 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2175 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2176 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2177 spaces = 0;
2178 }
2179 else
2180 spaces += strlen (" Reload=sl");
2181
2182 if (reg_addr[m].scalar_in_vmx_p)
2183 {
2184 fprintf (stderr, "%*s Upper=y", spaces, "");
2185 spaces = 0;
2186 }
2187 else
2188 spaces += strlen (" Upper=y");
2189
2190 if (rs6000_vector_unit[m] != VECTOR_NONE
2191 || rs6000_vector_mem[m] != VECTOR_NONE)
2192 {
2193 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2194 spaces, "",
2195 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2196 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2197 }
2198
2199 fputs ("\n", stderr);
2200 }
2201
2202 #define DEBUG_FMT_ID "%-32s= "
2203 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2204 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2205 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
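/* These rely on string-literal pasting, e.g.

     fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);

   expands to

     fprintf (stderr, "%-32s= " "%d\n", "tls_size", rs6000_tls_size);

   printing the name left-justified in a 32-character column followed
   by the value.  */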
2206
2207 /* Print various interesting information with -mdebug=reg. */
2208 static void
2209 rs6000_debug_reg_global (void)
2210 {
2211 static const char *const tf[2] = { "false", "true" };
2212 const char *nl = (const char *)0;
2213 int m;
2214 size_t m1, m2, v;
2215 char costly_num[20];
2216 char nop_num[20];
2217 char flags_buffer[40];
2218 const char *costly_str;
2219 const char *nop_str;
2220 const char *trace_str;
2221 const char *abi_str;
2222 const char *cmodel_str;
2223 struct cl_target_option cl_opts;
2224
2225 /* Modes we want tieable information on. */
2226 static const machine_mode print_tieable_modes[] = {
2227 QImode,
2228 HImode,
2229 SImode,
2230 DImode,
2231 TImode,
2232 PTImode,
2233 SFmode,
2234 DFmode,
2235 TFmode,
2236 IFmode,
2237 KFmode,
2238 SDmode,
2239 DDmode,
2240 TDmode,
2241 V2SImode,
2242 V2SFmode,
2243 V16QImode,
2244 V8HImode,
2245 V4SImode,
2246 V2DImode,
2247 V1TImode,
2248 V32QImode,
2249 V16HImode,
2250 V8SImode,
2251 V4DImode,
2252 V2TImode,
2253 V4SFmode,
2254 V2DFmode,
2255 V8SFmode,
2256 V4DFmode,
2257 OOmode,
2258 XOmode,
2259 CCmode,
2260 CCUNSmode,
2261 CCEQmode,
2262 CCFPmode,
2263 };
2264
2265 /* Virtual regs we are interested in. */
2266 static const struct {
2267 int regno; /* register number. */
2268 const char *name; /* register name. */
2269 } virtual_regs[] = {
2270 { STACK_POINTER_REGNUM, "stack pointer:" },
2271 { TOC_REGNUM, "toc: " },
2272 { STATIC_CHAIN_REGNUM, "static chain: " },
2273 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2274 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2275 { ARG_POINTER_REGNUM, "arg pointer: " },
2276 { FRAME_POINTER_REGNUM, "frame pointer:" },
2277 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2278 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2279 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2280 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2281 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2282 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2283 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2284 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2285 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2286 };
2287
2288 fputs ("\nHard register information:\n", stderr);
2289 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2290 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2291 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2292 LAST_ALTIVEC_REGNO,
2293 "vs");
2294 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2295 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2296 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2297 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2298 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2299 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2300
2301 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2302 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2303 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2304
2305 fprintf (stderr,
2306 "\n"
2307 "d reg_class = %s\n"
2308 "f reg_class = %s\n"
2309 "v reg_class = %s\n"
2310 "wa reg_class = %s\n"
2311 "we reg_class = %s\n"
2312 "wr reg_class = %s\n"
2313 "wx reg_class = %s\n"
2314 "wA reg_class = %s\n"
2315 "\n",
2316 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2317 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2318 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2319 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2320 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2321 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2322 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2323 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2324
2325 nl = "\n";
2326 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2327 rs6000_debug_print_mode (m);
2328
2329 fputs ("\n", stderr);
2330
2331 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2332 {
2333 machine_mode mode1 = print_tieable_modes[m1];
2334 bool first_time = true;
2335
2336 nl = (const char *)0;
2337 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2338 {
2339 machine_mode mode2 = print_tieable_modes[m2];
2340 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2341 {
2342 if (first_time)
2343 {
2344 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2345 nl = "\n";
2346 first_time = false;
2347 }
2348
2349 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2350 }
2351 }
2352
2353 if (!first_time)
2354 fputs ("\n", stderr);
2355 }
2356
2357 if (nl)
2358 fputs (nl, stderr);
2359
2360 if (rs6000_recip_control)
2361 {
2362 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2363
2364 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2365 if (rs6000_recip_bits[m])
2366 {
2367 fprintf (stderr,
2368 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2369 GET_MODE_NAME (m),
2370 (RS6000_RECIP_AUTO_RE_P (m)
2371 ? "auto"
2372 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2373 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2374 ? "auto"
2375 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2376 }
2377
2378 fputs ("\n", stderr);
2379 }
2380
2381 if (rs6000_cpu_index >= 0)
2382 {
2383 const char *name = processor_target_table[rs6000_cpu_index].name;
2384 HOST_WIDE_INT flags
2385 = processor_target_table[rs6000_cpu_index].target_enable;
2386
2387 sprintf (flags_buffer, "-mcpu=%s flags", name);
2388 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2389 }
2390 else
2391 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2392
2393 if (rs6000_tune_index >= 0)
2394 {
2395 const char *name = processor_target_table[rs6000_tune_index].name;
2396 HOST_WIDE_INT flags
2397 = processor_target_table[rs6000_tune_index].target_enable;
2398
2399 sprintf (flags_buffer, "-mtune=%s flags", name);
2400 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2401 }
2402 else
2403 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2404
2405 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2406 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2407 rs6000_isa_flags);
2408
2409 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2410 rs6000_isa_flags_explicit);
2411
2412 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2413 rs6000_builtin_mask);
2414
2415 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2416
2417 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2418 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2419
2420 switch (rs6000_sched_costly_dep)
2421 {
2422 case max_dep_latency:
2423 costly_str = "max_dep_latency";
2424 break;
2425
2426 case no_dep_costly:
2427 costly_str = "no_dep_costly";
2428 break;
2429
2430 case all_deps_costly:
2431 costly_str = "all_deps_costly";
2432 break;
2433
2434 case true_store_to_load_dep_costly:
2435 costly_str = "true_store_to_load_dep_costly";
2436 break;
2437
2438 case store_to_load_dep_costly:
2439 costly_str = "store_to_load_dep_costly";
2440 break;
2441
2442 default:
2443 costly_str = costly_num;
2444 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2445 break;
2446 }
2447
2448 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2449
2450 switch (rs6000_sched_insert_nops)
2451 {
2452 case sched_finish_regroup_exact:
2453 nop_str = "sched_finish_regroup_exact";
2454 break;
2455
2456 case sched_finish_pad_groups:
2457 nop_str = "sched_finish_pad_groups";
2458 break;
2459
2460 case sched_finish_none:
2461 nop_str = "sched_finish_none";
2462 break;
2463
2464 default:
2465 nop_str = nop_num;
2466 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2467 break;
2468 }
2469
2470 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2471
2472 switch (rs6000_sdata)
2473 {
2474 default:
2475 case SDATA_NONE:
2476 break;
2477
2478 case SDATA_DATA:
2479 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2480 break;
2481
2482 case SDATA_SYSV:
2483 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2484 break;
2485
2486 case SDATA_EABI:
2487 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2488 break;
2489
2490 }
2491
2492 switch (rs6000_traceback)
2493 {
2494 case traceback_default: trace_str = "default"; break;
2495 case traceback_none: trace_str = "none"; break;
2496 case traceback_part: trace_str = "part"; break;
2497 case traceback_full: trace_str = "full"; break;
2498 default: trace_str = "unknown"; break;
2499 }
2500
2501 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2502
2503 switch (rs6000_current_cmodel)
2504 {
2505 case CMODEL_SMALL: cmodel_str = "small"; break;
2506 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2507 case CMODEL_LARGE: cmodel_str = "large"; break;
2508 default: cmodel_str = "unknown"; break;
2509 }
2510
2511 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2512
2513 switch (rs6000_current_abi)
2514 {
2515 case ABI_NONE: abi_str = "none"; break;
2516 case ABI_AIX: abi_str = "aix"; break;
2517 case ABI_ELFv2: abi_str = "ELFv2"; break;
2518 case ABI_V4: abi_str = "V4"; break;
2519 case ABI_DARWIN: abi_str = "darwin"; break;
2520 default: abi_str = "unknown"; break;
2521 }
2522
2523 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2524
2525 if (rs6000_altivec_abi)
2526 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2527
2528 if (rs6000_aix_extabi)
2529 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2530
2531 if (rs6000_darwin64_abi)
2532 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2533
2534 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2535 (TARGET_SOFT_FLOAT ? "true" : "false"));
2536
2537 if (TARGET_LINK_STACK)
2538 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2539
2540 if (TARGET_P8_FUSION)
2541 {
2542 char options[80];
2543
2544 strcpy (options, "power8");
2545 if (TARGET_P8_FUSION_SIGN)
2546 strcat (options, ", sign");
2547
2548 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2549 }
2550
2551 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2552 TARGET_SECURE_PLT ? "secure" : "bss");
2553 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2554 aix_struct_return ? "aix" : "sysv");
2555 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2556 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2557 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2558 tf[!!rs6000_align_branch_targets]);
2559 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2560 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2561 rs6000_long_double_type_size);
2562 if (rs6000_long_double_type_size > 64)
2563 {
2564 fprintf (stderr, DEBUG_FMT_S, "long double type",
2565 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2566 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2567 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2568 }
2569 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2570 (int)rs6000_sched_restricted_insns_priority);
2571 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2572 (int)END_BUILTINS);
2573
2574 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2575 (int)TARGET_FLOAT128_ENABLE_TYPE);
2576
2577 if (TARGET_VSX)
2578 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2579 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2580
2581 if (TARGET_DIRECT_MOVE_128)
2582 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2583 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2584 }
2585
2586 \f
2587 /* Update the addr mask bits in reg_addr to help secondary reload and the
2588 legitimate address support (rs6000_legitimate_address_p) figure out the
2589 appropriate addressing to use. */
2590
2591 static void
2592 rs6000_setup_reg_addr_masks (void)
2593 {
2594 ssize_t rc, reg, m, nregs;
2595 addr_mask_type any_addr_mask, addr_mask;
2596
2597 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2598 {
2599 machine_mode m2 = (machine_mode) m;
2600 bool complex_p = false;
2601 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2602 size_t msize;
2603
2604 if (COMPLEX_MODE_P (m2))
2605 {
2606 complex_p = true;
2607 m2 = GET_MODE_INNER (m2);
2608 }
2609
2610 msize = GET_MODE_SIZE (m2);
2611
2612 /* SDmode is special in that we want to access it only via REG+REG
2613 addressing on power7 and above, since we want to use the LFIWZX and
2614 STFIWZX instructions to load it. */
2615 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2616
2617 any_addr_mask = 0;
2618 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2619 {
2620 addr_mask = 0;
2621 reg = reload_reg_map[rc].reg;
2622
2623 /* Can mode values go in the GPR/FPR/Altivec registers? */
2624 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2625 {
2626 bool small_int_vsx_p = (small_int_p
2627 && (rc == RELOAD_REG_FPR
2628 || rc == RELOAD_REG_VMX));
2629
2630 nregs = rs6000_hard_regno_nregs[m][reg];
2631 addr_mask |= RELOAD_REG_VALID;
2632
2633 /* Indicate if the mode takes more than 1 physical register. If
2634 it takes a single register, indicate it can do REG+REG
2635 addressing. Small integers in VSX registers can only do
2636 REG+REG addressing. */
2637 if (small_int_vsx_p)
2638 addr_mask |= RELOAD_REG_INDEXED;
2639 else if (nregs > 1 || m == BLKmode || complex_p)
2640 addr_mask |= RELOAD_REG_MULTIPLE;
2641 else
2642 addr_mask |= RELOAD_REG_INDEXED;
2643
2644 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2645 addressing. If we allow scalars into Altivec registers,
2646 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2647
2648 For VSX systems, we don't allow update addressing for
2649 DFmode/SFmode if those registers can go in both the
2650 traditional floating point registers and Altivec registers.
2651 The load/store instructions for the Altivec registers do not
2652 have update forms. If we allowed update addressing, it seems
2653 to break IV-OPT code using floating point if the index type is
2654 int instead of long (PR target/81550 and target/84042). */
2655
2656 if (TARGET_UPDATE
2657 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2658 && msize <= 8
2659 && !VECTOR_MODE_P (m2)
2660 && !VECTOR_ALIGNMENT_P (m2)
2661 && !complex_p
2662 && (m != E_DFmode || !TARGET_VSX)
2663 && (m != E_SFmode || !TARGET_P8_VECTOR)
2664 && !small_int_vsx_p)
2665 {
2666 addr_mask |= RELOAD_REG_PRE_INCDEC;
2667
2668 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2669 we don't allow PRE_MODIFY for some multi-register
2670 operations. */
2671 switch (m)
2672 {
2673 default:
2674 addr_mask |= RELOAD_REG_PRE_MODIFY;
2675 break;
2676
2677 case E_DImode:
2678 if (TARGET_POWERPC64)
2679 addr_mask |= RELOAD_REG_PRE_MODIFY;
2680 break;
2681
2682 case E_DFmode:
2683 case E_DDmode:
2684 if (TARGET_HARD_FLOAT)
2685 addr_mask |= RELOAD_REG_PRE_MODIFY;
2686 break;
2687 }
2688 }
2689 }
2690
2691 /* GPR and FPR registers can do REG+OFFSET addressing, except
2692 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2693 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2694 if ((addr_mask != 0) && !indexed_only_p
2695 && msize <= 8
2696 && (rc == RELOAD_REG_GPR
2697 || ((msize == 8 || m2 == SFmode)
2698 && (rc == RELOAD_REG_FPR
2699 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2700 addr_mask |= RELOAD_REG_OFFSET;
2701
2702 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2703 instructions are enabled. The offset for 128-bit VSX registers is
2704 only 12 bits. While GPRs can handle the full offset range, VSX
2705 registers can only handle the restricted range. */
2706 else if ((addr_mask != 0) && !indexed_only_p
2707 && msize == 16 && TARGET_P9_VECTOR
2708 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2709 || (m2 == TImode && TARGET_VSX)))
2710 {
2711 addr_mask |= RELOAD_REG_OFFSET;
2712 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2713 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2714 }
2715
2716 /* Vector pairs can do both indexed and offset loads if the
2717 instructions are enabled; otherwise they can only do offset loads,
2718 since the access will be broken into two vector moves. Vector quads can
2719 only do offset loads. */
2720 else if ((addr_mask != 0) && TARGET_MMA
2721 && (m2 == OOmode || m2 == XOmode))
2722 {
2723 addr_mask |= RELOAD_REG_OFFSET;
2724 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2725 {
2726 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2727 if (m2 == OOmode)
2728 addr_mask |= RELOAD_REG_INDEXED;
2729 }
2730 }
2731
2732 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2733 addressing on 128-bit types. */
2734 if (rc == RELOAD_REG_VMX && msize == 16
2735 && (addr_mask & RELOAD_REG_VALID) != 0)
2736 addr_mask |= RELOAD_REG_AND_M16;
2737
2738 reg_addr[m].addr_mask[rc] = addr_mask;
2739 any_addr_mask |= addr_mask;
2740 }
2741
2742 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2743 }
2744 }
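/* Hypothetical sketch (not compiled) of how the masks built above are
   consumed; the function name is illustrative only.  */
#if 0
static bool
example_offset_addressing_ok_p (machine_mode mode, int rc)
{
  addr_mask_type mask = reg_addr[mode].addr_mask[rc];
  return ((mask & RELOAD_REG_VALID) != 0
	  && (mask & RELOAD_REG_OFFSET) != 0);
}
#endif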
2745
2746 \f
2747 /* Initialize the various global tables that are based on register size. */
2748 static void
2749 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2750 {
2751 ssize_t r, m, c;
2752 int align64;
2753 int align32;
2754
2755 /* Precalculate REGNO_REG_CLASS. */
2756 rs6000_regno_regclass[0] = GENERAL_REGS;
2757 for (r = 1; r < 32; ++r)
2758 rs6000_regno_regclass[r] = BASE_REGS;
2759
2760 for (r = 32; r < 64; ++r)
2761 rs6000_regno_regclass[r] = FLOAT_REGS;
2762
2763 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2764 rs6000_regno_regclass[r] = NO_REGS;
2765
2766 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2767 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2768
2769 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2770 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2771 rs6000_regno_regclass[r] = CR_REGS;
2772
2773 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2774 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2775 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2776 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2777 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2778 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2779 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2780
2781 /* Precalculate register class to simpler reload register class. We don't
2782 need all of the register classes that are combinations of different
2783 classes, just the simple ones that have constraint letters. */
2784 for (c = 0; c < N_REG_CLASSES; c++)
2785 reg_class_to_reg_type[c] = NO_REG_TYPE;
2786
2787 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2788 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2789 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2790 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2791 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2792 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2793 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2794 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2795 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2796 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2797
2798 if (TARGET_VSX)
2799 {
2800 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2801 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2802 }
2803 else
2804 {
2805 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2806 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2807 }
2808
2809 /* Precalculate the valid memory formats as well as the vector information;
2810 this must be set up before the rs6000_hard_regno_nregs_internal calls
2811 below. */
2812 gcc_assert ((int)VECTOR_NONE == 0);
2813 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2814 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2815
2816 gcc_assert ((int)CODE_FOR_nothing == 0);
2817 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2818
2819 gcc_assert ((int)NO_REGS == 0);
2820 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2821
2822 /* The VSX hardware allows native alignment for vectors; control whether the
2823 compiler believes it can use native alignment or must still use 128-bit alignment. */
2824 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2825 {
2826 align64 = 64;
2827 align32 = 32;
2828 }
2829 else
2830 {
2831 align64 = 128;
2832 align32 = 128;
2833 }
2834
2835 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2836 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2837 if (TARGET_FLOAT128_TYPE)
2838 {
2839 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2840 rs6000_vector_align[KFmode] = 128;
2841
2842 if (FLOAT128_IEEE_P (TFmode))
2843 {
2844 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2845 rs6000_vector_align[TFmode] = 128;
2846 }
2847 }
2848
2849 /* V2DF mode, VSX only. */
2850 if (TARGET_VSX)
2851 {
2852 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2853 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2854 rs6000_vector_align[V2DFmode] = align64;
2855 }
2856
2857 /* V4SF mode, either VSX or Altivec. */
2858 if (TARGET_VSX)
2859 {
2860 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2861 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2862 rs6000_vector_align[V4SFmode] = align32;
2863 }
2864 else if (TARGET_ALTIVEC)
2865 {
2866 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2867 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2868 rs6000_vector_align[V4SFmode] = align32;
2869 }
2870
2871 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2872 and stores. */
2873 if (TARGET_ALTIVEC)
2874 {
2875 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2876 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2877 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2878 rs6000_vector_align[V4SImode] = align32;
2879 rs6000_vector_align[V8HImode] = align32;
2880 rs6000_vector_align[V16QImode] = align32;
2881
2882 if (TARGET_VSX)
2883 {
2884 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2885 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2886 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2887 }
2888 else
2889 {
2890 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2891 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2892 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2893 }
2894 }
2895
2896 /* V2DImode: full arithmetic depends on the ISA 2.07 vector unit. Allow it
2897 under VSX for insert/splat/extract. Altivec lacks 64-bit integer support. */
2898 if (TARGET_VSX)
2899 {
2900 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2901 rs6000_vector_unit[V2DImode]
2902 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2903 rs6000_vector_align[V2DImode] = align64;
2904
2905 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2906 rs6000_vector_unit[V1TImode]
2907 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2908 rs6000_vector_align[V1TImode] = 128;
2909 }
2910
2911 /* DFmode, see if we want to use the VSX unit. Memory is handled
2912 differently, so don't set rs6000_vector_mem. */
2913 if (TARGET_VSX)
2914 {
2915 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2916 rs6000_vector_align[DFmode] = 64;
2917 }
2918
2919 /* SFmode, see if we want to use the VSX unit. */
2920 if (TARGET_P8_VECTOR)
2921 {
2922 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2923 rs6000_vector_align[SFmode] = 32;
2924 }
2925
2926 /* Allow TImode in VSX register and set the VSX memory macros. */
2927 if (TARGET_VSX)
2928 {
2929 rs6000_vector_mem[TImode] = VECTOR_VSX;
2930 rs6000_vector_align[TImode] = align64;
2931 }
2932
2933 /* Add support for vector pairs and vector quad registers. */
2934 if (TARGET_MMA)
2935 {
2936 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2937 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2938 rs6000_vector_align[OOmode] = 256;
2939
2940 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2941 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2942 rs6000_vector_align[XOmode] = 512;
2943 }
2944
2945 /* Set up the register class constraints that depend on compile
2946 switches. When the VSX code was added, different constraints were added
2947 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2948 of the VSX registers are used. The register classes for scalar floating
2949 point types are set based on whether we allow that type into the upper
2950 (Altivec) registers. GCC has register classes to target the Altivec
2951 registers for load/store operations, to select using a VSX memory
2952 operation instead of the traditional floating point operation. The
2953 constraints are:
2954
2955 d - Register class to use with traditional DFmode instructions.
2956 f - Register class to use with traditional SFmode instructions.
2957 v - Altivec register.
2958 wa - Any VSX register.
2959 wc - Reserved to represent individual CR bits (used in LLVM).
2960 wn - always NO_REGS.
2961 wr - GPR if 64-bit mode is permitted.
2962 wx - Float register if we can do 32-bit int stores. */
2963
2964 if (TARGET_HARD_FLOAT)
2965 {
2966 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2967 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2968 }
2969
2970 if (TARGET_VSX)
2971 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2972
2973 /* Add conditional constraints based on various options, to allow us to
2974 collapse multiple insn patterns. */
2975 if (TARGET_ALTIVEC)
2976 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2977
2978 if (TARGET_POWERPC64)
2979 {
2980 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2981 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2982 }
2983
2984 if (TARGET_STFIWX)
2985 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2986
2987 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2988 if (TARGET_DIRECT_MOVE_128)
2989 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2990
2991 /* Set up the reload helper and direct move functions. */
2992 if (TARGET_VSX || TARGET_ALTIVEC)
2993 {
2994 if (TARGET_64BIT)
2995 {
2996 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2997 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2998 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2999 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3000 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3001 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3002 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3003 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3004 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3005 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3006 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3007 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3008 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3009 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3010 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3011 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3012 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3013 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3014 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3015 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3016
3017 if (FLOAT128_VECTOR_P (KFmode))
3018 {
3019 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3020 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3021 }
3022
3023 if (FLOAT128_VECTOR_P (TFmode))
3024 {
3025 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3026 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3027 }
3028
3029 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3030 available. */
3031 if (TARGET_NO_SDMODE_STACK)
3032 {
3033 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3034 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3035 }
3036
3037 if (TARGET_VSX)
3038 {
3039 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3040 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3041 }
3042
3043 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3044 {
3045 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3046 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3047 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3048 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3049 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3050 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3051 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3052 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3053 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3054
3055 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3056 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3057 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3058 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3059 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3060 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3061 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3062 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3063 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3064
3065 if (FLOAT128_VECTOR_P (KFmode))
3066 {
3067 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3068 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3069 }
3070
3071 if (FLOAT128_VECTOR_P (TFmode))
3072 {
3073 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3074 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3075 }
3076
3077 if (TARGET_MMA)
3078 {
3079 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3080 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3081 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3082 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3083 }
3084 }
3085 }
3086 else
3087 {
3088 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3089 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3090 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3091 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3092 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3093 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3094 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3095 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3096 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3097 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3098 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3099 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3100 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3101 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3102 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3103 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3104 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3105 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3106 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3107 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3108
3109 if (FLOAT128_VECTOR_P (KFmode))
3110 {
3111 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3112 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3113 }
3114
3115 if (FLOAT128_IEEE_P (TFmode))
3116 {
3117 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3118 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3119 }
3120
3121 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3122 available. */
3123 if (TARGET_NO_SDMODE_STACK)
3124 {
3125 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3126 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3127 }
3128
3129 if (TARGET_VSX)
3130 {
3131 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3132 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3133 }
3134
3135 if (TARGET_DIRECT_MOVE)
3136 {
3137 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3138 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3139 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3140 }
3141 }
3142
3143 reg_addr[DFmode].scalar_in_vmx_p = true;
3144 reg_addr[DImode].scalar_in_vmx_p = true;
3145
3146 if (TARGET_P8_VECTOR)
3147 {
3148 reg_addr[SFmode].scalar_in_vmx_p = true;
3149 reg_addr[SImode].scalar_in_vmx_p = true;
3150
3151 if (TARGET_P9_VECTOR)
3152 {
3153 reg_addr[HImode].scalar_in_vmx_p = true;
3154 reg_addr[QImode].scalar_in_vmx_p = true;
3155 }
3156 }
3157 }
3158
3159 /* Precalculate HARD_REGNO_NREGS. */
3160 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3161 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3162 rs6000_hard_regno_nregs[m][r]
3163 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3164
3165 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3166 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3167 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3168 rs6000_hard_regno_mode_ok_p[m][r]
3169 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3170
3171 /* Precalculate CLASS_MAX_NREGS sizes. */
3172 for (c = 0; c < LIM_REG_CLASSES; ++c)
3173 {
3174 int reg_size;
3175
3176 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3177 reg_size = UNITS_PER_VSX_WORD;
3178
3179 else if (c == ALTIVEC_REGS)
3180 reg_size = UNITS_PER_ALTIVEC_WORD;
3181
3182 else if (c == FLOAT_REGS)
3183 reg_size = UNITS_PER_FP_WORD;
3184
3185 else
3186 reg_size = UNITS_PER_WORD;
3187
3188 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3189 {
3190 machine_mode m2 = (machine_mode)m;
3191 int reg_size2 = reg_size;
3192
3193 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3194 in VSX. */
3195 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3196 reg_size2 = UNITS_PER_FP_WORD;
3197
3198 rs6000_class_max_nregs[m][c]
3199 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3200 }
3201 }
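/* Worked example of the calculation above: with TARGET_VSX, IFmode
   (IBM 128-bit float, FLOAT128_2REG_P) in FLOAT_REGS uses
   reg_size2 == UNITS_PER_FP_WORD == 8, giving (16 + 8 - 1) / 8 == 2
   registers, while V2DFmode in VSX_REGS uses reg_size2 == 16 and
   needs (16 + 16 - 1) / 16 == 1 register.  */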
3202
3203 /* Calculate the modes for which to automatically generate code using the
3204 reciprocal divide and square root instructions. In the future, possibly
3205 automatically generate the instructions even if the user did not specify
3206 -mrecip. The double precision reciprocal square root estimate on older
3207 machines is not accurate enough. */
3208 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3209 if (TARGET_FRES)
3210 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3211 if (TARGET_FRE)
3212 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3213 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3214 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3215 if (VECTOR_UNIT_VSX_P (V2DFmode))
3216 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3217
3218 if (TARGET_FRSQRTES)
3219 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3220 if (TARGET_FRSQRTE)
3221 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3222 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3223 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3224 if (VECTOR_UNIT_VSX_P (V2DFmode))
3225 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3226
3227 if (rs6000_recip_control)
3228 {
3229 if (!flag_finite_math_only)
3230 warning (0, "%qs requires %qs or %qs", "-mrecip",
3231 "-ffinite-math-only", "-ffast-math");
3232 if (flag_trapping_math)
3233 warning (0, "%qs requires %qs or %qs", "-mrecip",
3234 "-fno-trapping-math", "-ffast-math");
3235 if (!flag_reciprocal_math)
3236 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3237 "-ffast-math");
3238 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3239 {
3240 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3241 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3242 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3243
3244 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3245 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3246 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3247
3248 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3249 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3250 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3251
3252 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3253 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3254 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3255
3256 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3257 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3258 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3259
3260 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3261 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3262 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3263
3264 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3265 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3266 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3267
3268 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3269 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3270 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3271 }
3272 }
3273
3274 /* Update the addr mask bits in reg_addr to help secondary reload and the
3275 legitimate address support (rs6000_legitimate_address_p) figure out the
3276 appropriate addressing to use. */
3277 rs6000_setup_reg_addr_masks ();
3278
3279 if (global_init_p || TARGET_DEBUG_TARGET)
3280 {
3281 if (TARGET_DEBUG_REG)
3282 rs6000_debug_reg_global ();
3283
3284 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3285 fprintf (stderr,
3286 "SImode variable mult cost = %d\n"
3287 "SImode constant mult cost = %d\n"
3288 "SImode short constant mult cost = %d\n"
3289 "DImode multipliciation cost = %d\n"
3290 "SImode division cost = %d\n"
3291 "DImode division cost = %d\n"
3292 "Simple fp operation cost = %d\n"
3293 "DFmode multiplication cost = %d\n"
3294 "SFmode division cost = %d\n"
3295 "DFmode division cost = %d\n"
3296 "cache line size = %d\n"
3297 "l1 cache size = %d\n"
3298 "l2 cache size = %d\n"
3299 "simultaneous prefetches = %d\n"
3300 "\n",
3301 rs6000_cost->mulsi,
3302 rs6000_cost->mulsi_const,
3303 rs6000_cost->mulsi_const9,
3304 rs6000_cost->muldi,
3305 rs6000_cost->divsi,
3306 rs6000_cost->divdi,
3307 rs6000_cost->fp,
3308 rs6000_cost->dmul,
3309 rs6000_cost->sdiv,
3310 rs6000_cost->ddiv,
3311 rs6000_cost->cache_line_size,
3312 rs6000_cost->l1_cache_size,
3313 rs6000_cost->l2_cache_size,
3314 rs6000_cost->simultaneous_prefetches);
3315 }
3316 }
3317
3318 #if TARGET_MACHO
3319 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3320
3321 static void
3322 darwin_rs6000_override_options (void)
3323 {
3324 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3325 off. */
3326 rs6000_altivec_abi = 1;
3327 TARGET_ALTIVEC_VRSAVE = 1;
3328 rs6000_current_abi = ABI_DARWIN;
3329
3330 if (DEFAULT_ABI == ABI_DARWIN
3331 && TARGET_64BIT)
3332 darwin_one_byte_bool = 1;
3333
3334 if (TARGET_64BIT && ! TARGET_POWERPC64)
3335 {
3336 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3337 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3338 }
3339
3340 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3341 optimisation, and it will not work with the most generic case (where the
3342 symbol is an undefined external, but there is no symbol stub). */
3343 if (TARGET_64BIT)
3344 rs6000_default_long_calls = 0;
3345
3346 /* ld_classic is (so far) still used for kernel (static) code, and supports
3347 the JBSR longcall / branch islands. */
3348 if (flag_mkernel)
3349 {
3350 rs6000_default_long_calls = 1;
3351
3352 /* Allow a kext author to do -mkernel -mhard-float. */
3353 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3354 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3355 }
3356
3357 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3358 Altivec. */
3359 if (!flag_mkernel && !flag_apple_kext
3360 && TARGET_64BIT
3361 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3362 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3363
3364 /* Unless the user (not the configurer) has explicitly overridden
3365 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3366 G4 unless targeting the kernel. */
3367 if (!flag_mkernel
3368 && !flag_apple_kext
3369 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3370 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3371 && ! OPTION_SET_P (rs6000_cpu_index))
3372 {
3373 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3374 }
3375 }
3376 #endif
3377
3378 /* If not otherwise specified by a target, make 'long double' equivalent to
3379 'double'. */
3380
3381 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3382 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3383 #endif
3384
3385 /* Return the builtin mask for the various options used that could affect
3386 which builtins are available. In the past we used target_flags, but we've
3387 run out of bits, and some options are no longer in target_flags. */
3388
3389 HOST_WIDE_INT
3390 rs6000_builtin_mask_calculate (void)
3391 {
3392 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3393 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3394 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3395 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3396 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3397 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3398 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3399 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3400 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3401 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3402 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3403 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3404 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3405 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3406 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3407 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3408 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3409 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3410 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3411 | ((TARGET_LONG_DOUBLE_128
3412 && TARGET_HARD_FLOAT
3413 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3414 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3415 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
3416 | ((TARGET_MMA) ? RS6000_BTM_MMA : 0)
3417 | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0));
3418 }
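/* A minimal usage sketch (illustrative, not code from this file):
   consumers of the returned mask test individual RS6000_BTM_* bits,
   e.g.

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     bool have_altivec_builtins = (mask & RS6000_BTM_ALTIVEC) != 0;

   so changing TARGET_ALTIVEC between invocations changes which
   built-ins are treated as available.  */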
3419
3420 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3421 to clobber the XER[CA] bit because clobbering that bit without telling
3422 the compiler worked just fine with versions of GCC before GCC 5, and
3423 breaking a lot of older code in ways that are hard to track down is
3424 not such a great idea. */
3425
3426 static rtx_insn *
3427 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3428 vec<machine_mode> & /*input_modes*/,
3429 vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
3430 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3431 {
3432 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3433 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3434 return NULL;
3435 }
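/* Illustrative example (assumed user code, not from this file): an asm
   such as

     __asm__ ("addic %0,%1,-1" : "=r" (out) : "r" (in));

   modifies XER[CA] without declaring it; the unconditional clobber
   pushed above keeps such pre-GCC-5 code correct.  */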
3436
3437 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3438 but is called when the optimize level is changed via an attribute or
3439 pragma or when it is reset at the end of the code affected by the
3440 attribute or pragma. It is not called at the beginning of compilation
3441 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3442 actions then, you should have TARGET_OPTION_OVERRIDE call
3443 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
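/* For example (illustrative), either of

     __attribute__((optimize ("unroll-loops"))) void f (void);
     #pragma GCC optimize ("O3")

   triggers a call to this hook for the affected code.  */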
3444
3445 static void
3446 rs6000_override_options_after_change (void)
3447 {
3448 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3449 turns -frename-registers on. */
3450 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3451 || (OPTION_SET_P (flag_unroll_all_loops)
3452 && flag_unroll_all_loops))
3453 {
3454 if (!OPTION_SET_P (unroll_only_small_loops))
3455 unroll_only_small_loops = 0;
3456 if (!OPTION_SET_P (flag_rename_registers))
3457 flag_rename_registers = 1;
3458 if (!OPTION_SET_P (flag_cunroll_grow_size))
3459 flag_cunroll_grow_size = 1;
3460 }
3461 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3462 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3463
3464 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3465 if (rs6000_rop_protect)
3466 flag_shrink_wrap = 0;
3467 }
3468
3469 #ifdef TARGET_USES_LINUX64_OPT
3470 static void
3471 rs6000_linux64_override_options ()
3472 {
3473 if (!OPTION_SET_P (rs6000_alignment_flags))
3474 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3475 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3476 {
3477 if (DEFAULT_ABI != ABI_AIX)
3478 {
3479 rs6000_current_abi = ABI_AIX;
3480 error (INVALID_64BIT, "call");
3481 }
3482 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3483 if (ELFv2_ABI_CHECK)
3484 {
3485 rs6000_current_abi = ABI_ELFv2;
3486 if (dot_symbols)
3487 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3488 }
3489 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3490 {
3491 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3492 error (INVALID_64BIT, "relocatable");
3493 }
3494 if (rs6000_isa_flags & OPTION_MASK_EABI)
3495 {
3496 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3497 error (INVALID_64BIT, "eabi");
3498 }
3499 if (TARGET_PROTOTYPE)
3500 {
3501 target_prototype = 0;
3502 error (INVALID_64BIT, "prototype");
3503 }
3504 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3505 {
3506 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3507 error ("%<-m64%> requires a PowerPC64 cpu");
3508 }
3509 if (!OPTION_SET_P (rs6000_current_cmodel))
3510 SET_CMODEL (CMODEL_MEDIUM);
3511 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3512 {
3513 if (OPTION_SET_P (rs6000_current_cmodel)
3514 && rs6000_current_cmodel != CMODEL_SMALL)
3515 error ("%<-mcmodel%> incompatible with other TOC options");
3516 if (TARGET_MINIMAL_TOC)
3517 SET_CMODEL (CMODEL_SMALL);
3518 else if (TARGET_PCREL
3519 || (PCREL_SUPPORTED_BY_OS
3520 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3521 /* Ignore -mno-minimal-toc. */
3522 ;
3523 else
3524 SET_CMODEL (CMODEL_SMALL);
3525 }
3526 if (rs6000_current_cmodel != CMODEL_SMALL)
3527 {
3528 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3529 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3530 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3531 TARGET_NO_SUM_IN_TOC = 0;
3532 }
3533 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3534 {
3535 if (OPTION_SET_P (rs6000_pltseq))
3536 warning (0, "%qs unsupported for this ABI",
3537 "-mpltseq");
3538 rs6000_pltseq = false;
3539 }
3540 }
3541 else if (TARGET_64BIT)
3542 error (INVALID_32BIT, "32");
3543 else
3544 {
3545 if (TARGET_PROFILE_KERNEL)
3546 {
3547 profile_kernel = 0;
3548 error (INVALID_32BIT, "profile-kernel");
3549 }
3550 if (OPTION_SET_P (rs6000_current_cmodel))
3551 {
3552 SET_CMODEL (CMODEL_SMALL);
3553 error (INVALID_32BIT, "cmodel");
3554 }
3555 }
3556 }
3557 #endif
3558
3559 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3560 This support is only in little endian GLIBC 2.32 or newer. */
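/* Worked example of the version encoding below: glibc 2.32 encodes as
   2 * 1000 + 32 == 2032, so e.g. glibc 2.31 (2031) correctly fails the
   test.  */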
3561 static bool
3562 glibc_supports_ieee_128bit (void)
3563 {
3564 #ifdef OPTION_GLIBC
3565 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3566 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3567 return true;
3568 #endif /* OPTION_GLIBC. */
3569
3570 return false;
3571 }
3572
3573 /* Override command line options.
3574
3575 Combine build-specific configuration information with options
3576 specified on the command line to set various state variables which
3577 influence code generation, optimization, and expansion of built-in
3578 functions. Assure that command-line configuration preferences are
3579 compatible with each other and with the build configuration; issue
3580 warnings while adjusting configuration or error messages while
3581 rejecting configuration.
3582
3583 Upon entry to this function:
3584
3585 This function is called once at the beginning of
3586 compilation, and then again at the start and end of compiling
3587 each section of code that has a different configuration, as
3588 indicated, for example, by adding the
3589
3590 __attribute__((__target__("cpu=power9")))
3591
3592 qualifier to a function definition or, for example, by bracketing
3593 code between
3594
3595 #pragma GCC target("altivec")
3596
3597 and
3598
3599 #pragma GCC reset_options
3600
3601 directives. Parameter global_init_p is true for the initial
3602 invocation, which initializes global variables, and false for all
3603 subsequent invocations.
3604
3605
3606 Various global state information is assumed to be valid. This
3607 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3608 default CPU specified at build configure time, TARGET_DEFAULT,
3609 representing the default set of option flags for the default
3610 target, and OPTION_SET_P (rs6000_isa_flags), representing
3611 which options were requested on the command line.
3612
3613 Upon return from this function:
3614
3615 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3616 was set by name on the command line. Additionally, if certain
3617 attributes are automatically enabled or disabled by this function
3618 in order to assure compatibility between options and
3619 configuration, the flags associated with those attributes are
3620 also set. By setting these "explicit bits", we avoid the risk
3621 that other code might accidentally overwrite these particular
3622 attributes with "default values".
3623
3624 The various bits of rs6000_isa_flags are set to indicate the
3625 target options that have been selected for the most current
3626 compilation efforts. This has the effect of also turning on the
3627 associated TARGET_XXX values since these are macros which are
3628 generally defined to test the corresponding bit of the
3629 rs6000_isa_flags variable.
3630
3631 The variable rs6000_builtin_mask is set to represent the target
3632 options for the most current compilation efforts, consistent with
3633 the current contents of rs6000_isa_flags. This variable controls
3634 expansion of built-in functions.
3635
3636 Various other global variables and fields of global structures
3637 (over 50 in all) are initialized to reflect the desired options
3638 for the most current compilation efforts. */
3639
3640 static bool
3641 rs6000_option_override_internal (bool global_init_p)
3642 {
3643 bool ret = true;
3644
3645 HOST_WIDE_INT set_masks;
3646 HOST_WIDE_INT ignore_masks;
3647 int cpu_index = -1;
3648 int tune_index;
3649 struct cl_target_option *main_target_opt
3650 = ((global_init_p || target_option_default_node == NULL)
3651 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3652
3653 /* Print defaults. */
3654 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3655 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3656
3657 /* Remember the explicit arguments. */
3658 if (global_init_p)
3659 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3660
3661 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3662 library functions, so warn about it. The flag may be useful for
3663 performance studies from time to time though, so don't disable it
3664 entirely. */
3665 if (OPTION_SET_P (rs6000_alignment_flags)
3666 && rs6000_alignment_flags == MASK_ALIGN_POWER
3667 && DEFAULT_ABI == ABI_DARWIN
3668 && TARGET_64BIT)
3669 warning (0, "%qs is not supported for 64-bit Darwin;"
3670 " it is incompatible with the installed C and C++ libraries",
3671 "-malign-power");
3672
3673 /* Numerous experiments show that IRA-based loop pressure
3674 calculation works better for RTL loop invariant motion on targets
3675 with enough (>= 32) registers. It is an expensive optimization,
3676 so it is enabled only when optimizing for peak performance. */
3677 if (optimize >= 3 && global_init_p
3678 && !OPTION_SET_P (flag_ira_loop_pressure))
3679 flag_ira_loop_pressure = 1;
3680
3681 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3682 for tracebacks to be complete, but not if any -fasynchronous-unwind-tables
3683 option was already specified explicitly. */
3684 if (flag_sanitize & SANITIZE_USER_ADDRESS
3685 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3686 flag_asynchronous_unwind_tables = 1;
3687
3688 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3689 loop unroller is active. It is only checked during unrolling, so
3690 we can just set it on by default. */
3691 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3692 flag_variable_expansion_in_unroller = 1;
3693
3694 /* Set the pointer size. */
3695 if (TARGET_64BIT)
3696 {
3697 rs6000_pmode = DImode;
3698 rs6000_pointer_size = 64;
3699 }
3700 else
3701 {
3702 rs6000_pmode = SImode;
3703 rs6000_pointer_size = 32;
3704 }
3705
3706 /* Some OSs don't support saving the high part of 64-bit registers on context
3707 switch. Other OSs don't support saving Altivec registers. On those OSs,
3708 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3709 if the user wants either, the user must explicitly specify them and we
3710 won't interfere with the user's specification. */
3711
3712 set_masks = POWERPC_MASKS;
3713 #ifdef OS_MISSING_POWERPC64
3714 if (OS_MISSING_POWERPC64)
3715 set_masks &= ~OPTION_MASK_POWERPC64;
3716 #endif
3717 #ifdef OS_MISSING_ALTIVEC
3718 if (OS_MISSING_ALTIVEC)
3719 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3720 | OTHER_VSX_VECTOR_MASKS);
3721 #endif
3722
3723 /* Don't let the processor default override options given explicitly. */
3724 set_masks &= ~rs6000_isa_flags_explicit;
3725
3726 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3727 the cpu in a target attribute or pragma, but did not specify a tuning
3728 option, use the cpu for the tuning option rather than the option specified
3729 with -mtune on the command line. Process a '--with-cpu' configuration
3730 request as an implicit --cpu. */
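/* For example (illustrative): plain -mcpu=power8 also tunes for power8;
   -mcpu=power8 -mtune=power9 keeps the power8 ISA bits but schedules
   for power9; -mtune alone changes only the tuning, leaving the ISA at
   its default.  */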
3731 if (rs6000_cpu_index >= 0)
3732 cpu_index = rs6000_cpu_index;
3733 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3734 cpu_index = main_target_opt->x_rs6000_cpu_index;
3735 else if (OPTION_TARGET_CPU_DEFAULT)
3736 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3737
3738 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3739 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3740 with those from the cpu, except for options that were explicitly set. If
3741 we don't have a cpu, do not override the target bits set in
3742 TARGET_DEFAULT. */
3743 if (cpu_index >= 0)
3744 {
3745 rs6000_cpu_index = cpu_index;
3746 rs6000_isa_flags &= ~set_masks;
3747 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3748 & set_masks);
3749 }
3750 else
3751 {
3752 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3753 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3754 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3755 to using rs6000_isa_flags, we need to do the initialization here.
3756
3757 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3758 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3759 HOST_WIDE_INT flags;
3760 if (TARGET_DEFAULT)
3761 flags = TARGET_DEFAULT;
3762 else
3763 {
3764 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3765 const char *default_cpu = (!TARGET_POWERPC64
3766 ? "powerpc"
3767 : (BYTES_BIG_ENDIAN
3768 ? "powerpc64"
3769 : "powerpc64le"));
3770 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3771 flags = processor_target_table[default_cpu_index].target_enable;
3772 }
3773 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3774 }
3775
3776 if (rs6000_tune_index >= 0)
3777 tune_index = rs6000_tune_index;
3778 else if (cpu_index >= 0)
3779 rs6000_tune_index = tune_index = cpu_index;
3780 else
3781 {
3782 size_t i;
3783 enum processor_type tune_proc
3784 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3785
3786 tune_index = -1;
3787 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3788 if (processor_target_table[i].processor == tune_proc)
3789 {
3790 tune_index = i;
3791 break;
3792 }
3793 }
3794
3795 if (cpu_index >= 0)
3796 rs6000_cpu = processor_target_table[cpu_index].processor;
3797 else
3798 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3799
3800 gcc_assert (tune_index >= 0);
3801 rs6000_tune = processor_target_table[tune_index].processor;
3802
3803 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3804 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3805 || rs6000_cpu == PROCESSOR_PPCE5500)
3806 {
3807 if (TARGET_ALTIVEC)
3808 error ("AltiVec not supported in this target");
3809 }
3810
3811 /* If we are optimizing big endian systems for space, use the load/store
3812 multiple instructions. */
3813 if (BYTES_BIG_ENDIAN && optimize_size)
3814 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3815
3816 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3817 because the hardware doesn't support the instructions used in little
3818 endian mode and raises an alignment trap. The 750 does not cause an
3819 alignment trap (except when the target is unaligned). */
3820
3821 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3822 {
3823 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3824 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3825 warning (0, "%qs is not supported on little endian systems",
3826 "-mmultiple");
3827 }
3828
3829 /* If little-endian, default to -mstrict-align on older processors.
3830 Testing for direct_move matches power8 and later. */
3831 if (!BYTES_BIG_ENDIAN
3832 && !(processor_target_table[tune_index].target_enable
3833 & OPTION_MASK_DIRECT_MOVE))
3834 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3835
3836 if (!rs6000_fold_gimple)
3837 fprintf (stderr,
3838 "gimple folding of rs6000 builtins has been disabled.\n");
3839
3840 /* Add some warnings for VSX. */
3841 if (TARGET_VSX)
3842 {
3843 const char *msg = NULL;
3844 if (!TARGET_HARD_FLOAT)
3845 {
3846 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3847 msg = N_("%<-mvsx%> requires hardware floating point");
3848 else
3849 {
3850 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3851 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3852 }
3853 }
3854 else if (TARGET_AVOID_XFORM > 0)
3855 msg = N_("%<-mvsx%> needs indexed addressing");
3856 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3857 & OPTION_MASK_ALTIVEC))
3858 {
3859 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3860 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3861 else
3862 msg = N_("%<-mno-altivec%> disables vsx");
3863 }
3864
3865 if (msg)
3866 {
3867 warning (0, msg);
3868 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3869 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3870 }
3871 }
3872
3873 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3874 the -mcpu setting to enable options that conflict. */
3875 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3876 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3877 | OPTION_MASK_ALTIVEC
3878 | OPTION_MASK_VSX)) != 0)
3879 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3880 | OPTION_MASK_DIRECT_MOVE)
3881 & ~rs6000_isa_flags_explicit);
3882
3883 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3884 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3885
3886 #ifdef XCOFF_DEBUGGING_INFO
3887 /* For AIX default to 64-bit DWARF. */
3888 if (!OPTION_SET_P (dwarf_offset_size))
3889 dwarf_offset_size = POINTER_SIZE_UNITS;
3890 #endif
3891
3892 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3893 off all of the options that depend on those flags. */
3894 ignore_masks = rs6000_disable_incompatible_switches ();
3895
3896 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3897 unless the user explicitly used the -mno-<option> to disable the code. */
3898 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3899 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3900 else if (TARGET_P9_MINMAX)
3901 {
3902 if (cpu_index >= 0)
3903 {
3904 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3905 {
3906 /* legacy behavior: allow -mcpu=power9 with certain
3907 capabilities explicitly disabled. */
3908 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3909 }
3910 else
3911 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3912 "for <xxx> less than power9", "-mcpu");
3913 }
3914 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3915 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3916 & rs6000_isa_flags_explicit))
3917 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3918 were explicitly cleared. */
3919 error ("%qs incompatible with explicitly disabled options",
3920 "-mpower9-minmax");
3921 else
3922 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3923 }
3924 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3925 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3926 else if (TARGET_VSX)
3927 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3928 else if (TARGET_POPCNTD)
3929 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3930 else if (TARGET_DFP)
3931 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3932 else if (TARGET_CMPB)
3933 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3934 else if (TARGET_FPRND)
3935 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3936 else if (TARGET_POPCNTB)
3937 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3938 else if (TARGET_ALTIVEC)
3939 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3940
3941 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3942 {
3943 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3944 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3945 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3946 }
3947
3948 if (!TARGET_FPRND && TARGET_VSX)
3949 {
3950 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3951 /* TARGET_VSX = 1 implies Power 7 and newer */
3952 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3953 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3954 }
3955
3956 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3957 {
3958 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3959 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3960 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3961 }
3962
3963 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3964 {
3965 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3966 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3967 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3968 }
3969
3970 if (TARGET_P8_VECTOR && !TARGET_VSX)
3971 {
3972 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3973 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3974 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3975 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3976 {
3977 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3978 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3979 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3980 }
3981 else
3982 {
3983 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3984 not explicit. */
3985 rs6000_isa_flags |= OPTION_MASK_VSX;
3986 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3987 }
3988 }
3989
3990 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3991 {
3992 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3993 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3994 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3995 }
3996
3997 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3998 silently turn off quad memory mode. */
3999 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4000 {
4001 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4002 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
4003
4004 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4005 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
4006
4007 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4008 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4009 }
4010
4011 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4012 the words are reversed, but atomic operations can still be done by
4013 swapping the words. */
4014 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4015 {
4016 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4017 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4018 "mode"));
4019
4020 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4021 }
4022
4023 /* Assume that if the user asked for normal quad memory instructions, they
4024 want the atomic versions as well, unless they explicitly told us not to use
4025 quad word atomic instructions. */
4026 if (TARGET_QUAD_MEMORY
4027 && !TARGET_QUAD_MEMORY_ATOMIC
4028 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4029 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4030
4031 /* If we can shrink-wrap the TOC register save separately, then use
4032 -msave-toc-indirect unless explicitly disabled. */
4033 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4034 && flag_shrink_wrap_separate
4035 && optimize_function_for_speed_p (cfun))
4036 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4037
4038 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4039 generating power8 instructions. Power9 does not optimize power8 fusion
4040 cases. */
4041 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4042 {
4043 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4044 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4045 else
4046 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4047 }
4048
4049 /* Setting additional fusion flags turns on base fusion. */
4050 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4051 {
4052 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4053 {
4054 if (TARGET_P8_FUSION_SIGN)
4055 error ("%qs requires %qs", "-mpower8-fusion-sign",
4056 "-mpower8-fusion");
4057
4058 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4059 }
4060 else
4061 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4062 }
4063
4064 /* Power8 does not fuse sign extended loads with the addis. If we are
4065 optimizing at high levels for speed, convert a sign extended load into a
4066 zero extending load, and an explicit sign extension. */
4067 if (TARGET_P8_FUSION
4068 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4069 && optimize_function_for_speed_p (cfun)
4070 && optimize >= 3)
4071 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4072
4073 /* ISA 3.0 vector instructions include ISA 2.07. */
4074 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4075 {
4076 /* We prefer to not mention undocumented options in
4077 error messages. However, if users have managed to select
4078 power9-vector without selecting power8-vector, they
4079 already know about undocumented flags. */
4080 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4081 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4082 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4083 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4084 {
4085 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4086 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4087 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4088 }
4089 else
4090 {
4091 /* OPTION_MASK_P9_VECTOR is explicit and
4092 OPTION_MASK_P8_VECTOR is not explicit. */
4093 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4094 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4095 }
4096 }
4097
4098 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4099 support. If we only have ISA 2.06 support, and the user did not specify
4100 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4101 but we don't enable the full vectorization support. */
4102 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4103 TARGET_ALLOW_MOVMISALIGN = 1;
4104
4105 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4106 {
4107 if (TARGET_ALLOW_MOVMISALIGN > 0
4108 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4109 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4110
4111 TARGET_ALLOW_MOVMISALIGN = 0;
4112 }
4113
4114 /* Determine when unaligned vector accesses are permitted, and when
4115 they are preferred over masked Altivec loads. Note that if
4116 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4117 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4118 not true. */
4119 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4120 {
4121 if (!TARGET_VSX)
4122 {
4123 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4124 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4125
4126 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4127 }
4128
4129 else if (!TARGET_ALLOW_MOVMISALIGN)
4130 {
4131 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4132 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4133 "-mallow-movmisalign");
4134
4135 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4136 }
4137 }
4138
4139 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4140 {
4141 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4142 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4143 else
4144 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4145 }
4146
4147 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
4148 {
4149 if (TARGET_MMA && TARGET_EFFICIENT_UNALIGNED_VSX)
4150 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4151 else
4152 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4153 }
4154
4155 /* Use long double size to select the appropriate long double. We use
4156 TYPE_PRECISION to differentiate the 3 different long double types. We map
4157 128 into the precision used for TFmode. */
4158 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4159 ? 64
4160 : FLOAT_PRECISION_TFmode);
4161
4162 /* Set long double size before the IEEE 128-bit tests. */
4163 if (!OPTION_SET_P (rs6000_long_double_type_size))
4164 {
4165 if (main_target_opt != NULL
4166 && (main_target_opt->x_rs6000_long_double_type_size
4167 != default_long_double_size))
4168 error ("target attribute or pragma changes %<long double%> size");
4169 else
4170 rs6000_long_double_type_size = default_long_double_size;
4171 }
4172 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4173 ; /* The option value can be seen when cl_target_option_restore is called. */
4174 else if (rs6000_long_double_type_size == 128)
4175 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4176 else if (OPTION_SET_P (rs6000_ieeequad))
4177 {
4178 if (global_options.x_rs6000_ieeequad)
4179 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4180 else
4181 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4182 }
4183
4184 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4185 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4186 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4187 those systems will not pick up this default. Warn if the user changes the
4188 default unless -Wno-psabi. */
4189 if (!OPTION_SET_P (rs6000_ieeequad))
4190 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4191
4192 else
4193 {
4194 if (global_options.x_rs6000_ieeequad
4195 && (!TARGET_POPCNTD || !TARGET_VSX))
4196 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4197
4198 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4199 {
4200 /* Determine if the user can change the default long double type at
4201 compilation time. You need GLIBC 2.32 or newer to be able to
4202 change the long double type. Only issue one warning. */
4203 static bool warned_change_long_double;
4204
4205 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4206 {
4207 warned_change_long_double = true;
4208 if (TARGET_IEEEQUAD)
4209 warning (OPT_Wpsabi, "Using IEEE extended precision "
4210 "%<long double%>");
4211 else
4212 warning (OPT_Wpsabi, "Using IBM extended precision "
4213 "%<long double%>");
4214 }
4215 }
4216 }
4217
4218 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4219 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4220 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4221 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4222 the keyword as well as the type. */
4223 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4224
4225 /* IEEE 128-bit floating point requires VSX support. */
4226 if (TARGET_FLOAT128_KEYWORD)
4227 {
4228 if (!TARGET_VSX)
4229 {
4230 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4231 error ("%qs requires VSX support", "-mfloat128");
4232
4233 TARGET_FLOAT128_TYPE = 0;
4234 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4235 | OPTION_MASK_FLOAT128_HW);
4236 }
4237 else if (!TARGET_FLOAT128_TYPE)
4238 {
4239 TARGET_FLOAT128_TYPE = 1;
4240 warning (0, "The %<-mfloat128%> option may not be fully supported");
4241 }
4242 }
4243
4244 /* Enable the __float128 keyword under Linux by default. */
4245 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4246 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4247 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4248
4249 /* If we are supporting the float128 type and have full ISA 3.0 support,
4250 enable -mfloat128-hardware by default. However, don't enable the
4251 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4252 because sometimes the compiler wants to put things in an integer
4253 container, and if we don't have __int128 support, it is impossible. */
4254 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4255 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4256 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4257 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4258
4259 if (TARGET_FLOAT128_HW
4260 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4261 {
4262 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4263 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4264
4265 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4266 }
4267
4268 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4269 {
4270 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4271 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4272
4273 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4274 }
4275
4276 /* Enable -mprefixed by default on power10 systems. */
4277 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4278 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4279
4280 /* -mprefixed requires -mcpu=power10 (or later). */
4281 else if (TARGET_PREFIXED && !TARGET_POWER10)
4282 {
4283 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4284 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4285
4286 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4287 }
4288
4289 /* -mpcrel requires prefixed load/store addressing. */
4290 if (TARGET_PCREL && !TARGET_PREFIXED)
4291 {
4292 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4293 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4294
4295 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4296 }
4297
4298 /* Print the options after updating the defaults. */
4299 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4300 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4301
4302 /* E500mc does "better" if we inline more aggressively. Respect the
4303 user's opinion, though. */
4304 if (rs6000_block_move_inline_limit == 0
4305 && (rs6000_tune == PROCESSOR_PPCE500MC
4306 || rs6000_tune == PROCESSOR_PPCE500MC64
4307 || rs6000_tune == PROCESSOR_PPCE5500
4308 || rs6000_tune == PROCESSOR_PPCE6500))
4309 rs6000_block_move_inline_limit = 128;
4310
4311 /* store_one_arg depends on expand_block_move to handle at least the
4312 size of reg_parm_stack_space. */
4313 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4314 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
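/* Worked example (assuming the usual eight parameter-passing GPRs):
   the register parameter save area is 8 regs * 8 bytes == 64 bytes in
   64-bit mode and 8 regs * 4 bytes == 32 bytes in 32-bit mode, which
   is where the 64/32 minimums above come from.  */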
4315
4316 if (global_init_p)
4317 {
4318 /* If the appropriate debug option is enabled, replace the target hooks
4319 with debug versions that call the real version and then print
4320 debugging information. */
4321 if (TARGET_DEBUG_COST)
4322 {
4323 targetm.rtx_costs = rs6000_debug_rtx_costs;
4324 targetm.address_cost = rs6000_debug_address_cost;
4325 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4326 }
4327
4328 if (TARGET_DEBUG_ADDR)
4329 {
4330 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4331 targetm.legitimize_address = rs6000_debug_legitimize_address;
4332 rs6000_secondary_reload_class_ptr
4333 = rs6000_debug_secondary_reload_class;
4334 targetm.secondary_memory_needed
4335 = rs6000_debug_secondary_memory_needed;
4336 targetm.can_change_mode_class
4337 = rs6000_debug_can_change_mode_class;
4338 rs6000_preferred_reload_class_ptr
4339 = rs6000_debug_preferred_reload_class;
4340 rs6000_mode_dependent_address_ptr
4341 = rs6000_debug_mode_dependent_address;
4342 }
4343
4344 if (rs6000_veclibabi_name)
4345 {
4346 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4347 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4348 else
4349 {
4350 error ("unknown vectorization library ABI type (%qs) for "
4351 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4352 ret = false;
4353 }
4354 }
4355 }
4356
4357 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4358 target attribute or pragma which automatically enables both options,
4359 unless the altivec ABI was set. This is set by default for 64-bit, but
4360 not for 32-bit. */
4361 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4362 {
4363 TARGET_FLOAT128_TYPE = 0;
4364 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4365 | OPTION_MASK_FLOAT128_KEYWORD)
4366 & ~rs6000_isa_flags_explicit);
4367 }
4368
4369 /* Enable Altivec ABI for AIX -maltivec. */
4370 if (TARGET_XCOFF
4371 && (TARGET_ALTIVEC || TARGET_VSX)
4372 && !OPTION_SET_P (rs6000_altivec_abi))
4373 {
4374 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4375 error ("target attribute or pragma changes AltiVec ABI");
4376 else
4377 rs6000_altivec_abi = 1;
4378 }
4379
4380 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4381 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4382 be explicitly overridden in either case. */
4383 if (TARGET_ELF)
4384 {
4385 if (!OPTION_SET_P (rs6000_altivec_abi)
4386 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4387 {
4388 if (main_target_opt != NULL &&
4389 !main_target_opt->x_rs6000_altivec_abi)
4390 error ("target attribute or pragma changes AltiVec ABI");
4391 else
4392 rs6000_altivec_abi = 1;
4393 }
4394 }
4395
4396 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4397 So far, the only darwin64 targets are also Mach-O. */
4398 if (TARGET_MACHO
4399 && DEFAULT_ABI == ABI_DARWIN
4400 && TARGET_64BIT)
4401 {
4402 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4403 error ("target attribute or pragma changes darwin64 ABI");
4404 else
4405 {
4406 rs6000_darwin64_abi = 1;
4407 /* Default to natural alignment, for better performance. */
4408 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4409 }
4410 }
4411
4412 /* Place FP constants in the constant pool instead of TOC
4413 if section anchors are enabled. */
4414 if (flag_section_anchors
4415 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4416 TARGET_NO_FP_IN_TOC = 1;
4417
4418 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4419 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4420
4421 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4422 SUBTARGET_OVERRIDE_OPTIONS;
4423 #endif
4424 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4425 SUBSUBTARGET_OVERRIDE_OPTIONS;
4426 #endif
4427 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4428 SUB3TARGET_OVERRIDE_OPTIONS;
4429 #endif
4430
4431 /* If the ABI has support for PC-relative relocations, enable it by default.
4432 This test depends on the sub-target tests above setting the code model to
4433 medium for ELF v2 systems. */
4434 if (PCREL_SUPPORTED_BY_OS
4435 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4436 rs6000_isa_flags |= OPTION_MASK_PCREL;
4437
4438 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4439 after the subtarget override options are done. */
4440 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4441 {
4442 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4443 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4444
4445 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4446 }
4447
4448 /* Enable -mmma by default on power10 systems. */
4449 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4450 rs6000_isa_flags |= OPTION_MASK_MMA;
4451
4452 if (TARGET_POWER10
4453 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
4454 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4455
4456 if (TARGET_POWER10
4457 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
4458 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
4459
4460 if (TARGET_POWER10
4461 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
4462 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;
4463
4464 if (TARGET_POWER10
4465 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LOGADD) == 0)
4466 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LOGADD;
4467
4468 if (TARGET_POWER10
4469 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ADDLOG) == 0)
4470 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ADDLOG;
4471
4472 if (TARGET_POWER10
4473 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
4474 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
4475
4476 if (TARGET_POWER10
4477 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
4478 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
4479
4480 /* Turn off vector pair/mma options on non-power10 systems. */
4481 if (!TARGET_POWER10 && TARGET_MMA)
4482 {
4483 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4484 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4485
4486 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4487 }
4488
4489 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4490 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4491
4492 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4493 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4494
4495 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4496 && rs6000_tune != PROCESSOR_POWER5
4497 && rs6000_tune != PROCESSOR_POWER6
4498 && rs6000_tune != PROCESSOR_POWER7
4499 && rs6000_tune != PROCESSOR_POWER8
4500 && rs6000_tune != PROCESSOR_POWER9
4501 && rs6000_tune != PROCESSOR_POWER10
4502 && rs6000_tune != PROCESSOR_PPCA2
4503 && rs6000_tune != PROCESSOR_CELL
4504 && rs6000_tune != PROCESSOR_PPC476);
4505 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4506 || rs6000_tune == PROCESSOR_POWER5
4507 || rs6000_tune == PROCESSOR_POWER7
4508 || rs6000_tune == PROCESSOR_POWER8);
4509 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4510 || rs6000_tune == PROCESSOR_POWER5
4511 || rs6000_tune == PROCESSOR_POWER6
4512 || rs6000_tune == PROCESSOR_POWER7
4513 || rs6000_tune == PROCESSOR_POWER8
4514 || rs6000_tune == PROCESSOR_POWER9
4515 || rs6000_tune == PROCESSOR_POWER10
4516 || rs6000_tune == PROCESSOR_PPCE500MC
4517 || rs6000_tune == PROCESSOR_PPCE500MC64
4518 || rs6000_tune == PROCESSOR_PPCE5500
4519 || rs6000_tune == PROCESSOR_PPCE6500);
4520
4521 /* Allow debug switches to override the above settings. These are set to -1
4522 in rs6000.opt to indicate the user hasn't directly set the switch. */
4523 if (TARGET_ALWAYS_HINT >= 0)
4524 rs6000_always_hint = TARGET_ALWAYS_HINT;
4525
4526 if (TARGET_SCHED_GROUPS >= 0)
4527 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4528
4529 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4530 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4531
4532 rs6000_sched_restricted_insns_priority
4533 = (rs6000_sched_groups ? 1 : 0);
4534
4535 /* Handle -msched-costly-dep option. */
4536 rs6000_sched_costly_dep
4537 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4538
4539 if (rs6000_sched_costly_dep_str)
4540 {
4541 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4542 rs6000_sched_costly_dep = no_dep_costly;
4543 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4544 rs6000_sched_costly_dep = all_deps_costly;
4545 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4546 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4547 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4548 rs6000_sched_costly_dep = store_to_load_dep_costly;
4549 else
4550 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4551 atoi (rs6000_sched_costly_dep_str));
4552 }
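/* For example (illustrative): -msched-costly-dep=all treats every
   dependence as costly, while a numeric argument such as
   -msched-costly-dep=20 falls through to the atoi case above and is
   interpreted by the scheduler as a latency threshold.  */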
4553
4554 /* Handle -minsert-sched-nops option. */
4555 rs6000_sched_insert_nops
4556 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4557
4558 if (rs6000_sched_insert_nops_str)
4559 {
4560 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4561 rs6000_sched_insert_nops = sched_finish_none;
4562 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4563 rs6000_sched_insert_nops = sched_finish_pad_groups;
4564 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4565 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4566 else
4567 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4568 atoi (rs6000_sched_insert_nops_str));
4569 }
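/* Similarly (illustrative): -minsert-sched-nops=pad pads dispatch
   groups with nops, regroup_exact inserts nops to force exact group
   boundaries, and a bare number falls through to the atoi case
   above.  */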
4570
4571 /* Handle stack protector */
4572 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4573 #ifdef TARGET_THREAD_SSP_OFFSET
4574 rs6000_stack_protector_guard = SSP_TLS;
4575 #else
4576 rs6000_stack_protector_guard = SSP_GLOBAL;
4577 #endif
4578
4579 #ifdef TARGET_THREAD_SSP_OFFSET
4580 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4581 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4582 #endif
4583
4584 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4585 {
4586 char *endp;
4587 const char *str = rs6000_stack_protector_guard_offset_str;
4588
4589 errno = 0;
4590 long offset = strtol (str, &endp, 0);
4591 if (!*str || *endp || errno)
4592 error ("%qs is not a valid number in %qs", str,
4593 "-mstack-protector-guard-offset=");
4594
4595 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4596 || (TARGET_64BIT && (offset & 3)))
4597 error ("%qs is not a valid offset in %qs", str,
4598 "-mstack-protector-guard-offset=");
4599
4600 rs6000_stack_protector_guard_offset = offset;
4601 }
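/* Worked example (value assumed for illustration): with
   -mstack-protector-guard-offset=0x7fc the parse above yields 0x7fc,
   which is inside [-0x8000, 0x7fff] and is a multiple of 4, so it also
   passes the 64-bit check; the DS-form load used to fetch the guard
   needs the low two displacement bits to be zero.  */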
4602
4603 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4604 {
4605 const char *str = rs6000_stack_protector_guard_reg_str;
4606 int reg = decode_reg_name (str);
4607
4608 if (!IN_RANGE (reg, 1, 31))
4609 error ("%qs is not a valid base register in %qs", str,
4610 "-mstack-protector-guard-reg=");
4611
4612 rs6000_stack_protector_guard_reg = reg;
4613 }
4614
4615 if (rs6000_stack_protector_guard == SSP_TLS
4616 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4617 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
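/* Note on the defaults chosen above: with TARGET_THREAD_SSP_OFFSET the
   guard register defaults to GPR 13 in 64-bit mode and GPR 2 in 32-bit
   mode (the respective TLS thread-pointer registers), and only GPRs
   1..31 are accepted as an override.  */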
4618
4619 if (global_init_p)
4620 {
4621 #ifdef TARGET_REGNAMES
4622 /* If the user desires alternate register names, copy in the
4623 alternate names now. */
4624 if (TARGET_REGNAMES)
4625 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4626 #endif
4627
4628 /* Set aix_struct_return last, after the ABI is determined.
4629 If -maix-struct-return or -msvr4-struct-return was explicitly
4630 used, don't override with the ABI default. */
4631 if (!OPTION_SET_P (aix_struct_return))
4632 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4633
4634 #if 0
4635 /* IBM XL compiler defaults to unsigned bitfields. */
4636 if (TARGET_XL_COMPAT)
4637 flag_signed_bitfields = 0;
4638 #endif
4639
4640 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4641 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4642
4643 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4644
4645 /* We can only guarantee the availability of DI pseudo-ops when
4646 assembling for 64-bit targets. */
4647 if (!TARGET_64BIT)
4648 {
4649 targetm.asm_out.aligned_op.di = NULL;
4650 targetm.asm_out.unaligned_op.di = NULL;
4651 }
4652
4653
4654 /* Set branch target alignment, if not optimizing for size. */
4655 if (!optimize_size)
4656 {
4657 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4658 aligned 8-byte to avoid misprediction by the branch predictor. */
4659 if (rs6000_tune == PROCESSOR_TITAN
4660 || rs6000_tune == PROCESSOR_CELL)
4661 {
4662 if (flag_align_functions && !str_align_functions)
4663 str_align_functions = "8";
4664 if (flag_align_jumps && !str_align_jumps)
4665 str_align_jumps = "8";
4666 if (flag_align_loops && !str_align_loops)
4667 str_align_loops = "8";
4668 }
4669 if (rs6000_align_branch_targets)
4670 {
4671 if (flag_align_functions && !str_align_functions)
4672 str_align_functions = "16";
4673 if (flag_align_jumps && !str_align_jumps)
4674 str_align_jumps = "16";
4675 if (flag_align_loops && !str_align_loops)
4676 {
4677 can_override_loop_align = 1;
4678 str_align_loops = "16";
4679 }
4680 }
4681 }
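/* The str_align_* settings above are what -falign-functions=16,
   -falign-jumps=16 and -falign-loops=16 (or "8" for Cell/Titan) would
   set, applied only when the corresponding -falign-* option was
   enabled without an explicit value.  */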
4682
4683 /* Arrange to save and restore machine status around nested functions. */
4684 init_machine_status = rs6000_init_machine_status;
4685
4686 /* We should always be splitting complex arguments, but we can't break
4687 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4688 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4689 targetm.calls.split_complex_arg = NULL;
4690
4691 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4692 if (DEFAULT_ABI == ABI_AIX)
4693 targetm.calls.custom_function_descriptors = 0;
4694 }
4695
4696 /* Initialize rs6000_cost with the appropriate target costs. */
4697 if (optimize_size)
4698 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4699 else
4700 switch (rs6000_tune)
4701 {
4702 case PROCESSOR_RS64A:
4703 rs6000_cost = &rs64a_cost;
4704 break;
4705
4706 case PROCESSOR_MPCCORE:
4707 rs6000_cost = &mpccore_cost;
4708 break;
4709
4710 case PROCESSOR_PPC403:
4711 rs6000_cost = &ppc403_cost;
4712 break;
4713
4714 case PROCESSOR_PPC405:
4715 rs6000_cost = &ppc405_cost;
4716 break;
4717
4718 case PROCESSOR_PPC440:
4719 rs6000_cost = &ppc440_cost;
4720 break;
4721
4722 case PROCESSOR_PPC476:
4723 rs6000_cost = &ppc476_cost;
4724 break;
4725
4726 case PROCESSOR_PPC601:
4727 rs6000_cost = &ppc601_cost;
4728 break;
4729
4730 case PROCESSOR_PPC603:
4731 rs6000_cost = &ppc603_cost;
4732 break;
4733
4734 case PROCESSOR_PPC604:
4735 rs6000_cost = &ppc604_cost;
4736 break;
4737
4738 case PROCESSOR_PPC604e:
4739 rs6000_cost = &ppc604e_cost;
4740 break;
4741
4742 case PROCESSOR_PPC620:
4743 rs6000_cost = &ppc620_cost;
4744 break;
4745
4746 case PROCESSOR_PPC630:
4747 rs6000_cost = &ppc630_cost;
4748 break;
4749
4750 case PROCESSOR_CELL:
4751 rs6000_cost = &ppccell_cost;
4752 break;
4753
4754 case PROCESSOR_PPC750:
4755 case PROCESSOR_PPC7400:
4756 rs6000_cost = &ppc750_cost;
4757 break;
4758
4759 case PROCESSOR_PPC7450:
4760 rs6000_cost = &ppc7450_cost;
4761 break;
4762
4763 case PROCESSOR_PPC8540:
4764 case PROCESSOR_PPC8548:
4765 rs6000_cost = &ppc8540_cost;
4766 break;
4767
4768 case PROCESSOR_PPCE300C2:
4769 case PROCESSOR_PPCE300C3:
4770 rs6000_cost = &ppce300c2c3_cost;
4771 break;
4772
4773 case PROCESSOR_PPCE500MC:
4774 rs6000_cost = &ppce500mc_cost;
4775 break;
4776
4777 case PROCESSOR_PPCE500MC64:
4778 rs6000_cost = &ppce500mc64_cost;
4779 break;
4780
4781 case PROCESSOR_PPCE5500:
4782 rs6000_cost = &ppce5500_cost;
4783 break;
4784
4785 case PROCESSOR_PPCE6500:
4786 rs6000_cost = &ppce6500_cost;
4787 break;
4788
4789 case PROCESSOR_TITAN:
4790 rs6000_cost = &titan_cost;
4791 break;
4792
4793 case PROCESSOR_POWER4:
4794 case PROCESSOR_POWER5:
4795 rs6000_cost = &power4_cost;
4796 break;
4797
4798 case PROCESSOR_POWER6:
4799 rs6000_cost = &power6_cost;
4800 break;
4801
4802 case PROCESSOR_POWER7:
4803 rs6000_cost = &power7_cost;
4804 break;
4805
4806 case PROCESSOR_POWER8:
4807 rs6000_cost = &power8_cost;
4808 break;
4809
4810 case PROCESSOR_POWER9:
4811 rs6000_cost = &power9_cost;
4812 break;
4813
4814 case PROCESSOR_POWER10:
4815 rs6000_cost = &power10_cost;
4816 break;
4817
4818 case PROCESSOR_PPCA2:
4819 rs6000_cost = &ppca2_cost;
4820 break;
4821
4822 default:
4823 gcc_unreachable ();
4824 }
4825
4826 if (global_init_p)
4827 {
4828 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4829 param_simultaneous_prefetches,
4830 rs6000_cost->simultaneous_prefetches);
4831 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4832 param_l1_cache_size,
4833 rs6000_cost->l1_cache_size);
4834 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4835 param_l1_cache_line_size,
4836 rs6000_cost->cache_line_size);
4837 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4838 param_l2_cache_size,
4839 rs6000_cost->l2_cache_size);
4840
4841 /* Increase loop peeling limits based on performance analysis. */
4842 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4843 param_max_peeled_insns, 400);
4844 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4845 param_max_completely_peeled_insns, 400);
4846
4847 /* The lxvl/stxvl instructions don't perform well before Power10. */
4848 if (TARGET_POWER10)
4849 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4850 param_vect_partial_vector_usage, 1);
4851 else
4852 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4853 param_vect_partial_vector_usage, 0);
4854
4855 /* Use the 'model' -fsched-pressure algorithm by default. */
4856 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4857 param_sched_pressure_algorithm,
4858 SCHED_PRESSURE_MODEL);
4859
4860 /* If using typedef char *va_list, signal that
4861 __builtin_va_start (&ap, 0) can be optimized to
4862 ap = __builtin_next_arg (0). */
4863 if (DEFAULT_ABI != ABI_V4)
4864 targetm.expand_builtin_va_start = NULL;
4865 }
4866
4867 rs6000_override_options_after_change ();
4868
4869 /* If not explicitly specified via option, decide whether to generate indexed
4870 load/store instructions. A value of -1 indicates that the
4871 initial value of this variable has not been overwritten. During
4872 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4873 if (TARGET_AVOID_XFORM == -1)
4874 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4875 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4876 need indexed accesses and the type used is the scalar type of the element
4877 being loaded or stored. */
4878 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4879 && !TARGET_ALTIVEC);
4880
4881 /* Set the -mrecip options. */
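/* For example, with -mrecip=<x>,!<y> the mask bits for option <x> are set
   and those for <y> are cleared; the special entry "default" selects
   RECIP_HIGH_PRECISION or RECIP_LOW_PRECISION depending on
   -mrecip-precision.  */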
4882 if (rs6000_recip_name)
4883 {
4884 char *p = ASTRDUP (rs6000_recip_name);
4885 char *q;
4886 unsigned int mask, i;
4887 bool invert;
4888
4889 while ((q = strtok (p, ",")) != NULL)
4890 {
4891 p = NULL;
4892 if (*q == '!')
4893 {
4894 invert = true;
4895 q++;
4896 }
4897 else
4898 invert = false;
4899
4900 if (!strcmp (q, "default"))
4901 mask = ((TARGET_RECIP_PRECISION)
4902 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4903 else
4904 {
4905 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4906 if (!strcmp (q, recip_options[i].string))
4907 {
4908 mask = recip_options[i].mask;
4909 break;
4910 }
4911
4912 if (i == ARRAY_SIZE (recip_options))
4913 {
4914 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4915 invert = false;
4916 mask = 0;
4917 ret = false;
4918 }
4919 }
4920
4921 if (invert)
4922 rs6000_recip_control &= ~mask;
4923 else
4924 rs6000_recip_control |= mask;
4925 }
4926 }
4927
4928 /* Set the builtin mask from the various options that could affect which
4929 builtins are enabled. In the past we used target_flags, but we've run out
4930 of bits, and some options are no longer in target_flags. */
4931 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4932 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4933 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4934 rs6000_builtin_mask);
4935
4936 /* Initialize all of the registers. */
4937 rs6000_init_hard_regno_mode_ok (global_init_p);
4938
4939 /* Save the initial options in case the user uses function-specific options. */
4940 if (global_init_p)
4941 target_option_default_node = target_option_current_node
4942 = build_target_option_node (&global_options, &global_options_set);
4943
4944 /* If not explicitly specified via option, decide whether to generate the
4945 extra blr's required to preserve the link stack on some cpus (e.g. 476). */
4946 if (TARGET_LINK_STACK == -1)
4947 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4948
4949 /* Deprecate use of -mno-speculate-indirect-jumps. */
4950 if (!rs6000_speculate_indirect_jumps)
4951 warning (0, "%qs is deprecated and not recommended in any circumstances",
4952 "-mno-speculate-indirect-jumps");
4953
4954 return ret;
4955 }
4956
4957 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4958 define the target cpu type. */
4959
4960 static void
4961 rs6000_option_override (void)
4962 {
4963 (void) rs6000_option_override_internal (true);
4964 }
4965
4966 \f
4967 /* Implement LOOP_ALIGN. */
4968 align_flags
4969 rs6000_loop_align (rtx label)
4970 {
4971 basic_block bb;
4972 int ninsns;
4973
4974 /* Don't override loop alignment if -falign-loops was specified. */
4975 if (!can_override_loop_align)
4976 return align_loops;
4977
4978 bb = BLOCK_FOR_INSN (label);
4979 ninsns = num_loop_insns (bb->loop_father);
4980
4981 /* Align small loops to 32 bytes (align_flags (5), i.e. 2^5 bytes) to fit in an icache sector; otherwise return the default. */
4982 if (ninsns > 4 && ninsns <= 8
4983 && (rs6000_tune == PROCESSOR_POWER4
4984 || rs6000_tune == PROCESSOR_POWER5
4985 || rs6000_tune == PROCESSOR_POWER6
4986 || rs6000_tune == PROCESSOR_POWER7
4987 || rs6000_tune == PROCESSOR_POWER8))
4988 return align_flags (5);
4989 else
4990 return align_loops;
4991 }
4992
4993 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4994 after applying N iterations. This routine does not determine how many
4995 iterations are required to reach the desired alignment. */
4996
4997 static bool
4998 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4999 {
5000 if (is_packed)
5001 return false;
5002
5003 if (TARGET_32BIT)
5004 {
5005 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5006 return true;
5007
5008 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5009 return true;
5010
5011 return false;
5012 }
5013 else
5014 {
5015 if (TARGET_MACHO)
5016 return false;
5017
5018 /* Assuming that all other types are naturally aligned. CHECKME! */
5019 return true;
5020 }
5021 }
5022
5023 /* Return true if the vector misalignment factor is supported by the
5024 target. */
5025 static bool
5026 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5027 const_tree type,
5028 int misalignment,
5029 bool is_packed)
5030 {
5031 if (TARGET_VSX)
5032 {
5033 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5034 return true;
5035
5036 /* Return false if the movmisalign pattern is not supported for this mode. */
5037 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5038 return false;
5039
5040 if (misalignment == -1)
5041 {
5042 /* Misalignment factor is unknown at compile time but we know
5043 it's word aligned. */
5044 if (rs6000_vector_alignment_reachable (type, is_packed))
5045 {
5046 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5047
5048 if (element_size == 64 || element_size == 32)
5049 return true;
5050 }
5051
5052 return false;
5053 }
5054
5055 /* VSX supports word-aligned vectors. */
5056 if (misalignment % 4 == 0)
5057 return true;
5058 }
5059 return false;
5060 }
5061
5062 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5063 static int
5064 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5065 tree vectype, int misalign)
5066 {
5067 unsigned elements;
5068 tree elem_type;
5069
5070 switch (type_of_cost)
5071 {
5072 case scalar_stmt:
5073 case scalar_store:
5074 case vector_stmt:
5075 case vector_store:
5076 case vec_to_scalar:
5077 case scalar_to_vec:
5078 case cond_branch_not_taken:
5079 return 1;
5080 case scalar_load:
5081 case vector_load:
5082 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5083 return 2;
5084
5085 case vec_perm:
5086 /* Power7 has only one permute unit, make it a bit expensive. */
5087 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5088 return 3;
5089 else
5090 return 1;
5091
5092 case vec_promote_demote:
5093 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5094 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5095 return 4;
5096 else
5097 return 1;
5098
5099 case cond_branch_taken:
5100 return 3;
5101
5102 case unaligned_load:
5103 case vector_gather_load:
5104 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5105 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5106 return 2;
5107
5108 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5109 {
5110 elements = TYPE_VECTOR_SUBPARTS (vectype);
5111 /* See PR102767: also handle V1TI (one element) for consistency. */
5112 if (elements == 2 || elements == 1)
5113 /* Double word aligned. */
5114 return 4;
5115
5116 if (elements == 4)
5117 {
5118 switch (misalign)
5119 {
5120 case 8:
5121 /* Double word aligned. */
5122 return 4;
5123
5124 case -1:
5125 /* Unknown misalignment. */
5126 case 4:
5127 case 12:
5128 /* Word aligned. */
5129 return 33;
5130
5131 default:
5132 gcc_unreachable ();
5133 }
5134 }
5135 }
5136
5137 if (TARGET_ALTIVEC)
5138 /* Misaligned loads are not supported. */
5139 gcc_unreachable ();
5140
5141 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5142 return 4;
5143
5144 case unaligned_store:
5145 case vector_scatter_store:
5146 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5147 return 1;
5148
5149 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5150 {
5151 elements = TYPE_VECTOR_SUBPARTS (vectype);
5152 /* See PR102767: also handle V1TI (one element) for consistency. */
5153 if (elements == 2 || elements == 1)
5154 /* Double word aligned. */
5155 return 2;
5156
5157 if (elements == 4)
5158 {
5159 switch (misalign)
5160 {
5161 case 8:
5162 /* Double word aligned. */
5163 return 2;
5164
5165 case -1:
5166 /* Unknown misalignment. */
5167 case 4:
5168 case 12:
5169 /* Word aligned. */
5170 return 23;
5171
5172 default:
5173 gcc_unreachable ();
5174 }
5175 }
5176 }
5177
5178 if (TARGET_ALTIVEC)
5179 /* Misaligned stores are not supported. */
5180 gcc_unreachable ();
5181
5182 return 2;
5183
5184 case vec_construct:
5185 /* This is a rough approximation assuming non-constant elements
5186 constructed into a vector via element insertion. FIXME:
5187 vec_construct is not granular enough for uniformly good
5188 decisions. If the initialization is a splat, this is
5189 cheaper than we estimate. Improve this someday. */
5190 elem_type = TREE_TYPE (vectype);
5191 /* 32-bit scalar floats loaded into registers are stored as double
5192 precision, so we need 2 permutes, 2 converts, and 1 merge
5193 to construct a vector of short floats from them. */
5194 if (SCALAR_FLOAT_TYPE_P (elem_type)
5195 && TYPE_PRECISION (elem_type) == 32)
5196 return 5;
5197 /* On POWER9, integer vector types are built up in GPRs and then
5198 use a direct move (2 cycles). For POWER8 this is even worse,
5199 as we need two direct moves and a merge, and the direct moves
5200 are five cycles. */
5201 else if (INTEGRAL_TYPE_P (elem_type))
5202 {
5203 if (TARGET_P9_VECTOR)
5204 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5205 else
5206 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5207 }
5208 else
5209 /* V2DFmode doesn't need a direct move. */
5210 return 2;
5211
5212 default:
5213 gcc_unreachable ();
5214 }
5215 }
5216
5217 /* Implement targetm.vectorize.preferred_simd_mode. */
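/* For 16-byte vectors this maps, e.g., SFmode to V4SFmode and DImode to
   V2DImode, falling back to word_mode when no suitable vector mode exists
   or vector memory support is disabled for it.  */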
5218
5219 static machine_mode
5220 rs6000_preferred_simd_mode (scalar_mode mode)
5221 {
5222 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5223
5224 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5225 return vmode.require ();
5226
5227 return word_mode;
5228 }
5229
5230 class rs6000_cost_data : public vector_costs
5231 {
5232 public:
5233 using vector_costs::vector_costs;
5234
5235 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5236 stmt_vec_info stmt_info, tree vectype,
5237 int misalign,
5238 vect_cost_model_location where) override;
5239 void finish_cost (const vector_costs *) override;
5240
5241 protected:
5242 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5243 vect_cost_model_location, unsigned int);
5244 void density_test (loop_vec_info);
5245 void adjust_vect_cost_per_loop (loop_vec_info);
5246
5247 /* Total number of vectorized stmts (loop only). */
5248 unsigned m_nstmts = 0;
5249 /* Total number of loads (loop only). */
5250 unsigned m_nloads = 0;
5251 /* Possible extra penalized cost on vector construction (loop only). */
5252 unsigned m_extra_ctor_cost = 0;
5253 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5254 instruction is needed by the vectorization. */
5255 bool m_vect_nonmem = false;
5256 };
5257
5258 /* Test for likely overcommitment of vector hardware resources. If a
5259 loop iteration is relatively large, and too large a percentage of
5260 instructions in the loop are vectorized, the cost model may not
5261 adequately reflect delays from unavailable vector resources.
5262 Penalize the loop body cost for this case. */
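/* As a worked illustration: with vec_cost == 90 and not_vec_cost == 5
   below, the density is 90 * 100 / 95, i.e. 94%; if that exceeds
   rs6000_density_pct_threshold and the total exceeds
   rs6000_density_size_threshold, the body cost is scaled up by
   rs6000_density_penalty percent.  */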
5263
5264 void
5265 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5266 {
5267 /* This density test only cares about the cost of the vector version of
5268 the loop, so return immediately if we are costing for the scalar
5269 version (namely computing the single scalar iteration cost). */
5270 if (m_costing_for_scalar)
5271 return;
5272
5273 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5274 basic_block *bbs = get_loop_body (loop);
5275 int nbbs = loop->num_nodes;
5276 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5277
5278 for (int i = 0; i < nbbs; i++)
5279 {
5280 basic_block bb = bbs[i];
5281 gimple_stmt_iterator gsi;
5282
5283 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5284 {
5285 gimple *stmt = gsi_stmt (gsi);
5286 if (is_gimple_debug (stmt))
5287 continue;
5288
5289 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5290
5291 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5292 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5293 not_vec_cost++;
5294 }
5295 }
5296
5297 free (bbs);
5298 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5299
5300 if (density_pct > rs6000_density_pct_threshold
5301 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5302 {
5303 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_NOTE, vect_location,
5306 "density %d%%, cost %d exceeds threshold, penalizing "
5307 "loop body cost by %u%%\n", density_pct,
5308 vec_cost + not_vec_cost, rs6000_density_penalty);
5309 }
5310
5311 /* Check whether we need to penalize the body cost to account
5312 for excess strided or elementwise loads. */
5313 if (m_extra_ctor_cost > 0)
5314 {
5315 gcc_assert (m_nloads <= m_nstmts);
5316 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5317
5318 /* The loop is likely to be bound by the latency and execution
5319 resources of many scalar loads (strided or elementwise loads
5320 into a vector) when both conditions below hold:
5321 1. there are many loads, so it is easy to end up waiting a
5322 long time on the load units;
5323 2. loads make up a large proportion of all vectorized
5324 statements, so it is hard to schedule other statements to
5325 spread among the loads.
5326 One typical case is the innermost loop of the hotspot of SPEC2017
5327 503.bwaves_r without loop interchange. */
5328 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5329 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5330 {
5331 m_costs[vect_body] += m_extra_ctor_cost;
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_NOTE, vect_location,
5334 "Found %u loads and "
5335 "load pct. %u%% exceed "
5336 "the threshold, "
5337 "penalizing loop body "
5338 "cost by extra cost %u "
5339 "for ctor.\n",
5340 m_nloads, load_pct,
5341 m_extra_ctor_cost);
5342 }
5343 }
5344 }
5345
5346 /* Implement targetm.vectorize.create_costs. */
5347
5348 static vector_costs *
5349 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5350 {
5351 return new rs6000_cost_data (vinfo, costing_for_scalar);
5352 }
5353
5354 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5355 For some statements, we would like to fine-tune the cost further, on top
5356 of the rs6000_builtin_vectorization_cost handling, which has no
5357 information on statement operation codes etc. One typical case here is
5358 COND_EXPR: it costs the same as a simple FXU instruction when evaluated
5359 for scalar cost, but it should be priced higher since it will be
5360 transformed into either compare + branch or compare + isel instructions. */
5361
5362 static unsigned
5363 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5364 struct _stmt_vec_info *stmt_info)
5365 {
5366 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5367 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5368 {
5369 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5370 if (subcode == COND_EXPR)
5371 return 2;
5372 }
5373
5374 return 0;
5375 }
5376
5377 /* Helper function for add_stmt_cost. Check each statement cost
5378 entry, gather information and update the target_cost fields
5379 accordingly. */
5380 void
5381 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5382 stmt_vec_info stmt_info,
5383 vect_cost_model_location where,
5384 unsigned int orig_count)
5385 {
5386
5387 /* Check whether we're doing something other than just a copy loop.
5388 Not all such loops may be profitably vectorized; see
5389 rs6000_finish_cost. */
5390 if (kind == vec_to_scalar
5391 || kind == vec_perm
5392 || kind == vec_promote_demote
5393 || kind == vec_construct
5394 || kind == scalar_to_vec
5395 || (where == vect_body && kind == vector_stmt))
5396 m_vect_nonmem = true;
5397
5398 /* Gather some information when we are costing the vectorized instruction
5399 for the statements located in a loop body. */
5400 if (!m_costing_for_scalar
5401 && is_a<loop_vec_info> (m_vinfo)
5402 && where == vect_body)
5403 {
5404 m_nstmts += orig_count;
5405
5406 if (kind == scalar_load || kind == vector_load
5407 || kind == unaligned_load || kind == vector_gather_load)
5408 m_nloads += orig_count;
5409
5410 /* Power processors do not currently have instructions for strided
5411 and elementwise loads, and instead we must generate multiple
5412 scalar loads. This leads to undercounting of the cost. We
5413 account for this by scaling the construction cost by the number
5414 of elements involved, and saving this as extra cost that we may
5415 or may not need to apply. When finalizing the cost of the loop,
5416 the extra penalty is applied when the load density heuristics
5417 are satisfied. */
5418 if (kind == vec_construct && stmt_info
5419 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5420 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5421 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5422 {
5423 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5424 unsigned int nunits = vect_nunits_for_cost (vectype);
5425 /* As PR103702 shows, it's possible that the vectorizer wants to
5426 cost only one unit here; in that case there is no need for any
5427 penalization, so simply return early. */
5428 if (nunits == 1)
5429 return;
5430 /* The i386 port adopts nunits * stmt_cost as the penalized cost
5431 for this kind of penalization; we used to follow it but
5432 found it could result in an unreliable body cost, especially
5433 for V16QI/V8HI modes. To improve on that, we use this
5434 heuristic instead: for each scalar load, use a penalized
5435 cost of 2 when there are 2 nunits, and 1 in the other
5436 cases. There is not much supporting theory; it is mainly
5437 concluded from broad performance evaluations on Power8,
5438 Power9 and Power10. One possibly related point is that
5439 vector construction for more units uses more insns, so
5440 there are more chances to schedule them well (even run in
5441 parallel when enough units are available at that time), so
5442 it seems reasonable not to penalize them as much. */
5443 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5444 unsigned int extra_cost = nunits * adjusted_cost;
5445 m_extra_ctor_cost += extra_cost;
5446 }
5447 }
5448 }
5449
5450 unsigned
5451 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5452 stmt_vec_info stmt_info, tree vectype,
5453 int misalign, vect_cost_model_location where)
5454 {
5455 unsigned retval = 0;
5456
5457 if (flag_vect_cost_model)
5458 {
5459 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5460 misalign);
5461 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5462 /* Statements in an inner loop relative to the loop being
5463 vectorized are weighted more heavily. The value here is
5464 arbitrary and could potentially be improved with analysis. */
5465 unsigned int orig_count = count;
5466 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5467 m_costs[where] += retval;
5468
5469 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5470 }
5471
5472 return retval;
5473 }
5474
5475 /* For target-specific vectorization costs that can't be handled per stmt,
5476 we check the requisite conditions and adjust the vectorization cost
5477 accordingly if they are satisfied. One typical example is modeling the
5478 shift cost for vectors with length by counting the number of required
5479 lengths under the condition LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5480
5481 void
5482 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5483 {
5484 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5485 {
5486 rgroup_controls *rgc;
5487 unsigned int num_vectors_m1;
5488 unsigned int shift_cnt = 0;
5489 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5490 if (rgc->type)
5491 /* Each length needs one shift to fill into bits 0-7. */
5492 shift_cnt += num_vectors_m1 + 1;
5493
5494 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL_TREE, 0, vect_body);
5495 }
5496 }
5497
5498 void
5499 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5500 {
5501 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5502 {
5503 adjust_vect_cost_per_loop (loop_vinfo);
5504 density_test (loop_vinfo);
5505
5506 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5507 that require versioning for any reason. The vectorization is at
5508 best a wash inside the loop, and the versioning checks make
5509 profitability highly unlikely and potentially quite harmful. */
5510 if (!m_vect_nonmem
5511 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5512 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5513 m_costs[vect_body] += 10000;
5514 }
5515
5516 vector_costs::finish_cost (scalar_costs);
5517 }
5518
5519 /* Implement targetm.loop_unroll_adjust. */
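/* When unroll_only_small_loops is in effect, unrolling is capped at 4x
   for loops of at most 6 insns and at 2x for loops of at most 10 insns;
   larger loops are not unrolled at all.  */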
5520
5521 static unsigned
5522 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5523 {
5524 if (unroll_only_small_loops)
5525 {
5526 /* TODO: These are hardcoded values right now. We probably should use
5527 a PARAM here. */
5528 if (loop->ninsns <= 6)
5529 return MIN (4, nunroll);
5530 if (loop->ninsns <= 10)
5531 return MIN (2, nunroll);
5532
5533 return 0;
5534 }
5535
5536 return nunroll;
5537 }
5538
5539 /* Returns a function decl for a vectorized version of the builtin function
5540 with builtin function code FN and the result vector type TYPE_OUT, or NULL_TREE
5541 if it is not available.
5542
5543 Implement targetm.vectorize.builtin_vectorized_function. */
5544
5545 static tree
5546 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5547 tree type_in)
5548 {
5549 machine_mode in_mode, out_mode;
5550 int in_n, out_n;
5551
5552 if (TARGET_DEBUG_BUILTIN)
5553 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5554 combined_fn_name (combined_fn (fn)),
5555 GET_MODE_NAME (TYPE_MODE (type_out)),
5556 GET_MODE_NAME (TYPE_MODE (type_in)));
5557
5558 /* TODO: Should this be gcc_assert? */
5559 if (TREE_CODE (type_out) != VECTOR_TYPE
5560 || TREE_CODE (type_in) != VECTOR_TYPE)
5561 return NULL_TREE;
5562
5563 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5564 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5565 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5566 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5567
5568 switch (fn)
5569 {
5570 CASE_CFN_COPYSIGN:
5571 if (VECTOR_UNIT_VSX_P (V2DFmode)
5572 && out_mode == DFmode && out_n == 2
5573 && in_mode == DFmode && in_n == 2)
5574 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5575 if (VECTOR_UNIT_VSX_P (V4SFmode)
5576 && out_mode == SFmode && out_n == 4
5577 && in_mode == SFmode && in_n == 4)
5578 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5579 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5580 && out_mode == SFmode && out_n == 4
5581 && in_mode == SFmode && in_n == 4)
5582 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5583 break;
5584 CASE_CFN_CEIL:
5585 if (VECTOR_UNIT_VSX_P (V2DFmode)
5586 && out_mode == DFmode && out_n == 2
5587 && in_mode == DFmode && in_n == 2)
5588 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5589 if (VECTOR_UNIT_VSX_P (V4SFmode)
5590 && out_mode == SFmode && out_n == 4
5591 && in_mode == SFmode && in_n == 4)
5592 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5593 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5594 && out_mode == SFmode && out_n == 4
5595 && in_mode == SFmode && in_n == 4)
5596 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5597 break;
5598 CASE_CFN_FLOOR:
5599 if (VECTOR_UNIT_VSX_P (V2DFmode)
5600 && out_mode == DFmode && out_n == 2
5601 && in_mode == DFmode && in_n == 2)
5602 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5603 if (VECTOR_UNIT_VSX_P (V4SFmode)
5604 && out_mode == SFmode && out_n == 4
5605 && in_mode == SFmode && in_n == 4)
5606 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5607 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5608 && out_mode == SFmode && out_n == 4
5609 && in_mode == SFmode && in_n == 4)
5610 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5611 break;
5612 CASE_CFN_FMA:
5613 if (VECTOR_UNIT_VSX_P (V2DFmode)
5614 && out_mode == DFmode && out_n == 2
5615 && in_mode == DFmode && in_n == 2)
5616 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5617 if (VECTOR_UNIT_VSX_P (V4SFmode)
5618 && out_mode == SFmode && out_n == 4
5619 && in_mode == SFmode && in_n == 4)
5620 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5621 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5622 && out_mode == SFmode && out_n == 4
5623 && in_mode == SFmode && in_n == 4)
5624 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5625 break;
5626 CASE_CFN_TRUNC:
5627 if (VECTOR_UNIT_VSX_P (V2DFmode)
5628 && out_mode == DFmode && out_n == 2
5629 && in_mode == DFmode && in_n == 2)
5630 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5631 if (VECTOR_UNIT_VSX_P (V4SFmode)
5632 && out_mode == SFmode && out_n == 4
5633 && in_mode == SFmode && in_n == 4)
5634 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5635 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5636 && out_mode == SFmode && out_n == 4
5637 && in_mode == SFmode && in_n == 4)
5638 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5639 break;
5640 CASE_CFN_NEARBYINT:
5641 if (VECTOR_UNIT_VSX_P (V2DFmode)
5642 && flag_unsafe_math_optimizations
5643 && out_mode == DFmode && out_n == 2
5644 && in_mode == DFmode && in_n == 2)
5645 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5646 if (VECTOR_UNIT_VSX_P (V4SFmode)
5647 && flag_unsafe_math_optimizations
5648 && out_mode == SFmode && out_n == 4
5649 && in_mode == SFmode && in_n == 4)
5650 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5651 break;
5652 CASE_CFN_RINT:
5653 if (VECTOR_UNIT_VSX_P (V2DFmode)
5654 && !flag_trapping_math
5655 && out_mode == DFmode && out_n == 2
5656 && in_mode == DFmode && in_n == 2)
5657 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5658 if (VECTOR_UNIT_VSX_P (V4SFmode)
5659 && !flag_trapping_math
5660 && out_mode == SFmode && out_n == 4
5661 && in_mode == SFmode && in_n == 4)
5662 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5663 break;
5664 default:
5665 break;
5666 }
5667
5668 /* Generate calls to libmass if appropriate. */
5669 if (rs6000_veclib_handler)
5670 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5671
5672 return NULL_TREE;
5673 }
5674
5675 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5676 library with vectorized intrinsics. */
5677
5678 static tree
5679 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5680 tree type_in)
5681 {
5682 char name[32];
5683 const char *suffix = NULL;
5684 tree fntype, new_fndecl, bdecl = NULL_TREE;
5685 int n_args = 1;
5686 const char *bname;
5687 machine_mode el_mode, in_mode;
5688 int n, in_n;
5689
5690 /* Libmass is suitable only for unsafe math, as it does not correctly support
5691 parts of IEEE (such as denormals) with the required precision. Only support
5692 it if we have VSX, so we can use the simd d2 or f4 functions.
5693 XXX: Add variable length support. */
5694 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5695 return NULL_TREE;
5696
5697 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5698 n = TYPE_VECTOR_SUBPARTS (type_out);
5699 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5700 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5701 if (el_mode != in_mode
5702 || n != in_n)
5703 return NULL_TREE;
5704
5705 switch (fn)
5706 {
5707 CASE_CFN_ATAN2:
5708 CASE_CFN_HYPOT:
5709 CASE_CFN_POW:
5710 n_args = 2;
5711 gcc_fallthrough ();
5712
5713 CASE_CFN_ACOS:
5714 CASE_CFN_ACOSH:
5715 CASE_CFN_ASIN:
5716 CASE_CFN_ASINH:
5717 CASE_CFN_ATAN:
5718 CASE_CFN_ATANH:
5719 CASE_CFN_CBRT:
5720 CASE_CFN_COS:
5721 CASE_CFN_COSH:
5722 CASE_CFN_ERF:
5723 CASE_CFN_ERFC:
5724 CASE_CFN_EXP2:
5725 CASE_CFN_EXP:
5726 CASE_CFN_EXPM1:
5727 CASE_CFN_LGAMMA:
5728 CASE_CFN_LOG10:
5729 CASE_CFN_LOG1P:
5730 CASE_CFN_LOG2:
5731 CASE_CFN_LOG:
5732 CASE_CFN_SIN:
5733 CASE_CFN_SINH:
5734 CASE_CFN_SQRT:
5735 CASE_CFN_TAN:
5736 CASE_CFN_TANH:
5737 if (el_mode == DFmode && n == 2)
5738 {
5739 bdecl = mathfn_built_in (double_type_node, fn);
5740 suffix = "d2"; /* pow -> powd2 */
5741 }
5742 else if (el_mode == SFmode && n == 4)
5743 {
5744 bdecl = mathfn_built_in (float_type_node, fn);
5745 suffix = "4"; /* powf -> powf4 */
5746 }
5747 else
5748 return NULL_TREE;
5749 if (!bdecl)
5750 return NULL_TREE;
5751 break;
5752
5753 default:
5754 return NULL_TREE;
5755 }
5756
5757 gcc_assert (suffix != NULL);
5758 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5759 if (!bname)
5760 return NULL_TREE;
5761
5762 strcpy (name, bname + strlen ("__builtin_"));
5763 strcat (name, suffix);
5764
5765 if (n_args == 1)
5766 fntype = build_function_type_list (type_out, type_in, NULL);
5767 else if (n_args == 2)
5768 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5769 else
5770 gcc_unreachable ();
5771
5772 /* Build a function declaration for the vectorized function. */
5773 new_fndecl = build_decl (BUILTINS_LOCATION,
5774 FUNCTION_DECL, get_identifier (name), fntype);
5775 TREE_PUBLIC (new_fndecl) = 1;
5776 DECL_EXTERNAL (new_fndecl) = 1;
5777 DECL_IS_NOVOPS (new_fndecl) = 1;
5778 TREE_READONLY (new_fndecl) = 1;
5779
5780 return new_fndecl;
5781 }
5782
5783 \f
5784 /* Default CPU string for rs6000*_file_start functions. */
5785 static const char *rs6000_default_cpu;
5786
5787 #ifdef USING_ELFOS_H
5788 const char *rs6000_machine;
5789
5790 const char *
5791 rs6000_machine_from_flags (void)
5792 {
5793 /* For some CPUs, the machine cannot be determined from the ISA flags alone.
5794 We have to check for those CPUs first. */
5795 switch (rs6000_cpu)
5796 {
5797 case PROCESSOR_PPC8540:
5798 case PROCESSOR_PPC8548:
5799 return "e500";
5800
5801 case PROCESSOR_PPCE300C2:
5802 case PROCESSOR_PPCE300C3:
5803 return "e300";
5804
5805 case PROCESSOR_PPCE500MC:
5806 return "e500mc";
5807
5808 case PROCESSOR_PPCE500MC64:
5809 return "e500mc64";
5810
5811 case PROCESSOR_PPCE5500:
5812 return "e5500";
5813
5814 case PROCESSOR_PPCE6500:
5815 return "e6500";
5816
5817 default:
5818 break;
5819 }
5820
5821 HOST_WIDE_INT flags = rs6000_isa_flags;
5822
5823 /* Disable the flags that should never influence the .machine selection. */
5824 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5825
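/* Each test below asks whether FLAGS enables anything that is new at that
   ISA level relative to the previous one; the first match selects the
   newest applicable .machine value, falling back to "ppc64"/"ppc".  */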
5826 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5827 return "power10";
5828 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5829 return "power9";
5830 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5831 return "power8";
5832 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5833 return "power7";
5834 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5835 return "power6";
5836 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5837 return "power5";
5838 if ((flags & ISA_2_1_MASKS) != 0)
5839 return "power4";
5840 if ((flags & OPTION_MASK_POWERPC64) != 0)
5841 return "ppc64";
5842 return "ppc";
5843 }
5844
5845 void
5846 emit_asm_machine (void)
5847 {
5848 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5849 }
5850 #endif
5851
5852 /* Do anything needed at the start of the asm file. */
5853
5854 static void
5855 rs6000_file_start (void)
5856 {
5857 char buffer[80];
5858 const char *start = buffer;
5859 FILE *file = asm_out_file;
5860
5861 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5862
5863 default_file_start ();
5864
5865 if (flag_verbose_asm)
5866 {
5867 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5868
5869 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5870 {
5871 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5872 start = "";
5873 }
5874
5875 if (OPTION_SET_P (rs6000_cpu_index))
5876 {
5877 fprintf (file, "%s -mcpu=%s", start,
5878 processor_target_table[rs6000_cpu_index].name);
5879 start = "";
5880 }
5881
5882 if (OPTION_SET_P (rs6000_tune_index))
5883 {
5884 fprintf (file, "%s -mtune=%s", start,
5885 processor_target_table[rs6000_tune_index].name);
5886 start = "";
5887 }
5888
5889 if (PPC405_ERRATUM77)
5890 {
5891 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5892 start = "";
5893 }
5894
5895 #ifdef USING_ELFOS_H
5896 switch (rs6000_sdata)
5897 {
5898 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5899 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5900 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5901 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5902 }
5903
5904 if (rs6000_sdata && g_switch_value)
5905 {
5906 fprintf (file, "%s -G %d", start,
5907 g_switch_value);
5908 start = "";
5909 }
5910 #endif
5911
5912 if (*start == '\0')
5913 putc ('\n', file);
5914 }
5915
5916 #ifdef USING_ELFOS_H
5917 rs6000_machine = rs6000_machine_from_flags ();
5918 emit_asm_machine ();
5919 #endif
5920
5921 if (DEFAULT_ABI == ABI_ELFv2)
5922 fprintf (file, "\t.abiversion 2\n");
5923 }
5924
5925 \f
5926 /* Return nonzero if this function is known to have a null epilogue. */
5927
5928 int
5929 direct_return (void)
5930 {
5931 if (reload_completed)
5932 {
5933 rs6000_stack_t *info = rs6000_stack_info ();
5934
5935 if (info->first_gp_reg_save == 32
5936 && info->first_fp_reg_save == 64
5937 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5938 && ! info->lr_save_p
5939 && ! info->cr_save_p
5940 && info->vrsave_size == 0
5941 && ! info->push_p)
5942 return 1;
5943 }
5944
5945 return 0;
5946 }
5947
5948 /* Helper for num_insns_constant. Calculate number of instructions to
5949 load VALUE to a single gpr using combinations of addi, addis, ori,
5950 oris, sldi and rldimi instructions. */
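/* For instance, a general 64-bit constant such as 0x123456789abcdef0 is
   costed as 2 insns for the high 32 bits, 2 for the low 32 bits, plus 1
   for the shift/insert combining them: 5 in total.  */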
5951
5952 static int
5953 num_insns_constant_gpr (HOST_WIDE_INT value)
5954 {
5955 /* signed constant loadable with addi */
5956 if (SIGNED_INTEGER_16BIT_P (value))
5957 return 1;
5958
5959 /* constant loadable with addis */
5960 else if ((value & 0xffff) == 0
5961 && (value >> 31 == -1 || value >> 31 == 0))
5962 return 1;
5963
5964 /* PADDI can support up to 34 bit signed integers. */
5965 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5966 return 1;
5967
5968 else if (TARGET_POWERPC64)
5969 {
5970 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5971 HOST_WIDE_INT high = value >> 31;
5972
5973 if (high == 0 || high == -1)
5974 return 2;
5975
5976 high >>= 1;
5977
5978 if (low == 0 || low == high)
5979 return num_insns_constant_gpr (high) + 1;
5980 else if (high == 0)
5981 return num_insns_constant_gpr (low) + 1;
5982 else
5983 return (num_insns_constant_gpr (high)
5984 + num_insns_constant_gpr (low) + 1);
5985 }
5986
5987 else
5988 return 2;
5989 }
5990
5991 /* Helper for num_insns_constant. Allow constants formed by the
5992 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5993 and handle modes that require multiple gprs. */
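/* E.g. a TImode constant on a 64-bit target occupies two gprs, so each
   64-bit half is costed independently and the results are summed.  */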
5994
5995 static int
5996 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5997 {
5998 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5999 int total = 0;
6000 while (nregs-- > 0)
6001 {
6002 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6003 int insns = num_insns_constant_gpr (low);
6004 if (insns > 2
6005 /* We won't get more than 2 from num_insns_constant_gpr
6006 except when TARGET_POWERPC64 and mode is DImode or
6007 wider, so the register mode must be DImode. */
6008 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6009 insns = 2;
6010 total += insns;
6011 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6012 it all at once would be UB. */
6013 value >>= (BITS_PER_WORD - 1);
6014 value >>= 1;
6015 }
6016 return total;
6017 }
6018
6019 /* Return the number of instructions it takes to form a constant in as
6020 many gprs as are needed for MODE. */
6021
6022 int
6023 num_insns_constant (rtx op, machine_mode mode)
6024 {
6025 HOST_WIDE_INT val;
6026
6027 switch (GET_CODE (op))
6028 {
6029 case CONST_INT:
6030 val = INTVAL (op);
6031 break;
6032
6033 case CONST_WIDE_INT:
6034 {
6035 int insns = 0;
6036 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6037 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6038 DImode);
6039 return insns;
6040 }
6041
6042 case CONST_DOUBLE:
6043 {
6044 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6045
6046 if (mode == SFmode || mode == SDmode)
6047 {
6048 long l;
6049
6050 if (mode == SDmode)
6051 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6052 else
6053 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6054 /* See the first define_split in rs6000.md handling a
6055 const_double_operand. */
6056 val = l;
6057 mode = SImode;
6058 }
6059 else if (mode == DFmode || mode == DDmode)
6060 {
6061 long l[2];
6062
6063 if (mode == DDmode)
6064 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6065 else
6066 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6067
6068 /* See the second (32-bit) and third (64-bit) define_split
6069 in rs6000.md handling a const_double_operand. */
6070 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6071 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6072 mode = DImode;
6073 }
6074 else if (mode == TFmode || mode == TDmode
6075 || mode == KFmode || mode == IFmode)
6076 {
6077 long l[4];
6078 int insns;
6079
6080 if (mode == TDmode)
6081 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6082 else
6083 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6084
6085 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6086 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6087 insns = num_insns_constant_multi (val, DImode);
6088 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6089 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6090 insns += num_insns_constant_multi (val, DImode);
6091 return insns;
6092 }
6093 else
6094 gcc_unreachable ();
6095 }
6096 break;
6097
6098 default:
6099 gcc_unreachable ();
6100 }
6101
6102 return num_insns_constant_multi (val, mode);
6103 }
6104
6105 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6106 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6107 corresponding element of the vector, but for V4SFmode, the
6108 corresponding "float" is interpreted as an SImode integer. */
6109
6110 HOST_WIDE_INT
6111 const_vector_elt_as_int (rtx op, unsigned int elt)
6112 {
6113 rtx tmp;
6114
6115 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6116 gcc_assert (GET_MODE (op) != V2DImode
6117 && GET_MODE (op) != V2DFmode);
6118
6119 tmp = CONST_VECTOR_ELT (op, elt);
6120 if (GET_MODE (op) == V4SFmode)
6121 tmp = gen_lowpart (SImode, tmp);
6122 return INTVAL (tmp);
6123 }
6124
6125 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6126 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6127 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6128 all items are set to the same value and contain COPIES replicas of the
6129 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6130 operand and the others are set to the value of the operand's msb. */
6131
6132 static bool
6133 vspltis_constant (rtx op, unsigned step, unsigned copies)
6134 {
6135 machine_mode mode = GET_MODE (op);
6136 machine_mode inner = GET_MODE_INNER (mode);
6137
6138 unsigned i;
6139 unsigned nunits;
6140 unsigned bitsize;
6141 unsigned mask;
6142
6143 HOST_WIDE_INT val;
6144 HOST_WIDE_INT splat_val;
6145 HOST_WIDE_INT msb_val;
6146
6147 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6148 return false;
6149
6150 nunits = GET_MODE_NUNITS (mode);
6151 bitsize = GET_MODE_BITSIZE (inner);
6152 mask = GET_MODE_MASK (inner);
6153
6154 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6155 splat_val = val;
6156 msb_val = val >= 0 ? 0 : -1;
6157
6158 if (val == 0 && step > 1)
6159 {
6160 /* Special case for loading the most significant bit with step > 1.
6161 In that case, all elements must be 0 except every step'th one
6162 (at indices step-1, 2*step-1, ...), which must match EASY_VECTOR_MSB. */
6163 for (i = 1; i < nunits; ++i)
6164 {
6165 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6166 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6167 if ((i & (step - 1)) == step - 1)
6168 {
6169 if (!EASY_VECTOR_MSB (elt_val, inner))
6170 break;
6171 }
6172 else if (elt_val)
6173 break;
6174 }
6175 if (i == nunits)
6176 return true;
6177 }
6178
6179 /* Construct the value to be splatted, if possible. If not, return false. */
6180 for (i = 2; i <= copies; i *= 2)
6181 {
6182 HOST_WIDE_INT small_val;
6183 bitsize /= 2;
6184 small_val = splat_val >> bitsize;
6185 mask >>= bitsize;
6186 if (splat_val != ((HOST_WIDE_INT)
6187 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6188 | (small_val & mask)))
6189 return false;
6190 splat_val = small_val;
6191 inner = smallest_int_mode_for_size (bitsize);
6192 }
6193
6194 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6195 if (EASY_VECTOR_15 (splat_val))
6196 ;
6197
6198 /* Also check if we can splat, and then add the result to itself. Do so if
6199 the value is positive, or if the splat instruction is using OP's mode;
6200 for splat_val < 0, the splat and the add should use the same mode. */
6201 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6202 && (splat_val >= 0 || (step == 1 && copies == 1)))
6203 ;
6204
6205 /* Also check if we are loading up the most significant bit, which can be done by
6206 loading up -1 and shifting the value left by -1. Only do this for
6207 step 1 here, for larger steps it is done earlier. */
6208 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6209 ;
6210
6211 else
6212 return false;
6213
6214 /* Check if VAL is present in every STEP-th element, and the
6215 other elements are filled with its most significant bit. */
6216 for (i = 1; i < nunits; ++i)
6217 {
6218 HOST_WIDE_INT desired_val;
6219 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6220 if ((i & (step - 1)) == 0)
6221 desired_val = val;
6222 else
6223 desired_val = msb_val;
6224
6225 if (desired_val != const_vector_elt_as_int (op, elt))
6226 return false;
6227 }
6228
6229 return true;
6230 }
6231
6232 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6233 instruction, filling in the bottom elements with 0 or -1.
6234
6235 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6236 for the number of zeroes to shift in, or negative for the number of 0xff
6237 bytes to shift in.
6238
6239 OP is a CONST_VECTOR. */
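/* For example (taking big-endian element order), the V4SImode constant
   { 5, 5, 0, 0 } can be built by splatting 5 with vspltisw and then
   shifting in 8 zero bytes with VSLDOI, so 8 is returned.  */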
6240
6241 int
6242 vspltis_shifted (rtx op)
6243 {
6244 machine_mode mode = GET_MODE (op);
6245 machine_mode inner = GET_MODE_INNER (mode);
6246
6247 unsigned i, j;
6248 unsigned nunits;
6249 unsigned mask;
6250
6251 HOST_WIDE_INT val;
6252
6253 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6254 return 0;
6255
6256 /* We need to create pseudo registers to do the shift, so don't recognize
6257 shift vector constants after reload. */
6258 if (!can_create_pseudo_p ())
6259 return 0;
6260
6261 nunits = GET_MODE_NUNITS (mode);
6262 mask = GET_MODE_MASK (inner);
6263
6264 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6265
6266 /* Check if the value can really be the operand of a vspltis[bhw]. */
6267 if (EASY_VECTOR_15 (val))
6268 ;
6269
6270 /* Also check if we are loading up the most significant bit which can be done
6271 by loading up -1 and shifting the value left by -1. */
6272 else if (EASY_VECTOR_MSB (val, inner))
6273 ;
6274
6275 else
6276 return 0;
6277
6278 /* Check if VAL is present in every STEP-th element until we find elements
6279 that are 0 or all 1 bits. */
6280 for (i = 1; i < nunits; ++i)
6281 {
6282 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6283 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6284
6285 /* If the value isn't the splat value, check for the remaining elements
6286 being 0/-1. */
6287 if (val != elt_val)
6288 {
6289 if (elt_val == 0)
6290 {
6291 for (j = i+1; j < nunits; ++j)
6292 {
6293 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6294 if (const_vector_elt_as_int (op, elt2) != 0)
6295 return 0;
6296 }
6297
6298 return (nunits - i) * GET_MODE_SIZE (inner);
6299 }
6300
6301 else if ((elt_val & mask) == mask)
6302 {
6303 for (j = i+1; j < nunits; ++j)
6304 {
6305 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6306 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6307 return 0;
6308 }
6309
6310 return -((nunits - i) * GET_MODE_SIZE (inner));
6311 }
6312
6313 else
6314 return 0;
6315 }
6316 }
6317
6318 /* If all elements are equal, we don't need to do VSLDOI. */
6319 return 0;
6320 }
6321
6322
6323 /* Return non-zero (element mode byte size) if OP is of the given MODE
6324 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6325
6326 int
6327 easy_altivec_constant (rtx op, machine_mode mode)
6328 {
6329 unsigned step, copies;
6330
6331 if (mode == VOIDmode)
6332 mode = GET_MODE (op);
6333 else if (mode != GET_MODE (op))
6334 return 0;
6335
6336 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6337 constants. */
6338 if (mode == V2DFmode)
6339 return zero_constant (op, mode) ? 8 : 0;
6340
6341 else if (mode == V2DImode)
6342 {
6343 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6344 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6345 return 0;
6346
6347 if (zero_constant (op, mode))
6348 return 8;
6349
6350 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6351 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6352 return 8;
6353
6354 return 0;
6355 }
6356
6357 /* V1TImode is a special container for TImode. Ignore for now. */
6358 else if (mode == V1TImode)
6359 return 0;
6360
6361 /* Start with a vspltisw. */
6362 step = GET_MODE_NUNITS (mode) / 4;
6363 copies = 1;
6364
6365 if (vspltis_constant (op, step, copies))
6366 return 4;
6367
6368 /* Then try with a vspltish. */
6369 if (step == 1)
6370 copies <<= 1;
6371 else
6372 step >>= 1;
6373
6374 if (vspltis_constant (op, step, copies))
6375 return 2;
6376
6377 /* And finally a vspltisb. */
6378 if (step == 1)
6379 copies <<= 1;
6380 else
6381 step >>= 1;
6382
6383 if (vspltis_constant (op, step, copies))
6384 return 1;
6385
6386 if (vspltis_shifted (op) != 0)
6387 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6388
6389 return 0;
6390 }
6391
6392 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6393 result is OP. Abort if it is not possible. */
6394
6395 rtx
6396 gen_easy_altivec_constant (rtx op)
6397 {
6398 machine_mode mode = GET_MODE (op);
6399 int nunits = GET_MODE_NUNITS (mode);
6400 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6401 unsigned step = nunits / 4;
6402 unsigned copies = 1;
6403
6404 /* Start with a vspltisw. */
6405 if (vspltis_constant (op, step, copies))
6406 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6407
6408 /* Then try with a vspltish. */
6409 if (step == 1)
6410 copies <<= 1;
6411 else
6412 step >>= 1;
6413
6414 if (vspltis_constant (op, step, copies))
6415 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6416
6417 /* And finally a vspltisb. */
6418 if (step == 1)
6419 copies <<= 1;
6420 else
6421 step >>= 1;
6422
6423 if (vspltis_constant (op, step, copies))
6424 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6425
6426 gcc_unreachable ();
6427 }
6428
6429 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6430 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6431
6432 Store the number of instructions needed (1 or 2) into the location
6433 pointed to by NUM_INSNS_PTR.
6434
6435 Store the constant that is being splatted via CONSTANT_PTR. */
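/* E.g. the V4SImode constant { 100, 100, 100, 100 } is out of vspltisw
   range, so when no single prefixed splat is available it takes two
   instructions: XXSPLTIB replicates the byte 100 into all 16 bytes and
   VEXTSB2W sign-extends the low byte of each word.  */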
6436
6437 bool
6438 xxspltib_constant_p (rtx op,
6439 machine_mode mode,
6440 int *num_insns_ptr,
6441 int *constant_ptr)
6442 {
6443 size_t nunits = GET_MODE_NUNITS (mode);
6444 size_t i;
6445 HOST_WIDE_INT value;
6446 rtx element;
6447
6448 /* Set the returned values to out-of-bounds values. */
6449 *num_insns_ptr = -1;
6450 *constant_ptr = 256;
6451
6452 if (!TARGET_P9_VECTOR)
6453 return false;
6454
6455 if (mode == VOIDmode)
6456 mode = GET_MODE (op);
6457
6458 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6459 return false;
6460
6461 /* Handle (vec_duplicate <constant>). */
6462 if (GET_CODE (op) == VEC_DUPLICATE)
6463 {
6464 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6465 && mode != V2DImode)
6466 return false;
6467
6468 element = XEXP (op, 0);
6469 if (!CONST_INT_P (element))
6470 return false;
6471
6472 value = INTVAL (element);
6473 if (!IN_RANGE (value, -128, 127))
6474 return false;
6475 }
6476
6477 /* Handle (const_vector [...]). */
6478 else if (GET_CODE (op) == CONST_VECTOR)
6479 {
6480 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6481 && mode != V2DImode)
6482 return false;
6483
6484 element = CONST_VECTOR_ELT (op, 0);
6485 if (!CONST_INT_P (element))
6486 return false;
6487
6488 value = INTVAL (element);
6489 if (!IN_RANGE (value, -128, 127))
6490 return false;
6491
6492 for (i = 1; i < nunits; i++)
6493 {
6494 element = CONST_VECTOR_ELT (op, i);
6495 if (!CONST_INT_P (element))
6496 return false;
6497
6498 if (value != INTVAL (element))
6499 return false;
6500 }
6501 }
6502
6503 /* Handle integer constants being loaded into the upper part of the VSX
6504 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6505 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6506 else if (CONST_INT_P (op))
6507 {
6508 if (!SCALAR_INT_MODE_P (mode))
6509 return false;
6510
6511 value = INTVAL (op);
6512 if (!IN_RANGE (value, -128, 127))
6513 return false;
6514
6515 if (!IN_RANGE (value, -1, 0))
6516 {
6517 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6518 return false;
6519
6520 if (EASY_VECTOR_15 (value))
6521 return false;
6522 }
6523 }
6524
6525 else
6526 return false;
6527
6528 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6529 sign extend. Special case 0/-1 to allow getting any VSX register instead
6530 of an Altivec register. */
6531 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6532 && EASY_VECTOR_15 (value))
6533 return false;
6534
6535 /* Return # of instructions and the constant byte for XXSPLTIB. */
6536 if (mode == V16QImode)
6537 *num_insns_ptr = 1;
6538
6539 else if (IN_RANGE (value, -1, 0))
6540 *num_insns_ptr = 1;
6541
6542 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6543 single XXSPLTIW or XXSPLTIDP instruction. */
6544 else if (vsx_prefixed_constant (op, mode))
6545 return false;
6546
6547 /* Return XXSPLTIB followed by a sign extend operation to convert the
6548 constant to V8HImode or V4SImode. */
6549 else
6550 *num_insns_ptr = 2;
6551
6552 *constant_ptr = (int) value;
6553 return true;
6554 }
6555
6556 const char *
6557 output_vec_const_move (rtx *operands)
6558 {
6559 int shift;
6560 machine_mode mode;
6561 rtx dest, vec;
6562
6563 dest = operands[0];
6564 vec = operands[1];
6565 mode = GET_MODE (dest);
6566
6567 if (TARGET_VSX)
6568 {
6569 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6570 int xxspltib_value = 256;
6571 int num_insns = -1;
6572
6573 if (zero_constant (vec, mode))
6574 {
6575 if (TARGET_P9_VECTOR)
6576 return "xxspltib %x0,0";
6577
6578 else if (dest_vmx_p)
6579 return "vspltisw %0,0";
6580
6581 else
6582 return "xxlxor %x0,%x0,%x0";
6583 }
6584
6585 if (all_ones_constant (vec, mode))
6586 {
6587 if (TARGET_P9_VECTOR)
6588 return "xxspltib %x0,255";
6589
6590 else if (dest_vmx_p)
6591 return "vspltisw %0,-1";
6592
6593 else if (TARGET_P8_VECTOR)
6594 return "xxlorc %x0,%x0,%x0";
6595
6596 else
6597 gcc_unreachable ();
6598 }
6599
6600 vec_const_128bit_type vsx_const;
6601 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6602 {
6603 unsigned imm = constant_generates_lxvkq (&vsx_const);
6604 if (imm)
6605 {
6606 operands[2] = GEN_INT (imm);
6607 return "lxvkq %x0,%2";
6608 }
6609
6610 imm = constant_generates_xxspltiw (&vsx_const);
6611 if (imm)
6612 {
6613 operands[2] = GEN_INT (imm);
6614 return "xxspltiw %x0,%2";
6615 }
6616
6617 imm = constant_generates_xxspltidp (&vsx_const);
6618 if (imm)
6619 {
6620 operands[2] = GEN_INT (imm);
6621 return "xxspltidp %x0,%2";
6622 }
6623 }
6624
6625 if (TARGET_P9_VECTOR
6626 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6627 {
6628 if (num_insns == 1)
6629 {
6630 operands[2] = GEN_INT (xxspltib_value & 0xff);
6631 return "xxspltib %x0,%2";
6632 }
6633
6634 return "#";
6635 }
6636 }
6637
6638 if (TARGET_ALTIVEC)
6639 {
6640 rtx splat_vec;
6641
6642 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6643 if (zero_constant (vec, mode))
6644 return "vspltisw %0,0";
6645
6646 if (all_ones_constant (vec, mode))
6647 return "vspltisw %0,-1";
6648
6649 /* Do we need to construct a value using VSLDOI? */
6650 shift = vspltis_shifted (vec);
6651 if (shift != 0)
6652 return "#";
6653
6654 splat_vec = gen_easy_altivec_constant (vec);
6655 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6656 operands[1] = XEXP (splat_vec, 0);
6657 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6658 return "#";
6659
6660 switch (GET_MODE (splat_vec))
6661 {
6662 case E_V4SImode:
6663 return "vspltisw %0,%1";
6664
6665 case E_V8HImode:
6666 return "vspltish %0,%1";
6667
6668 case E_V16QImode:
6669 return "vspltisb %0,%1";
6670
6671 default:
6672 gcc_unreachable ();
6673 }
6674 }
6675
6676 gcc_unreachable ();
6677 }
6678
6679 /* Initialize vector TARGET to VALS. */
6680
6681 void
6682 rs6000_expand_vector_init (rtx target, rtx vals)
6683 {
6684 machine_mode mode = GET_MODE (target);
6685 machine_mode inner_mode = GET_MODE_INNER (mode);
6686 unsigned int n_elts = GET_MODE_NUNITS (mode);
6687 int n_var = 0, one_var = -1;
6688 bool all_same = true, all_const_zero = true;
6689 rtx x, mem;
6690 unsigned int i;
6691
6692 for (i = 0; i < n_elts; ++i)
6693 {
6694 x = XVECEXP (vals, 0, i);
6695 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6696 ++n_var, one_var = i;
6697 else if (x != CONST0_RTX (inner_mode))
6698 all_const_zero = false;
6699
6700 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6701 all_same = false;
6702 }
6703
6704 if (n_var == 0)
6705 {
6706 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6707 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6708 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6709 {
6710 /* Zero register. */
6711 emit_move_insn (target, CONST0_RTX (mode));
6712 return;
6713 }
6714 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6715 {
6716 /* Splat immediate. */
6717 emit_insn (gen_rtx_SET (target, const_vec));
6718 return;
6719 }
6720 else
6721 {
6722 /* Load from constant pool. */
6723 emit_move_insn (target, const_vec);
6724 return;
6725 }
6726 }
6727
6728 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6729 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6730 {
6731 rtx op[2];
6732 size_t i;
6733 size_t num_elements = all_same ? 1 : 2;
6734 for (i = 0; i < num_elements; i++)
6735 {
6736 op[i] = XVECEXP (vals, 0, i);
6737 /* Just in case there is a SUBREG with a smaller mode, do a
6738 conversion. */
6739 if (GET_MODE (op[i]) != inner_mode)
6740 {
6741 rtx tmp = gen_reg_rtx (inner_mode);
6742 convert_move (tmp, op[i], 0);
6743 op[i] = tmp;
6744 }
6745 /* Allow load with splat double word. */
6746 else if (MEM_P (op[i]))
6747 {
6748 if (!all_same)
6749 op[i] = force_reg (inner_mode, op[i]);
6750 }
6751 else if (!REG_P (op[i]))
6752 op[i] = force_reg (inner_mode, op[i]);
6753 }
6754
6755 if (all_same)
6756 {
6757 if (mode == V2DFmode)
6758 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6759 else
6760 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6761 }
6762 else
6763 {
6764 if (mode == V2DFmode)
6765 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6766 else
6767 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6768 }
6769 return;
6770 }
6771
6772 /* Special case initializing vector int if we are on 64-bit systems with
6773 direct move or we have the ISA 3.0 instructions. */
6774 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6775 && TARGET_DIRECT_MOVE_64BIT)
6776 {
6777 if (all_same)
6778 {
6779 rtx element0 = XVECEXP (vals, 0, 0);
6780 if (MEM_P (element0))
6781 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6782 else
6783 element0 = force_reg (SImode, element0);
6784
6785 if (TARGET_P9_VECTOR)
6786 emit_insn (gen_vsx_splat_v4si (target, element0));
6787 else
6788 {
6789 rtx tmp = gen_reg_rtx (DImode);
6790 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6791 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6792 }
6793 return;
6794 }
6795 else
6796 {
6797 rtx elements[4];
6798 size_t i;
6799
6800 for (i = 0; i < 4; i++)
6801 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6802
6803 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6804 elements[2], elements[3]));
6805 return;
6806 }
6807 }
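/* Editor's note (illustrative): a splatted variable word such as

     vector int v = { x, x, x, x };

   maps to a single mtvsrws-style splat on ISA 3.0 (gen_vsx_splat_v4si),
   while the pre-P9 direct-move path zero-extends x to DImode first and
   splats from there, as the code above shows.  */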
6808
6809 /* With single precision floating point on VSX, note that internally single
6810 precision is actually represented as a double. Either make 2 V2DF
6811 vectors and convert those vectors to single precision, or do one
6812 conversion and splat the result to the other elements. */
6813 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6814 {
6815 if (all_same)
6816 {
6817 rtx element0 = XVECEXP (vals, 0, 0);
6818
6819 if (TARGET_P9_VECTOR)
6820 {
6821 if (MEM_P (element0))
6822 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6823
6824 emit_insn (gen_vsx_splat_v4sf (target, element0));
6825 }
6826
6827 else
6828 {
6829 rtx freg = gen_reg_rtx (V4SFmode);
6830 rtx sreg = force_reg (SFmode, element0);
6831 rtx cvt = (TARGET_XSCVDPSPN
6832 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6833 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6834
6835 emit_insn (cvt);
6836 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6837 const0_rtx));
6838 }
6839 }
6840 else
6841 {
6842 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6843 {
6844 rtx tmp_sf[4];
6845 rtx tmp_si[4];
6846 rtx tmp_di[4];
6847 rtx mrg_di[4];
6848 for (i = 0; i < 4; i++)
6849 {
6850 tmp_si[i] = gen_reg_rtx (SImode);
6851 tmp_di[i] = gen_reg_rtx (DImode);
6852 mrg_di[i] = gen_reg_rtx (DImode);
6853 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6854 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6855 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6856 }
6857
6858 if (!BYTES_BIG_ENDIAN)
6859 {
6860 std::swap (tmp_di[0], tmp_di[1]);
6861 std::swap (tmp_di[2], tmp_di[3]);
6862 }
6863
6864 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6865 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6866 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6867 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6868
6869 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6870 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6871 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6872 }
6873 else
6874 {
6875 rtx dbl_even = gen_reg_rtx (V2DFmode);
6876 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6877 rtx flt_even = gen_reg_rtx (V4SFmode);
6878 rtx flt_odd = gen_reg_rtx (V4SFmode);
6879 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6880 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6881 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6882 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6883
6884 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6885 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6886 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6887 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6888 rs6000_expand_extract_even (target, flt_even, flt_odd);
6889 }
6890 }
6891 return;
6892 }
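/* Editor's summary (illustrative): for

     vector float v = { a, b, c, d };

   the P8 64-bit path above moves the four 32-bit images through GPRs,
   packs each pair into a DImode value with shift/or, and concatenates the
   two doublewords; the fallback widens pairs to V2DF, converts with
   xvcvdpsp, and interleaves the even elements of the two results.  */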
6893
6894 /* Special-case initializing vector short/char splats on 64-bit systems
6895 with direct move. */
6896 if (all_same && TARGET_DIRECT_MOVE_64BIT
6897 && (mode == V16QImode || mode == V8HImode))
6898 {
6899 rtx op0 = XVECEXP (vals, 0, 0);
6900 rtx di_tmp = gen_reg_rtx (DImode);
6901
6902 if (!REG_P (op0))
6903 op0 = force_reg (GET_MODE_INNER (mode), op0);
6904
6905 if (mode == V16QImode)
6906 {
6907 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6908 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6909 return;
6910 }
6911
6912 if (mode == V8HImode)
6913 {
6914 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6915 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6916 return;
6917 }
6918 }
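/* Editor's note (illustrative): a char splat such as

     vector unsigned char v = { x, x, ..., x };

   thus becomes roughly a zero-extend of x into a GPR doubleword, a direct
   move into a VSX register, and one vspltb, avoiding any stack traffic.  */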
6919
6920 /* Store value to stack temp. Load vector element. Splat. However, splat
6921 of 64-bit items is not supported on Altivec. */
6922 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6923 {
6924 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6925 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6926 XVECEXP (vals, 0, 0));
6927 x = gen_rtx_UNSPEC (VOIDmode,
6928 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6929 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6930 gen_rtvec (2,
6931 gen_rtx_SET (target, mem),
6932 x)));
6933 x = gen_rtx_VEC_SELECT (inner_mode, target,
6934 gen_rtx_PARALLEL (VOIDmode,
6935 gen_rtvec (1, const0_rtx)));
6936 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6937 return;
6938 }
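/* Editor's note (illustrative): for elements of at most 4 bytes this path
   is roughly "store the element; lvebx/lvehx/lvewx it into the target;
   vspltb/vsplth/vspltw the loaded lane", i.e. the UNSPEC_LVE parallel
   above does the element load and the VEC_DUPLICATE broadcasts it.  */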
6939
6940 /* One field is non-constant. Load constant then overwrite
6941 varying field. */
6942 if (n_var == 1)
6943 {
6944 rtx copy = copy_rtx (vals);
6945
6946 /* Load constant part of vector, substitute neighboring value for
6947 varying element. */
6948 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6949 rs6000_expand_vector_init (target, copy);
6950
6951 /* Insert variable. */
6952 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
6953 GEN_INT (one_var));
6954 return;
6955 }
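/* Editor's example (illustrative): for { 0, 0, x, 0 } the copy becomes
   { 0, 0, 0, 0 } (the variable slot takes its neighbor's value), that
   constant is materialized cheaply, and x is then inserted into element 2
   by rs6000_expand_vector_set.  */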
6956
6957 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
6958 {
6959 rtx op[16];
6960 /* Force the values into word_mode registers. */
6961 for (i = 0; i < n_elts; i++)
6962 {
6963 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
6964 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
6965 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
6966 }
6967
6968 /* As an example for the construction below, take unsigned char on 64-bit
6969 big endian; the input values are: A, B, C, D, ..., O, P. */
6970
6971 if (TARGET_DIRECT_MOVE_128)
6972 {
6973 /* Move to VSX register with vec_concat, each has 2 values.
6974 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6975 vr1[1] = { xxxxxxxC, xxxxxxxD };
6976 ...
6977 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6978 rtx vr1[8];
6979 for (i = 0; i < n_elts / 2; i++)
6980 {
6981 vr1[i] = gen_reg_rtx (V2DImode);
6982 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
6983 op[i * 2 + 1]));
6984 }
6985
6986 /* Pack vectors with 2 values into vectors with 4 values.
6987 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6988 vr2[1] = { xxxExxxF, xxxGxxxH };
6989 vr2[2] = { xxxIxxxJ, xxxKxxxL };
6990 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6991 rtx vr2[4];
6992 for (i = 0; i < n_elts / 4; i++)
6993 {
6994 vr2[i] = gen_reg_rtx (V4SImode);
6995 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
6996 vr1[i * 2 + 1]));
6997 }
6998
6999 /* Pack vectors with 4 values into vectors with 8 values.
7000 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7001 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7002 rtx vr3[2];
7003 for (i = 0; i < n_elts / 8; i++)
7004 {
7005 vr3[i] = gen_reg_rtx (V8HImode);
7006 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7007 vr2[i * 2 + 1]));
7008 }
7009
7010 /* If the mode is V8HImode, we are done; move the result and return. */
7011 if (mode == V8HImode)
7012 {
7013 emit_insn (gen_rtx_SET (target, vr3[0]));
7014 return;
7015 }
7016
7017 /* Pack vectors with 8 values into 16 values. */
7018 rtx res = gen_reg_rtx (V16QImode);
7019 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7020 emit_insn (gen_rtx_SET (target, res));
7021 }
7022 else
7023 {
7024 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7025 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7026 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7027 rtx perm_idx;
7028
7029 /* Set up some common gen routines and values. */
7030 if (BYTES_BIG_ENDIAN)
7031 {
7032 if (mode == V16QImode)
7033 {
7034 merge_v16qi = gen_altivec_vmrghb;
7035 merge_v8hi = gen_altivec_vmrglh;
7036 }
7037 else
7038 merge_v8hi = gen_altivec_vmrghh;
7039
7040 merge_v4si = gen_altivec_vmrglw;
7041 perm_idx = GEN_INT (3);
7042 }
7043 else
7044 {
7045 if (mode == V16QImode)
7046 {
7047 merge_v16qi = gen_altivec_vmrglb;
7048 merge_v8hi = gen_altivec_vmrghh;
7049 }
7050 else
7051 merge_v8hi = gen_altivec_vmrglh;
7052
7053 merge_v4si = gen_altivec_vmrghw;
7054 perm_idx = GEN_INT (0);
7055 }
7056
7057 /* Move to VSX register with direct move.
7058 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7059 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7060 ...
7061 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7062 rtx vr_qi[16];
7063 for (i = 0; i < n_elts; i++)
7064 {
7065 vr_qi[i] = gen_reg_rtx (V16QImode);
7066 if (TARGET_POWERPC64)
7067 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7068 else
7069 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7070 }
7071
7072 /* Merge/move to vector short.
7073 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7074 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7075 ...
7076 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7077 rtx vr_hi[8];
7078 for (i = 0; i < 8; i++)
7079 {
7080 rtx tmp = vr_qi[i];
7081 if (mode == V16QImode)
7082 {
7083 tmp = gen_reg_rtx (V16QImode);
7084 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7085 }
7086 vr_hi[i] = gen_reg_rtx (V8HImode);
7087 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7088 }
7089
7090 /* Merge vector short to vector int.
7091 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7092 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7093 ...
7094 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7095 rtx vr_si[4];
7096 for (i = 0; i < 4; i++)
7097 {
7098 rtx tmp = gen_reg_rtx (V8HImode);
7099 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7100 vr_si[i] = gen_reg_rtx (V4SImode);
7101 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7102 }
7103
7104 /* Merge vector int to vector long.
7105 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7106 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7107 rtx vr_di[2];
7108 for (i = 0; i < 2; i++)
7109 {
7110 rtx tmp = gen_reg_rtx (V4SImode);
7111 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7112 vr_di[i] = gen_reg_rtx (V2DImode);
7113 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7114 }
7115
7116 rtx res = gen_reg_rtx (V2DImode);
7117 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7118 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7119 }
7120
7121 return;
7122 }
7123
7124 /* Construct the vector in memory one field at a time
7125 and load the whole vector. */
7126 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7127 for (i = 0; i < n_elts; i++)
7128 emit_move_insn (adjust_address_nv (mem, inner_mode,
7129 i * GET_MODE_SIZE (inner_mode)),
7130 XVECEXP (vals, 0, i));
7131 emit_move_insn (target, mem);
7132 }
7133
7134 /* Insert VAL into element IDX of TARGET. VAL's size equals the vector
7135 element size; IDX is variable and counts in vector elements. For P9 and above. */
7136
7137 static void
7138 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7139 {
7140 machine_mode mode = GET_MODE (target);
7141
7142 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7143
7144 machine_mode inner_mode = GET_MODE (val);
7145
7146 int width = GET_MODE_SIZE (inner_mode);
7147
7148 gcc_assert (width >= 1 && width <= 8);
7149
7150 int shift = exact_log2 (width);
7151
7152 machine_mode idx_mode = GET_MODE (idx);
7153
7154 machine_mode shift_mode;
7155 rtx (*gen_ashl)(rtx, rtx, rtx);
7156 rtx (*gen_lvsl)(rtx, rtx);
7157 rtx (*gen_lvsr)(rtx, rtx);
7158
7159 if (TARGET_POWERPC64)
7160 {
7161 shift_mode = DImode;
7162 gen_ashl = gen_ashldi3;
7163 gen_lvsl = gen_altivec_lvsl_reg_di;
7164 gen_lvsr = gen_altivec_lvsr_reg_di;
7165 }
7166 else
7167 {
7168 shift_mode = SImode;
7169 gen_ashl = gen_ashlsi3;
7170 gen_lvsl = gen_altivec_lvsl_reg_si;
7171 gen_lvsr = gen_altivec_lvsr_reg_si;
7172 }
7173 /* Generate the IDX for permute shift, width is the vector element size.
7174 idx = idx * width. */
7175 rtx tmp = gen_reg_rtx (shift_mode);
7176 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7177
7178 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7179
7180 /* lvsr v1,0,idx. */
7181 rtx pcvr = gen_reg_rtx (V16QImode);
7182 emit_insn (gen_lvsr (pcvr, tmp));
7183
7184 /* lvsl v2,0,idx. */
7185 rtx pcvl = gen_reg_rtx (V16QImode);
7186 emit_insn (gen_lvsl (pcvl, tmp));
7187
7188 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7189
7190 rtx permr
7191 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr);
7192 emit_insn (permr);
7193
7194 rs6000_expand_vector_set (target, val, const0_rtx);
7195
7196 rtx perml
7197 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl);
7198 emit_insn (perml);
7199 }
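/* Editor's summary (illustrative): the P9 variable-insert above is
   effectively

     vperm  t,t,t,lvsr(idx*width)   # rotate element IDX into slot 0
     <insert VAL into element 0>
     vperm  t,t,t,lvsl(idx*width)   # rotate back

   so a variable-index insert costs two permutes around a fixed insert.  */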
7200
7201 /* Insert VAL into element IDX of TARGET. VAL's size equals the vector
7202 element size; IDX is variable and counts in vector elements. For P7 and P8. */
7203
7204 static void
7205 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7206 {
7207 machine_mode mode = GET_MODE (target);
7208
7209 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7210
7211 machine_mode inner_mode = GET_MODE (val);
7212 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7213
7214 int width = GET_MODE_SIZE (inner_mode);
7215 gcc_assert (width >= 1 && width <= 4);
7216
7217 int shift = exact_log2 (width);
7218
7219 machine_mode idx_mode = GET_MODE (idx);
7220
7221 machine_mode shift_mode;
7222 rtx (*gen_ashl)(rtx, rtx, rtx);
7223 rtx (*gen_add)(rtx, rtx, rtx);
7224 rtx (*gen_sub)(rtx, rtx, rtx);
7225 rtx (*gen_lvsl)(rtx, rtx);
7226
7227 if (TARGET_POWERPC64)
7228 {
7229 shift_mode = DImode;
7230 gen_ashl = gen_ashldi3;
7231 gen_add = gen_adddi3;
7232 gen_sub = gen_subdi3;
7233 gen_lvsl = gen_altivec_lvsl_reg_di;
7234 }
7235 else
7236 {
7237 shift_mode = SImode;
7238 gen_ashl = gen_ashlsi3;
7239 gen_add = gen_addsi3;
7240 gen_sub = gen_subsi3;
7241 gen_lvsl = gen_altivec_lvsl_reg_si;
7242 }
7243
7244 /* idx = idx * width. */
7245 rtx tmp = gen_reg_rtx (shift_mode);
7246 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7247
7248 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7249
7250 /* For LE: idx = idx + 8. */
7251 if (!BYTES_BIG_ENDIAN)
7252 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7253 else
7254 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7255
7256 /* lxv vs33, mask.
7257 DImode: 0xffffffffffffffff0000000000000000
7258 SImode: 0x00000000ffffffff0000000000000000
7259 HImode: 0x000000000000ffff0000000000000000.
7260 QImode: 0x00000000000000ff0000000000000000. */
7261 rtx mask = gen_reg_rtx (V16QImode);
7262 rtx mask_v2di = gen_reg_rtx (V2DImode);
7263 rtvec v = rtvec_alloc (2);
7264 if (!BYTES_BIG_ENDIAN)
7265 {
7266 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7267 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7268 }
7269 else
7270 {
7271 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7272 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7273 }
7274 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7275 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7276 emit_insn (gen_rtx_SET (mask, sub_mask));
7277
7278 /* mtvsrd[wz] f0,tmp_val. */
7279 rtx tmp_val = gen_reg_rtx (SImode);
7280 if (inner_mode == E_SFmode)
7281 if (TARGET_DIRECT_MOVE_64BIT)
7282 emit_insn (gen_movsi_from_sf (tmp_val, val));
7283 else
7284 {
7285 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7286 emit_insn (gen_movsf_hardfloat (stack, val));
7287 rtx stack2 = copy_rtx (stack);
7288 PUT_MODE (stack2, SImode);
7289 emit_move_insn (tmp_val, stack2);
7290 }
7291 else
7292 tmp_val = force_reg (SImode, val);
7293
7294 rtx val_v16qi = gen_reg_rtx (V16QImode);
7295 rtx val_v2di = gen_reg_rtx (V2DImode);
7296 rtvec vec_val = rtvec_alloc (2);
7297 if (!BYTES_BIG_ENDIAN)
7298 {
7299 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7300 RTVEC_ELT (vec_val, 1) = tmp_val;
7301 }
7302 else
7303 {
7304 RTVEC_ELT (vec_val, 0) = tmp_val;
7305 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7306 }
7307 emit_insn (
7308 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7309 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7310 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7311
7312 /* lvsl 13,0,idx. */
7313 rtx pcv = gen_reg_rtx (V16QImode);
7314 emit_insn (gen_lvsl (pcv, tmp));
7315
7316 /* vperm 1,1,1,13. */
7317 /* vperm 0,0,0,13. */
7318 rtx val_perm = gen_reg_rtx (V16QImode);
7319 rtx mask_perm = gen_reg_rtx (V16QImode);
7320 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7321 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7322
7323 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7324
7325 /* xxsel 34,34,32,33. */
7326 emit_insn (
7327 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7328 }
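/* Editor's summary (illustrative): the P7/P8 variable-insert above builds
   an element-wide byte mask and the value in V2DI form, rotates both into
   position with a single lvsl-generated permute control, and merges with

     xxsel  target, target, value, mask

   so only the addressed element of TARGET is replaced.  */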
7329
7330 /* Set field ELT_RTX of TARGET to VAL. */
7331
7332 void
7333 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7334 {
7335 machine_mode mode = GET_MODE (target);
7336 machine_mode inner_mode = GET_MODE_INNER (mode);
7337 rtx reg = gen_reg_rtx (mode);
7338 rtx mask, mem, x;
7339 int width = GET_MODE_SIZE (inner_mode);
7340 int i;
7341
7342 val = force_reg (GET_MODE (val), val);
7343
7344 if (VECTOR_MEM_VSX_P (mode))
7345 {
7346 if (!CONST_INT_P (elt_rtx))
7347 {
7348 /* For V2DI/V2DF, we can leverage the P9 version to generate xxpermdi
7349 when elt_rtx is variable. */
7350 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7351 {
7352 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7353 return;
7354 }
7355 else if (TARGET_VSX)
7356 {
7357 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7358 return;
7359 }
7360 else
7361 gcc_assert (CONST_INT_P (elt_rtx));
7362 }
7363
7364 rtx insn = NULL_RTX;
7365
7366 if (mode == V2DFmode)
7367 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7368
7369 else if (mode == V2DImode)
7370 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7371
7372 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7373 {
7374 if (mode == V4SImode)
7375 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7376 else if (mode == V8HImode)
7377 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7378 else if (mode == V16QImode)
7379 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7380 else if (mode == V4SFmode)
7381 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7382 }
7383
7384 if (insn)
7385 {
7386 emit_insn (insn);
7387 return;
7388 }
7389 }
7390
7391 /* Simplify setting single element vectors like V1TImode. */
7392 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7393 && INTVAL (elt_rtx) == 0)
7394 {
7395 emit_move_insn (target, gen_lowpart (mode, val));
7396 return;
7397 }
7398
7399 /* Load single variable value. */
7400 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7401 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7402 x = gen_rtx_UNSPEC (VOIDmode,
7403 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7404 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7405 gen_rtvec (2,
7406 gen_rtx_SET (reg, mem),
7407 x)));
7408
7409 /* Linear sequence. */
7410 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7411 for (i = 0; i < 16; ++i)
7412 XVECEXP (mask, 0, i) = GEN_INT (i);
7413
7414 /* Set permute mask to insert element into target. */
7415 for (i = 0; i < width; ++i)
7416 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7417 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7418
7419 if (BYTES_BIG_ENDIAN)
7420 x = gen_rtx_UNSPEC (mode,
7421 gen_rtvec (3, target, reg,
7422 force_reg (V16QImode, x)),
7423 UNSPEC_VPERM);
7424 else
7425 {
7426 if (TARGET_P9_VECTOR)
7427 x = gen_rtx_UNSPEC (mode,
7428 gen_rtvec (3, reg, target,
7429 force_reg (V16QImode, x)),
7430 UNSPEC_VPERMR);
7431 else
7432 {
7433 /* Invert selector. We prefer to generate VNAND on P8 so
7434 that future fusion opportunities can kick in, but must
7435 generate VNOR elsewhere. */
7436 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7437 rtx iorx = (TARGET_P8_VECTOR
7438 ? gen_rtx_IOR (V16QImode, notx, notx)
7439 : gen_rtx_AND (V16QImode, notx, notx));
7440 rtx tmp = gen_reg_rtx (V16QImode);
7441 emit_insn (gen_rtx_SET (tmp, iorx));
7442
7443 /* Permute with operands reversed and adjusted selector. */
7444 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7445 UNSPEC_VPERM);
7446 }
7447 }
7448
7449 emit_insn (gen_rtx_SET (target, x));
7450 }
7451
7452 /* Extract field ELT from VEC into TARGET. */
7453
7454 void
7455 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7456 {
7457 machine_mode mode = GET_MODE (vec);
7458 machine_mode inner_mode = GET_MODE_INNER (mode);
7459 rtx mem;
7460
7461 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7462 {
7463 switch (mode)
7464 {
7465 default:
7466 break;
7467 case E_V1TImode:
7468 emit_move_insn (target, gen_lowpart (TImode, vec));
7469 return;
7470 case E_V2DFmode:
7471 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7472 return;
7473 case E_V2DImode:
7474 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7475 return;
7476 case E_V4SFmode:
7477 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7478 return;
7479 case E_V16QImode:
7480 if (TARGET_DIRECT_MOVE_64BIT)
7481 {
7482 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7483 return;
7484 }
7485 else
7486 break;
7487 case E_V8HImode:
7488 if (TARGET_DIRECT_MOVE_64BIT)
7489 {
7490 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7491 return;
7492 }
7493 else
7494 break;
7495 case E_V4SImode:
7496 if (TARGET_DIRECT_MOVE_64BIT)
7497 {
7498 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7499 return;
7500 }
7501 break;
7502 }
7503 }
7504 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7505 && TARGET_DIRECT_MOVE_64BIT)
7506 {
7507 if (GET_MODE (elt) != DImode)
7508 {
7509 rtx tmp = gen_reg_rtx (DImode);
7510 convert_move (tmp, elt, 0);
7511 elt = tmp;
7512 }
7513 else if (!REG_P (elt))
7514 elt = force_reg (DImode, elt);
7515
7516 switch (mode)
7517 {
7518 case E_V1TImode:
7519 emit_move_insn (target, gen_lowpart (TImode, vec));
7520 return;
7521
7522 case E_V2DFmode:
7523 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7524 return;
7525
7526 case E_V2DImode:
7527 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7528 return;
7529
7530 case E_V4SFmode:
7531 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7532 return;
7533
7534 case E_V4SImode:
7535 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7536 return;
7537
7538 case E_V8HImode:
7539 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7540 return;
7541
7542 case E_V16QImode:
7543 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7544 return;
7545
7546 default:
7547 gcc_unreachable ();
7548 }
7549 }
7550
7551 /* Allocate mode-sized buffer. */
7552 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7553
7554 emit_move_insn (mem, vec);
7555 if (CONST_INT_P (elt))
7556 {
7557 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7558
7559 /* Add offset to field within buffer matching vector element. */
7560 mem = adjust_address_nv (mem, inner_mode,
7561 modulo_elt * GET_MODE_SIZE (inner_mode));
7562 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7563 }
7564 else
7565 {
7566 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7567 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7568 rtx new_addr = gen_reg_rtx (Pmode);
7569
7570 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7571 if (ele_size > 1)
7572 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7573 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7574 new_addr = change_address (mem, inner_mode, new_addr);
7575 emit_move_insn (target, new_addr);
7576 }
7577 }
7578
7579 /* Return the offset within a memory object (MEM) of a vector type to a given
7580 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7581 the element is constant, we return a constant integer.
7582
7583 Otherwise, we use a base register temporary to calculate the offset after
7584 masking it to fit within the bounds of the vector and scaling it. The
7585 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7586 built-in function. */
7587
7588 static rtx
7589 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7590 {
7591 if (CONST_INT_P (element))
7592 return GEN_INT (INTVAL (element) * scalar_size);
7593
7594 /* All insns should use the 'Q' constraint (address is a single register) if
7595 the element number is not a constant. */
7596 gcc_assert (satisfies_constraint_Q (mem));
7597
7598 /* Mask the element to make sure the element number is between 0 and the
7599 maximum number of elements - 1 so that we don't generate an address
7600 outside the vector. */
7601 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7602 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7603 emit_insn (gen_rtx_SET (base_tmp, and_op));
7604
7605 /* Shift the element to get the byte offset from the element number. */
7606 int shift = exact_log2 (scalar_size);
7607 gcc_assert (shift >= 0);
7608
7609 if (shift > 0)
7610 {
7611 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7612 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7613 }
7614
7615 return base_tmp;
7616 }
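/* Editor's sketch of the computation above for a variable ELEMENT, written
   as plain C (names hypothetical):

     offset = (element & (nunits - 1)) << exact_log2 (scalar_size);

   the AND clamps the element number, as the ELFv2 vec_extract semantics
   require, and the shift scales it to a byte offset.  */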
7617
7618 /* Helper function to update PC-relative addresses when we are adjusting a
7619 memory address (ADDR) to a vector to point to a scalar field within the
7620 vector with a constant offset (ELEMENT_OFFSET). If the address is not
7621 valid, we can use the base register temporary (BASE_TMP) to form the address. */
7622
7623 static rtx
7624 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7625 {
7626 rtx new_addr = NULL;
7627
7628 gcc_assert (CONST_INT_P (element_offset));
7629
7630 if (GET_CODE (addr) == CONST)
7631 addr = XEXP (addr, 0);
7632
7633 if (GET_CODE (addr) == PLUS)
7634 {
7635 rtx op0 = XEXP (addr, 0);
7636 rtx op1 = XEXP (addr, 1);
7637
7638 if (CONST_INT_P (op1))
7639 {
7640 HOST_WIDE_INT offset
7641 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7642
7643 if (offset == 0)
7644 new_addr = op0;
7645
7646 else
7647 {
7648 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7649 new_addr = gen_rtx_CONST (Pmode, plus);
7650 }
7651 }
7652
7653 else
7654 {
7655 emit_move_insn (base_tmp, addr);
7656 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7657 }
7658 }
7659
7660 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7661 {
7662 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7663 new_addr = gen_rtx_CONST (Pmode, plus);
7664 }
7665
7666 else
7667 gcc_unreachable ();
7668
7669 return new_addr;
7670 }
7671
7672 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7673 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7674 temporary (BASE_TMP) to fixup the address. Return the new memory address
7675 that is valid for reads or writes to a given register (SCALAR_REG).
7676
7677 This function is expected to be called after reload is completed when we are
7678 splitting insns. The temporary BASE_TMP might be set multiple times with
7679 this code. */
7680
7681 rtx
7682 rs6000_adjust_vec_address (rtx scalar_reg,
7683 rtx mem,
7684 rtx element,
7685 rtx base_tmp,
7686 machine_mode scalar_mode)
7687 {
7688 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7689 rtx addr = XEXP (mem, 0);
7690 rtx new_addr;
7691
7692 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7693 gcc_assert (!reg_mentioned_p (base_tmp, element));
7694
7695 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7696 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7697
7698 /* Calculate what we need to add to the address to get the element
7699 address. */
7700 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7701
7702 /* Create the new address pointing to the element within the vector. If we
7703 are adding 0, we don't have to change the address. */
7704 if (element_offset == const0_rtx)
7705 new_addr = addr;
7706
7707 /* A simple indirect address can be converted into a reg + offset
7708 address. */
7709 else if (REG_P (addr) || SUBREG_P (addr))
7710 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7711
7712 /* For references to local static variables, fold a constant offset into the
7713 address. */
7714 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7715 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7716
7717 /* Optimize D-FORM addresses with constant offset with a constant element, to
7718 include the element offset in the address directly. */
7719 else if (GET_CODE (addr) == PLUS)
7720 {
7721 rtx op0 = XEXP (addr, 0);
7722 rtx op1 = XEXP (addr, 1);
7723
7724 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7725 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7726 {
7727 /* op0 should never be r0, because r0+offset is not valid. But it
7728 doesn't hurt to make sure it is not r0. */
7729 gcc_assert (reg_or_subregno (op0) != 0);
7730
7731 /* D-FORM address with constant element number. */
7732 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7733 rtx offset_rtx = GEN_INT (offset);
7734 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7735 }
7736 else
7737 {
7738 /* If we don't have a D-FORM address with a constant element number,
7739 add the two elements in the current address. Then add the offset.
7740
7741 Previously, we tried to add the offset to OP1 and change the
7742 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7743 complicated because we had to verify that op1 was not GPR0 and we
7744 had a constant element offset (due to the way ADDI is defined).
7745 By doing the add of OP0 and OP1 first, and then adding in the
7746 offset, it has the benefit that if D-FORM instructions are
7747 allowed, the offset is part of the memory access to the vector
7748 element. */
7749 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7750 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7751 }
7752 }
7753
7754 else
7755 {
7756 emit_move_insn (base_tmp, addr);
7757 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7758 }
7759
7760 /* If the address isn't valid, move the address into the temporary base
7761 register. Some reasons it could not be valid include:
7762
7763 The address offset overflowed the 16 or 34 bit offset size;
7764 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7765 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7766 Only X_FORM loads can be done, and the address is D_FORM. */
7767
7768 enum insn_form iform
7769 = address_to_insn_form (new_addr, scalar_mode,
7770 reg_to_non_prefixed (scalar_reg, scalar_mode));
7771
7772 if (iform == INSN_FORM_BAD)
7773 {
7774 emit_move_insn (base_tmp, new_addr);
7775 new_addr = base_tmp;
7776 }
7777
7778 return change_address (mem, scalar_mode, new_addr);
7779 }
7780
7781 /* Split a variable vec_extract operation into the component instructions. */
7782
7783 void
7784 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7785 rtx tmp_altivec)
7786 {
7787 machine_mode mode = GET_MODE (src);
7788 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7789 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7790 int byte_shift = exact_log2 (scalar_size);
7791
7792 gcc_assert (byte_shift >= 0);
7793
7794 /* If we are given a memory address, optimize to load just the element. We
7795 don't have to adjust the vector element number on little endian
7796 systems. */
7797 if (MEM_P (src))
7798 {
7799 emit_move_insn (dest,
7800 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7801 scalar_mode));
7802 return;
7803 }
7804
7805 else if (REG_P (src) || SUBREG_P (src))
7806 {
7807 int num_elements = GET_MODE_NUNITS (mode);
7808 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7809 int bit_shift = 7 - exact_log2 (num_elements);
7810 rtx element2;
7811 unsigned int dest_regno = reg_or_subregno (dest);
7812 unsigned int src_regno = reg_or_subregno (src);
7813 unsigned int element_regno = reg_or_subregno (element);
7814
7815 gcc_assert (REG_P (tmp_gpr));
7816
7817 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7818 a general purpose register. */
7819 if (TARGET_P9_VECTOR
7820 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7821 && INT_REGNO_P (dest_regno)
7822 && ALTIVEC_REGNO_P (src_regno)
7823 && INT_REGNO_P (element_regno))
7824 {
7825 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7826 rtx element_si = gen_rtx_REG (SImode, element_regno);
7827
7828 if (mode == V16QImode)
7829 emit_insn (BYTES_BIG_ENDIAN
7830 ? gen_vextublx (dest_si, element_si, src)
7831 : gen_vextubrx (dest_si, element_si, src));
7832
7833 else if (mode == V8HImode)
7834 {
7835 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7836 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7837 emit_insn (BYTES_BIG_ENDIAN
7838 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7839 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7840 }
7841
7842
7843 else
7844 {
7845 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7846 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7847 emit_insn (BYTES_BIG_ENDIAN
7848 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7849 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7850 }
7851
7852 return;
7853 }
7854
7855
7856 gcc_assert (REG_P (tmp_altivec));
7857
7858 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7859 an XOR, otherwise we need to subtract. The shift amount is chosen so that
7860 VSLO will shift the element into the upper position (adding 3 converts a
7861 byte shift into a bit shift). */
7862 if (scalar_size == 8)
7863 {
7864 if (!BYTES_BIG_ENDIAN)
7865 {
7866 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7867 element2 = tmp_gpr;
7868 }
7869 else
7870 element2 = element;
7871
7872 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7873 bit. */
7874 emit_insn (gen_rtx_SET (tmp_gpr,
7875 gen_rtx_AND (DImode,
7876 gen_rtx_ASHIFT (DImode,
7877 element2,
7878 GEN_INT (6)),
7879 GEN_INT (64))));
7880 }
7881 else
7882 {
7883 if (!BYTES_BIG_ENDIAN)
7884 {
7885 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7886
7887 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7888 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7889 element2 = tmp_gpr;
7890 }
7891 else
7892 element2 = element;
7893
7894 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7895 }
7896
7897 /* Get the value into the lower byte of the Altivec register where VSLO
7898 expects it. */
7899 if (TARGET_P9_VECTOR)
7900 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7901 else if (can_create_pseudo_p ())
7902 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7903 else
7904 {
7905 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7906 emit_move_insn (tmp_di, tmp_gpr);
7907 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7908 }
7909
7910 /* Do the VSLO to get the value into the final location. */
7911 switch (mode)
7912 {
7913 case E_V2DFmode:
7914 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7915 return;
7916
7917 case E_V2DImode:
7918 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7919 return;
7920
7921 case E_V4SFmode:
7922 {
7923 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7924 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7925 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7926 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7927 tmp_altivec));
7928
7929 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7930 return;
7931 }
7932
7933 case E_V4SImode:
7934 case E_V8HImode:
7935 case E_V16QImode:
7936 {
7937 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7938 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7939 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7940 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7941 tmp_altivec));
7942 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7943 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7944 GEN_INT (64 - bits_in_element)));
7945 return;
7946 }
7947
7948 default:
7949 gcc_unreachable ();
7950 }
7951
7952 return;
7953 }
7954 else
7955 gcc_unreachable ();
7956 }
7957
7958 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7959 selects whether the alignment is ABI-mandated, optional, or
7960 both ABI-mandated and optional alignment. */
7961
7962 unsigned int
7963 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7964 {
7965 if (how != align_opt)
7966 {
7967 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7968 align = 128;
7969 }
7970
7971 if (how != align_abi)
7972 {
7973 if (TREE_CODE (type) == ARRAY_TYPE
7974 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7975 {
7976 if (align < BITS_PER_WORD)
7977 align = BITS_PER_WORD;
7978 }
7979 }
7980
7981 return align;
7982 }
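/* Editor's example (illustrative): a vector type is raised to 128-bit
   alignment as an ABI requirement (skipped only when HOW is align_opt),
   while e.g. "char buf[64]" is raised to word alignment purely as an
   optimization (skipped when HOW is align_abi).  */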
7983
7984 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7985 instructions simply ignore the low bits; VSX memory instructions
7986 are aligned to 4 or 8 bytes. */
7987
7988 static bool
7989 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7990 {
7991 return (STRICT_ALIGNMENT
7992 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7993 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7994 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
7995 && (int) align < VECTOR_ALIGN (mode)))));
7996 }
7997
7998 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
7999
8000 unsigned int
8001 rs6000_special_adjust_field_align (tree type, unsigned int computed)
8002 {
8003 if (computed <= 32 || TYPE_PACKED (type))
8004 return computed;
8005
8006 /* Strip initial arrays. */
8007 while (TREE_CODE (type) == ARRAY_TYPE)
8008 type = TREE_TYPE (type);
8009
8010 /* If RECORD or UNION, recursively find the first field. */
8011 while (AGGREGATE_TYPE_P (type))
8012 {
8013 tree field = TYPE_FIELDS (type);
8014
8015 /* Skip all non-field decls. */
8016 while (field != NULL
8017 && (TREE_CODE (field) != FIELD_DECL
8018 || DECL_FIELD_ABI_IGNORED (field)))
8019 field = DECL_CHAIN (field);
8020
8021 if (! field)
8022 break;
8023
8024 /* A packed field does not contribute any extra alignment. */
8025 if (DECL_PACKED (field))
8026 return computed;
8027
8028 type = TREE_TYPE (field);
8029
8030 /* Strip arrays. */
8031 while (TREE_CODE (type) == ARRAY_TYPE)
8032 type = TREE_TYPE (type);
8033 }
8034
8035 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8036 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8037 computed = MIN (computed, 32);
8038
8039 return computed;
8040 }
8041
8042 /* AIX increases natural record alignment to doubleword if the innermost first
8043 field is an FP double while the FP fields remain word aligned.
8044 Only called if TYPE initially is a RECORD or UNION. */
8045
8046 unsigned int
8047 rs6000_special_round_type_align (tree type, unsigned int computed,
8048 unsigned int specified)
8049 {
8050 unsigned int align = MAX (computed, specified);
8051
8052 if (TYPE_PACKED (type) || align >= 64)
8053 return align;
8054
8055 /* If RECORD or UNION, recursively find the first field. */
8056 do
8057 {
8058 tree field = TYPE_FIELDS (type);
8059
8060 /* Skip all non-field decls. */
8061 while (field != NULL
8062 && (TREE_CODE (field) != FIELD_DECL
8063 || DECL_FIELD_ABI_IGNORED (field)))
8064 field = DECL_CHAIN (field);
8065
8066 if (! field)
8067 break;
8068
8069 /* A packed field does not contribute any extra alignment. */
8070 if (DECL_PACKED (field))
8071 return align;
8072
8073 type = TREE_TYPE (field);
8074
8075 /* Strip arrays. */
8076 while (TREE_CODE (type) == ARRAY_TYPE)
8077 type = TREE_TYPE (type);
8078 } while (AGGREGATE_TYPE_P (type));
8079
8080 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8081 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8082 align = MAX (align, 64);
8083
8084 return align;
8085 }
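/* Editor's example (illustrative): under these AIX rules

     struct s { double d; int i; };

   is rounded up to doubleword alignment because its first field is a
   double, while the double member itself stays word aligned inside the
   record.  */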
8086
8087 /* Darwin increases record alignment to the natural alignment of
8088 the first field. */
8089
8090 unsigned int
8091 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8092 unsigned int specified)
8093 {
8094 unsigned int align = MAX (computed, specified);
8095
8096 if (TYPE_PACKED (type))
8097 return align;
8098
8099 /* Find the first field, looking down into aggregates. */
8100 do {
8101 tree field = TYPE_FIELDS (type);
8102 /* Skip all non-field decls. */
8103 while (field != NULL
8104 && (TREE_CODE (field) != FIELD_DECL
8105 || DECL_FIELD_ABI_IGNORED (field)))
8106 field = DECL_CHAIN (field);
8107 if (! field)
8108 break;
8109 /* A packed field does not contribute any extra alignment. */
8110 if (DECL_PACKED (field))
8111 return align;
8112 type = TREE_TYPE (field);
8113 while (TREE_CODE (type) == ARRAY_TYPE)
8114 type = TREE_TYPE (type);
8115 } while (AGGREGATE_TYPE_P (type));
8116
8117 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8118 align = MAX (align, TYPE_ALIGN (type));
8119
8120 return align;
8121 }
8122
8123 /* Return 1 for an operand in small memory on V.4/eabi. */
8124
8125 int
8126 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8127 machine_mode mode ATTRIBUTE_UNUSED)
8128 {
8129 #if TARGET_ELF
8130 rtx sym_ref;
8131
8132 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8133 return 0;
8134
8135 if (DEFAULT_ABI != ABI_V4)
8136 return 0;
8137
8138 if (SYMBOL_REF_P (op))
8139 sym_ref = op;
8140
8141 else if (GET_CODE (op) != CONST
8142 || GET_CODE (XEXP (op, 0)) != PLUS
8143 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8144 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8145 return 0;
8146
8147 else
8148 {
8149 rtx sum = XEXP (op, 0);
8150 HOST_WIDE_INT summand;
8151
8152 /* We have to be careful here, because it is the referenced address
8153 that must be 32k from _SDA_BASE_, not just the symbol. */
8154 summand = INTVAL (XEXP (sum, 1));
8155 if (summand < 0 || summand > g_switch_value)
8156 return 0;
8157
8158 sym_ref = XEXP (sum, 0);
8159 }
8160
8161 return SYMBOL_REF_SMALL_P (sym_ref);
8162 #else
8163 return 0;
8164 #endif
8165 }
8166
8167 /* Return true if either operand is a general purpose register. */
8168
8169 bool
8170 gpr_or_gpr_p (rtx op0, rtx op1)
8171 {
8172 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8173 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8174 }
8175
8176 /* Return true if this is a move direct operation between GPR registers and
8177 floating point/VSX registers. */
8178
8179 bool
8180 direct_move_p (rtx op0, rtx op1)
8181 {
8182 if (!REG_P (op0) || !REG_P (op1))
8183 return false;
8184
8185 if (!TARGET_DIRECT_MOVE)
8186 return false;
8187
8188 int regno0 = REGNO (op0);
8189 int regno1 = REGNO (op1);
8190 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8191 return false;
8192
8193 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8194 return true;
8195
8196 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8197 return true;
8198
8199 return false;
8200 }
8201
8202 /* Return true if ADDR is an acceptable address for a quad memory
8203 operation of mode MODE (either LQ/STQ for general purpose registers, or
8204 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
8205 base register must satisfy strict (post-reload) register checking. */
8207
8208 bool
8209 quad_address_p (rtx addr, machine_mode mode, bool strict)
8210 {
8211 rtx op0, op1;
8212
8213 if (GET_MODE_SIZE (mode) < 16)
8214 return false;
8215
8216 if (legitimate_indirect_address_p (addr, strict))
8217 return true;
8218
8219 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8220 return false;
8221
8222 /* Is this a valid prefixed address? If the bottom four bits of the offset
8223 are non-zero, we could use a prefixed instruction (which does not have the
8224 DQ-form constraint that the traditional instruction had) instead of
8225 forcing the unaligned offset to a GPR. */
8226 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8227 return true;
8228
8229 if (GET_CODE (addr) != PLUS)
8230 return false;
8231
8232 op0 = XEXP (addr, 0);
8233 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8234 return false;
8235
8236 op1 = XEXP (addr, 1);
8237 if (!CONST_INT_P (op1))
8238 return false;
8239
8240 return quad_address_offset_p (INTVAL (op1));
8241 }
8242
8243 /* Return true if this is a load or store quad operation. This function does
8244 not handle the atomic quad memory instructions. */
8245
8246 bool
8247 quad_load_store_p (rtx op0, rtx op1)
8248 {
8249 bool ret;
8250
8251 if (!TARGET_QUAD_MEMORY)
8252 ret = false;
8253
8254 else if (REG_P (op0) && MEM_P (op1))
8255 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8256 && quad_memory_operand (op1, GET_MODE (op1))
8257 && !reg_overlap_mentioned_p (op0, op1));
8258
8259 else if (MEM_P (op0) && REG_P (op1))
8260 ret = (quad_memory_operand (op0, GET_MODE (op0))
8261 && quad_int_reg_operand (op1, GET_MODE (op1)));
8262
8263 else
8264 ret = false;
8265
8266 if (TARGET_DEBUG_ADDR)
8267 {
8268 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8269 ret ? "true" : "false");
8270 debug_rtx (gen_rtx_SET (op0, op1));
8271 }
8272
8273 return ret;
8274 }
8275
8276 /* Given an address, return a constant offset term if one exists. */
8277
8278 static rtx
8279 address_offset (rtx op)
8280 {
8281 if (GET_CODE (op) == PRE_INC
8282 || GET_CODE (op) == PRE_DEC)
8283 op = XEXP (op, 0);
8284 else if (GET_CODE (op) == PRE_MODIFY
8285 || GET_CODE (op) == LO_SUM)
8286 op = XEXP (op, 1);
8287
8288 if (GET_CODE (op) == CONST)
8289 op = XEXP (op, 0);
8290
8291 if (GET_CODE (op) == PLUS)
8292 op = XEXP (op, 1);
8293
8294 if (CONST_INT_P (op))
8295 return op;
8296
8297 return NULL_RTX;
8298 }
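/* Editor's examples (illustrative): (plus (reg) (const_int 16)) yields 16,
   (lo_sum (reg) (const (plus (symbol_ref) (const_int 8)))) yields 8, and
   an address with no constant term yields NULL_RTX.  */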
8299
8300 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8301 the mode. If we can't find (or don't know) the alignment of the symbol
8302 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8303 should be pessimistic]. Offsets are validated in the same way as for
8304 reg + offset. */
8305 static bool
8306 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8307 {
8308 /* We should not get here with this. */
8309 gcc_checking_assert (! mode_supports_dq_form (mode));
8310
8311 if (GET_CODE (x) == CONST)
8312 x = XEXP (x, 0);
8313
8314 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8315 x = XVECEXP (x, 0, 0);
8316
8317 rtx sym = NULL_RTX;
8318 unsigned HOST_WIDE_INT offset = 0;
8319
8320 if (GET_CODE (x) == PLUS)
8321 {
8322 sym = XEXP (x, 0);
8323 if (! SYMBOL_REF_P (sym))
8324 return false;
8325 if (!CONST_INT_P (XEXP (x, 1)))
8326 return false;
8327 offset = INTVAL (XEXP (x, 1));
8328 }
8329 else if (SYMBOL_REF_P (x))
8330 sym = x;
8331 else if (CONST_INT_P (x))
8332 offset = INTVAL (x);
8333 else if (GET_CODE (x) == LABEL_REF)
8334 offset = 0; // We assume code labels are Pmode aligned
8335 else
8336 return false; // not sure what we have here.
8337
8338 /* If we don't know the alignment of the thing to which the symbol refers,
8339 we assume optimistically it is "enough".
8340 ??? maybe we should be pessimistic instead. */
8341 unsigned align = 0;
8342
8343 if (sym)
8344 {
8345 tree decl = SYMBOL_REF_DECL (sym);
8346 #if TARGET_MACHO
8347 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8348 /* The decl in an indirection symbol is the original one, which might
8349 be less aligned than the indirection. Our indirections are always
8350 pointer-aligned. */
8351 ;
8352 else
8353 #endif
8354 if (decl && DECL_ALIGN (decl))
8355 align = DECL_ALIGN_UNIT (decl);
8356 }
8357
8358 unsigned int extra = 0;
8359 switch (mode)
8360 {
8361 case E_DFmode:
8362 case E_DDmode:
8363 case E_DImode:
8364 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8365 addressing. */
8366 if (VECTOR_MEM_VSX_P (mode))
8367 return false;
8368
8369 if (!TARGET_POWERPC64)
8370 extra = 4;
8371 else if ((offset & 3) || (align & 3))
8372 return false;
8373 break;
8374
8375 case E_TFmode:
8376 case E_IFmode:
8377 case E_KFmode:
8378 case E_TDmode:
8379 case E_TImode:
8380 case E_PTImode:
8381 extra = 8;
8382 if (!TARGET_POWERPC64)
8383 extra = 12;
8384 else if ((offset & 3) || (align & 3))
8385 return false;
8386 break;
8387
8388 default:
8389 break;
8390 }
8391
8392 /* We only care if the access(es) would cause a change to the high part. */
8393 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8394 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8395 }
8396
8397 /* Return true if the MEM operand is a memory operand suitable for use
8398 with a (full width, possibly multiple) gpr load/store. On
8399 powerpc64 this means the offset must be divisible by 4.
8400 Implements 'Y' constraint.
8401
8402 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8403 a constraint function we know the operand has satisfied a suitable
8404 memory predicate.
8405
8406 Offsetting a lo_sum should not be allowed, except where we know by
8407 alignment that a 32k boundary is not crossed. Note that by
8408 "offsetting" here we mean a further offset to access parts of the
8409 MEM. It's fine to have a lo_sum where the inner address is offset
8410 from a sym, since the same sym+offset will appear in the high part
8411 of the address calculation. */
8412
8413 bool
8414 mem_operand_gpr (rtx op, machine_mode mode)
8415 {
8416 unsigned HOST_WIDE_INT offset;
8417 int extra;
8418 rtx addr = XEXP (op, 0);
8419
8420 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8421 if (TARGET_UPDATE
8422 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8423 && mode_supports_pre_incdec_p (mode)
8424 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8425 return true;
8426
8427 /* Allow prefixed instructions if supported. If the bottom two bits of the
8428 offset are non-zero, we could use a prefixed instruction (which does not
8429 have the DS-form constraint that the traditional instruction had) instead
8430 of forcing the unaligned offset to a GPR. */
8431 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8432 return true;
8433
8434 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8435 really OK. Doing this early avoids teaching all the other machinery
8436 about them. */
8437 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8438 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8439
8440 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8441 if (!rs6000_offsettable_memref_p (op, mode, false))
8442 return false;
8443
8444 op = address_offset (addr);
8445 if (op == NULL_RTX)
8446 return true;
8447
8448 offset = INTVAL (op);
8449 if (TARGET_POWERPC64 && (offset & 3) != 0)
8450 return false;
8451
8452 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8453 if (extra < 0)
8454 extra = 0;
8455
8456 if (GET_CODE (addr) == LO_SUM)
8457 /* For lo_sum addresses, we must allow any offset except one that
8458 causes a wrap, so test only the low 16 bits. */
8459 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8460
8461 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8462 }
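/* Editor's note (illustrative): the expression
   ((offset & 0xffff) ^ 0x8000) - 0x8000 used above sign-extends the low
   16 bits of the offset; e.g. 0x1fffc becomes 0xfffc -> -4, so only a
   wrap within the low halfword can make the final range test fail.  */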
8463
8464 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8465 enforce an offset divisible by 4 even for 32-bit. */
8466
8467 bool
8468 mem_operand_ds_form (rtx op, machine_mode mode)
8469 {
8470 unsigned HOST_WIDE_INT offset;
8471 int extra;
8472 rtx addr = XEXP (op, 0);
8473
8474 /* Allow prefixed instructions if supported. If the bottom two bits of the
8475 offset are non-zero, we could use a prefixed instruction (which does not
8476 have the DS-form constraint that the traditional instruction had) instead
8477 of forcing the unaligned offset to a GPR. */
8478 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8479 return true;
8480
8481 if (!offsettable_address_p (false, mode, addr))
8482 return false;
8483
8484 op = address_offset (addr);
8485 if (op == NULL_RTX)
8486 return true;
8487
8488 offset = INTVAL (op);
8489 if ((offset & 3) != 0)
8490 return false;
8491
8492 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8493 if (extra < 0)
8494 extra = 0;
8495
8496 if (GET_CODE (addr) == LO_SUM)
8497 /* For lo_sum addresses, we must allow any offset except one that
8498 causes a wrap, so test only the low 16 bits. */
8499 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8500
8501 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8502 }
8503 \f
8504 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8505
8506 static bool
8507 reg_offset_addressing_ok_p (machine_mode mode)
8508 {
8509 switch (mode)
8510 {
8511 case E_V16QImode:
8512 case E_V8HImode:
8513 case E_V4SFmode:
8514 case E_V4SImode:
8515 case E_V2DFmode:
8516 case E_V2DImode:
8517 case E_V1TImode:
8518 case E_TImode:
8519 case E_TFmode:
8520 case E_KFmode:
8521 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8522 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8523 a vector mode, if we want to use the VSX registers to move it around,
8524 we need to restrict ourselves to reg+reg addressing. Similarly for
8525 IEEE 128-bit floating point that is passed in a single vector
8526 register. */
8527 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8528 return mode_supports_dq_form (mode);
8529 break;
8530
8531 /* The vector pair/quad types support offset addressing if the
8532 underlying vectors support offset addressing. */
8533 case E_OOmode:
8534 case E_XOmode:
8535 return TARGET_MMA;
8536
8537 case E_SDmode:
8538 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8539 addressing for the LFIWZX and STFIWX instructions. */
8540 if (TARGET_NO_SDMODE_STACK)
8541 return false;
8542 break;
8543
8544 default:
8545 break;
8546 }
8547
8548 return true;
8549 }
8550
8551 static bool
8552 virtual_stack_registers_memory_p (rtx op)
8553 {
8554 int regnum;
8555
8556 if (REG_P (op))
8557 regnum = REGNO (op);
8558
8559 else if (GET_CODE (op) == PLUS
8560 && REG_P (XEXP (op, 0))
8561 && CONST_INT_P (XEXP (op, 1)))
8562 regnum = REGNO (XEXP (op, 0));
8563
8564 else
8565 return false;
8566
8567 return (regnum >= FIRST_VIRTUAL_REGISTER
8568 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8569 }
8570
8571 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8572 is known to not straddle a 32k boundary. This function is used
8573 to determine whether -mcmodel=medium code can use TOC pointer
8574 relative addressing for OP. This means the alignment of the TOC
8575 pointer must also be taken into account, and unfortunately that is
8576 only 8 bytes. */
8577
8578 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8579 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8580 #endif
8581
8582 static bool
8583 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8584 machine_mode mode)
8585 {
8586 tree decl;
8587 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8588
8589 if (!SYMBOL_REF_P (op))
8590 return false;
8591
8592 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8593 SYMBOL_REF. */
8594 if (mode_supports_dq_form (mode))
8595 return false;
8596
8597 dsize = GET_MODE_SIZE (mode);
8598 decl = SYMBOL_REF_DECL (op);
8599 if (!decl)
8600 {
8601 if (dsize == 0)
8602 return false;
8603
8604 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8605 replacing memory addresses with an anchor plus offset. We
8606 could find the decl by rummaging around in the block->objects
8607 VEC for the given offset but that seems like too much work. */
8608 dalign = BITS_PER_UNIT;
8609 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8610 && SYMBOL_REF_ANCHOR_P (op)
8611 && SYMBOL_REF_BLOCK (op) != NULL)
8612 {
8613 struct object_block *block = SYMBOL_REF_BLOCK (op);
8614
8615 dalign = block->alignment;
8616 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8617 }
8618 else if (CONSTANT_POOL_ADDRESS_P (op))
8619 {
8620 /* It would be nice to have get_pool_align().. */
8621 machine_mode cmode = get_pool_mode (op);
8622
8623 dalign = GET_MODE_ALIGNMENT (cmode);
8624 }
8625 }
8626 else if (DECL_P (decl))
8627 {
8628 dalign = DECL_ALIGN (decl);
8629
8630 if (dsize == 0)
8631 {
8632 /* Allow BLKmode when the entire object is known to not
8633 cross a 32k boundary. */
8634 if (!DECL_SIZE_UNIT (decl))
8635 return false;
8636
8637 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8638 return false;
8639
8640 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8641 if (dsize > 32768)
8642 return false;
8643
8644 dalign /= BITS_PER_UNIT;
8645 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8646 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8647 return dalign >= dsize;
8648 }
8649 }
8650 else
8651 gcc_unreachable ();
8652
8653 /* Find how many bits of the alignment we know for this access. */
8654 dalign /= BITS_PER_UNIT;
8655 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8656 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8657 mask = dalign - 1;
8658 lsb = offset & -offset;
8659 mask &= lsb - 1;
8660 dalign = mask + 1;
8661
8662 return dalign >= dsize;
8663 }
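
/* A worked example of the mask arithmetic above, under the assumed
   8-byte TOC pointer alignment: for a decl aligned to 16 bytes, dalign
   is first capped at 8.  With offset = 20, lsb = 20 & -20 = 4, so
   mask = (8 - 1) & (4 - 1) = 3 and the known alignment of the access
   becomes dalign = 4.  A 4-byte access therefore passes (4 >= 4),
   while an 8-byte access is rejected, since base + 20 is only known
   to be 4-byte aligned.  */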
8664
8665 static bool
8666 constant_pool_expr_p (rtx op)
8667 {
8668 rtx base, offset;
8669
8670 split_const (op, &base, &offset);
8671 return (SYMBOL_REF_P (base)
8672 && CONSTANT_POOL_ADDRESS_P (base)
8673 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8674 }
8675
8676 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null
8677 and register allocation is already done, use it as the register to put the
8678 HIGH value into. */
8679
8680 rtx
8681 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8682 {
8683 rtx tocrel, tocreg, hi;
8684
8685 gcc_assert (TARGET_TOC);
8686
8687 if (TARGET_DEBUG_ADDR)
8688 {
8689 if (SYMBOL_REF_P (symbol))
8690 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8691 XSTR (symbol, 0));
8692 else
8693 {
8694 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8695 GET_RTX_NAME (GET_CODE (symbol)));
8696 debug_rtx (symbol);
8697 }
8698 }
8699
8700 if (!can_create_pseudo_p ())
8701 df_set_regs_ever_live (TOC_REGISTER, true);
8702
8703 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8704 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8705 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8706 return tocrel;
8707
8708 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8709 if (largetoc_reg != NULL)
8710 {
8711 emit_move_insn (largetoc_reg, hi);
8712 hi = largetoc_reg;
8713 }
8714 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8715 }
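
/* As a sketch, the two shapes produced above are:

   -mcmodel=small (or before register allocation):
       (unspec:P [(symbol_ref) (reg:P toc)] UNSPEC_TOCREL)

   -mcmodel=medium/large, after register allocation:
       (lo_sum:P (high:P (unspec ... UNSPEC_TOCREL))
                 (unspec ... UNSPEC_TOCREL))

   where the HIGH part may have been forced into LARGETOC_REG.  The
   toc register here is TOC_REGISTER (normally GPR 2).  */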
8716
8717 /* These are only used to pass through from print_operand/print_operand_address
8718 to rs6000_output_addr_const_extra over the intervening function
8719 output_addr_const, which is not target code. */
8720 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8721
8722 /* Return true if OP is a toc pointer relative address (the output
8723 of create_TOC_reference). If STRICT, do not match non-split
8724 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8725 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8726 TOCREL_OFFSET_RET respectively. */
8727
8728 bool
8729 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8730 const_rtx *tocrel_offset_ret)
8731 {
8732 if (!TARGET_TOC)
8733 return false;
8734
8735 if (TARGET_CMODEL != CMODEL_SMALL)
8736 {
8737 /* When strict, ensure we have everything tidy. */
8738 if (strict
8739 && !(GET_CODE (op) == LO_SUM
8740 && REG_P (XEXP (op, 0))
8741 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8742 return false;
8743
8744 /* When not strict, allow non-split TOC addresses and also allow
8745 (lo_sum (high ..)) TOC addresses created during reload. */
8746 if (GET_CODE (op) == LO_SUM)
8747 op = XEXP (op, 1);
8748 }
8749
8750 const_rtx tocrel_base = op;
8751 const_rtx tocrel_offset = const0_rtx;
8752
8753 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8754 {
8755 tocrel_base = XEXP (op, 0);
8756 tocrel_offset = XEXP (op, 1);
8757 }
8758
8759 if (tocrel_base_ret)
8760 *tocrel_base_ret = tocrel_base;
8761 if (tocrel_offset_ret)
8762 *tocrel_offset_ret = tocrel_offset;
8763
8764 return (GET_CODE (tocrel_base) == UNSPEC
8765 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8766 && REG_P (XVECEXP (tocrel_base, 0, 1))
8767 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8768 }
8769
8770 /* Return true if X is a constant pool address, and also for cmodel=medium
8771 if X is a toc-relative address known to be offsettable within MODE. */
8772
8773 bool
8774 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8775 bool strict)
8776 {
8777 const_rtx tocrel_base, tocrel_offset;
8778 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8779 && (TARGET_CMODEL != CMODEL_MEDIUM
8780 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8781 || mode == QImode
8782 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8783 INTVAL (tocrel_offset), mode)));
8784 }
8785
8786 static bool
8787 legitimate_small_data_p (machine_mode mode, rtx x)
8788 {
8789 return (DEFAULT_ABI == ABI_V4
8790 && !flag_pic && !TARGET_TOC
8791 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8792 && small_data_operand (x, mode));
8793 }
8794
8795 bool
8796 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8797 bool strict, bool worst_case)
8798 {
8799 unsigned HOST_WIDE_INT offset;
8800 unsigned int extra;
8801
8802 if (GET_CODE (x) != PLUS)
8803 return false;
8804 if (!REG_P (XEXP (x, 0)))
8805 return false;
8806 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8807 return false;
8808 if (mode_supports_dq_form (mode))
8809 return quad_address_p (x, mode, strict);
8810 if (!reg_offset_addressing_ok_p (mode))
8811 return virtual_stack_registers_memory_p (x);
8812 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8813 return true;
8814 if (!CONST_INT_P (XEXP (x, 1)))
8815 return false;
8816
8817 offset = INTVAL (XEXP (x, 1));
8818 extra = 0;
8819 switch (mode)
8820 {
8821 case E_DFmode:
8822 case E_DDmode:
8823 case E_DImode:
8824 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8825 addressing. */
8826 if (VECTOR_MEM_VSX_P (mode))
8827 return false;
8828
8829 if (!worst_case)
8830 break;
8831 if (!TARGET_POWERPC64)
8832 extra = 4;
8833 else if (offset & 3)
8834 return false;
8835 break;
8836
8837 case E_TFmode:
8838 case E_IFmode:
8839 case E_KFmode:
8840 case E_TDmode:
8841 case E_TImode:
8842 case E_PTImode:
8843 extra = 8;
8844 if (!worst_case)
8845 break;
8846 if (!TARGET_POWERPC64)
8847 extra = 12;
8848 else if (offset & 3)
8849 return false;
8850 break;
8851
8852 default:
8853 break;
8854 }
8855
8856 if (TARGET_PREFIXED)
8857 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8858 else
8859 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8860 }
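
/* To illustrate the EXTRA slack: a 32-bit DFmode access is split into
   two word accesses at OFFSET and OFFSET + 4, so with worst_case both
   must fit in the signed 16-bit displacement field.  For example,
   OFFSET = 0x7ffc is rejected because the second word would sit at
   0x8000, outside the D-form range, whereas OFFSET = 0x7ff8 is fine.
   With -mprefixed the same reasoning applies to the 34-bit range.  */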
8861
8862 bool
8863 legitimate_indexed_address_p (rtx x, int strict)
8864 {
8865 rtx op0, op1;
8866
8867 if (GET_CODE (x) != PLUS)
8868 return false;
8869
8870 op0 = XEXP (x, 0);
8871 op1 = XEXP (x, 1);
8872
8873 return (REG_P (op0) && REG_P (op1)
8874 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8875 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8876 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8877 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8878 }
8879
8880 bool
8881 avoiding_indexed_address_p (machine_mode mode)
8882 {
8883 unsigned int msize = GET_MODE_SIZE (mode);
8884
8885 /* Avoid indexed addressing for modes that have non-indexed load/store
8886 instruction forms. On power10, vector pairs have an indexed
8887 form, but vector quads don't. */
8888 if (msize > 16)
8889 return msize != 32;
8890
8891 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8892 }
8893
8894 bool
8895 legitimate_indirect_address_p (rtx x, int strict)
8896 {
8897 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8898 }
8899
8900 bool
8901 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8902 {
8903 if (!TARGET_MACHO || !flag_pic
8904 || mode != SImode || !MEM_P (x))
8905 return false;
8906 x = XEXP (x, 0);
8907
8908 if (GET_CODE (x) != LO_SUM)
8909 return false;
8910 if (!REG_P (XEXP (x, 0)))
8911 return false;
8912 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8913 return false;
8914 x = XEXP (x, 1);
8915
8916 return CONSTANT_P (x);
8917 }
8918
8919 static bool
8920 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8921 {
8922 if (GET_CODE (x) != LO_SUM)
8923 return false;
8924 if (!REG_P (XEXP (x, 0)))
8925 return false;
8926 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8927 return false;
8928 /* Quad word addresses are restricted, so we can't use LO_SUM. */
8929 if (mode_supports_dq_form (mode))
8930 return false;
8931 x = XEXP (x, 1);
8932
8933 if (TARGET_ELF || TARGET_MACHO)
8934 {
8935 bool large_toc_ok;
8936
8937 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8938 return false;
8939 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, since that hook usually
8940 calls push_reload from the old reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8941 recognizes some LO_SUM addresses as valid although this
8942 function says the opposite. In most cases, LRA can generate
8943 correct code for address reloads through various transformations;
8944 it only fails to manage some LO_SUM cases. So we need to add
8945 code here saying that those addresses are still valid. */
8946 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8947 && small_toc_ref (x, VOIDmode));
8948 if (TARGET_TOC && ! large_toc_ok)
8949 return false;
8950 if (GET_MODE_NUNITS (mode) != 1)
8951 return false;
8952 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8953 && !(/* ??? Assume floating point reg based on mode? */
8954 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8955 return false;
8956
8957 return CONSTANT_P (x) || large_toc_ok;
8958 }
8959
8960 return false;
8961 }
8962
8963
8964 /* Try machine-dependent ways of modifying an illegitimate address
8965 to be legitimate. If we find one, return the new, valid address.
8966 This is used from only one place: `memory_address' in explow.cc.
8967
8968 OLDX is the address as it was before break_out_memory_refs was
8969 called. In some cases it is useful to look at this to decide what
8970 needs to be done.
8971
8972 It is always safe for this function to do nothing. It exists to
8973 recognize opportunities to optimize the output.
8974
8975 On RS/6000, first check for the sum of a register with a constant
8976 integer that is out of range. If so, generate code to add the
8977 constant with the low-order 16 bits masked to the register and force
8978 this result into another register (this can be done with `cau').
8979 Then generate an address of REG+(CONST&0xffff), allowing for the
8980 possibility of bit 16 being a one.
8981
8982 Then check for the sum of a register and something not constant; try to
8983 load the non-constant part into a register and return the sum. */
8984
8985 static rtx
8986 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8987 machine_mode mode)
8988 {
8989 unsigned int extra;
8990
8991 if (!reg_offset_addressing_ok_p (mode)
8992 || mode_supports_dq_form (mode))
8993 {
8994 if (virtual_stack_registers_memory_p (x))
8995 return x;
8996
8997 /* In theory we should not be seeing addresses of the form reg+0,
8998 but just in case it is generated, optimize it away. */
8999 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9000 return force_reg (Pmode, XEXP (x, 0));
9001
9002 /* For TImode with load/store quad, restrict addresses to just a single
9003 pointer, so it works with both GPRs and VSX registers. */
9004 /* Make sure both operands are registers. */
9005 else if (GET_CODE (x) == PLUS
9006 && (mode != TImode || !TARGET_VSX))
9007 return gen_rtx_PLUS (Pmode,
9008 force_reg (Pmode, XEXP (x, 0)),
9009 force_reg (Pmode, XEXP (x, 1)));
9010 else
9011 return force_reg (Pmode, x);
9012 }
9013 if (SYMBOL_REF_P (x))
9014 {
9015 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9016 if (model != 0)
9017 return rs6000_legitimize_tls_address (x, model);
9018 }
9019
9020 extra = 0;
9021 switch (mode)
9022 {
9023 case E_TFmode:
9024 case E_TDmode:
9025 case E_TImode:
9026 case E_PTImode:
9027 case E_IFmode:
9028 case E_KFmode:
9029 /* As in legitimate_offset_address_p, we do not assume the
9030 worst case. The mode here is just a hint as to the registers
9031 used. A TImode value is usually in GPRs, but may actually be
9032 in FPRs. Leave the worst-case scenario for reload to handle
9033 via insn constraints. PTImode is only ever in GPRs. */
9034 extra = 8;
9035 break;
9036 default:
9037 break;
9038 }
9039
9040 if (GET_CODE (x) == PLUS
9041 && REG_P (XEXP (x, 0))
9042 && CONST_INT_P (XEXP (x, 1))
9043 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9044 >= 0x10000 - extra))
9045 {
9046 HOST_WIDE_INT high_int, low_int;
9047 rtx sum;
9048 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9049 if (low_int >= 0x8000 - extra)
9050 low_int = 0;
9051 high_int = INTVAL (XEXP (x, 1)) - low_int;
9052 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9053 gen_int_mode (high_int, Pmode)), 0);
9054 return plus_constant (Pmode, sum, low_int);
9055 }
9056 else if (GET_CODE (x) == PLUS
9057 && REG_P (XEXP (x, 0))
9058 && !CONST_INT_P (XEXP (x, 1))
9059 && GET_MODE_NUNITS (mode) == 1
9060 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9061 || (/* ??? Assume floating point reg based on mode? */
9062 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9063 && !avoiding_indexed_address_p (mode))
9064 {
9065 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9066 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9067 }
9068 else if ((TARGET_ELF
9069 #if TARGET_MACHO
9070 || !MACHO_DYNAMIC_NO_PIC_P
9071 #endif
9072 )
9073 && TARGET_32BIT
9074 && TARGET_NO_TOC_OR_PCREL
9075 && !flag_pic
9076 && !CONST_INT_P (x)
9077 && !CONST_WIDE_INT_P (x)
9078 && !CONST_DOUBLE_P (x)
9079 && CONSTANT_P (x)
9080 && GET_MODE_NUNITS (mode) == 1
9081 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9082 || (/* ??? Assume floating point reg based on mode? */
9083 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9084 {
9085 rtx reg = gen_reg_rtx (Pmode);
9086 if (TARGET_ELF)
9087 emit_insn (gen_elf_high (reg, x));
9088 else
9089 emit_insn (gen_macho_high (Pmode, reg, x));
9090 return gen_rtx_LO_SUM (Pmode, reg, x);
9091 }
9092 else if (TARGET_TOC
9093 && SYMBOL_REF_P (x)
9094 && constant_pool_expr_p (x)
9095 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9096 return create_TOC_reference (x, NULL_RTX);
9097 else
9098 return x;
9099 }
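
/* A worked example of the high/low split above: for the address
   (plus (reg) (const_int 0x12345)), low_int = ((0x2345 ^ 0x8000)
   - 0x8000) = 0x2345 and high_int = 0x10000, giving

       addis tmp,base,0x1        base + 0x10000
       ...   0x2345(tmp)         remaining D-form displacement

   For 0x1abcd the low part sign-extends to -0x5433 and high_int
   becomes 0x20000, so the addis constant rounds up to 0x2.  */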
9100
9101 /* Debug version of rs6000_legitimize_address. */
9102 static rtx
9103 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9104 {
9105 rtx ret;
9106 rtx_insn *insns;
9107
9108 start_sequence ();
9109 ret = rs6000_legitimize_address (x, oldx, mode);
9110 insns = get_insns ();
9111 end_sequence ();
9112
9113 if (ret != x)
9114 {
9115 fprintf (stderr,
9116 "\nrs6000_legitimize_address: mode %s, old code %s, "
9117 "new code %s, modified\n",
9118 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9119 GET_RTX_NAME (GET_CODE (ret)));
9120
9121 fprintf (stderr, "Original address:\n");
9122 debug_rtx (x);
9123
9124 fprintf (stderr, "oldx:\n");
9125 debug_rtx (oldx);
9126
9127 fprintf (stderr, "New address:\n");
9128 debug_rtx (ret);
9129
9130 if (insns)
9131 {
9132 fprintf (stderr, "Insns added:\n");
9133 debug_rtx_list (insns, 20);
9134 }
9135 }
9136 else
9137 {
9138 fprintf (stderr,
9139 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9140 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9141
9142 debug_rtx (x);
9143 }
9144
9145 if (insns)
9146 emit_insn (insns);
9147
9148 return ret;
9149 }
9150
9151 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9152 We need to emit DTP-relative relocations. */
9153
9154 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9155 static void
9156 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9157 {
9158 switch (size)
9159 {
9160 case 4:
9161 fputs ("\t.long\t", file);
9162 break;
9163 case 8:
9164 fputs (DOUBLE_INT_ASM_OP, file);
9165 break;
9166 default:
9167 gcc_unreachable ();
9168 }
9169 output_addr_const (file, x);
9170 if (TARGET_ELF)
9171 fputs ("@dtprel+0x8000", file);
9172 }
9173
9174 /* Return true if X is a symbol that refers to real (rather than emulated)
9175 TLS. */
9176
9177 static bool
9178 rs6000_real_tls_symbol_ref_p (rtx x)
9179 {
9180 return (SYMBOL_REF_P (x)
9181 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9182 }
9183
9184 /* In the name of slightly smaller debug output, and to cater to
9185 general assembler lossage, recognize various UNSPEC sequences
9186 and turn them back into a direct symbol reference. */
9187
9188 static rtx
9189 rs6000_delegitimize_address (rtx orig_x)
9190 {
9191 rtx x, y, offset;
9192
9193 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9194 encodes loading up the high part of the address of a TOC reference along
9195 with a load of a GPR using the same base register used for the load. We
9196 return the original SYMBOL_REF.
9197
9198 (set (reg:INT1 <reg>
9199 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9200
9201 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9202 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9203 We return the original SYMBOL_REF.
9204
9205 (parallel [(set (reg:DI <base-reg>)
9206 (unspec:DI [(symbol_ref <symbol>)
9207 (const_int <marker>)]
9208 UNSPEC_PCREL_OPT_LD_ADDR))
9209 (set (reg:DI <load-reg>)
9210 (unspec:DI [(const_int 0)]
9211 UNSPEC_PCREL_OPT_LD_DATA))])
9212
9213 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9214 GPR being loaded is the same as the GPR used to hold the external address.
9215
9216 (set (reg:DI <base-reg>)
9217 (unspec:DI [(symbol_ref <symbol>)
9218 (const_int <marker>)]
9219 UNSPEC_PCREL_OPT_LD_SAME_REG))
9220
9221 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9222 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9223 We return the original SYMBOL_REF.
9224
9225 (parallel [(set (reg:DI <base-reg>)
9226 (unspec:DI [(symbol_ref <symbol>)
9227 (const_int <marker>)]
9228 UNSPEC_PCREL_OPT_ST_ADDR))
9229 (use (reg <store-reg>))]) */
9230
9231 if (GET_CODE (orig_x) == UNSPEC)
9232 switch (XINT (orig_x, 1))
9233 {
9234 case UNSPEC_FUSION_GPR:
9235 case UNSPEC_PCREL_OPT_LD_ADDR:
9236 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9237 case UNSPEC_PCREL_OPT_ST_ADDR:
9238 orig_x = XVECEXP (orig_x, 0, 0);
9239 break;
9240
9241 default:
9242 break;
9243 }
9244
9245 orig_x = delegitimize_mem_from_attrs (orig_x);
9246
9247 x = orig_x;
9248 if (MEM_P (x))
9249 x = XEXP (x, 0);
9250
9251 y = x;
9252 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9253 y = XEXP (y, 1);
9254
9255 offset = NULL_RTX;
9256 if (GET_CODE (y) == PLUS
9257 && GET_MODE (y) == Pmode
9258 && CONST_INT_P (XEXP (y, 1)))
9259 {
9260 offset = XEXP (y, 1);
9261 y = XEXP (y, 0);
9262 }
9263
9264 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9265 {
9266 y = XVECEXP (y, 0, 0);
9267
9268 #ifdef HAVE_AS_TLS
9269 /* Do not associate thread-local symbols with the original
9270 constant pool symbol. */
9271 if (TARGET_XCOFF
9272 && SYMBOL_REF_P (y)
9273 && CONSTANT_POOL_ADDRESS_P (y)
9274 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9275 return orig_x;
9276 #endif
9277
9278 if (offset != NULL_RTX)
9279 y = gen_rtx_PLUS (Pmode, y, offset);
9280 if (!MEM_P (orig_x))
9281 return y;
9282 else
9283 return replace_equiv_address_nv (orig_x, y);
9284 }
9285
9286 if (TARGET_MACHO
9287 && GET_CODE (orig_x) == LO_SUM
9288 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9289 {
9290 y = XEXP (XEXP (orig_x, 1), 0);
9291 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9292 return XVECEXP (y, 0, 0);
9293 }
9294
9295 return orig_x;
9296 }
9297
9298 /* Return true if X shouldn't be emitted into the debug info.
9299 The linker doesn't like .toc section references from
9300 .debug_* sections, so reject .toc section symbols. */
9301
9302 static bool
9303 rs6000_const_not_ok_for_debug_p (rtx x)
9304 {
9305 if (GET_CODE (x) == UNSPEC)
9306 return true;
9307 if (SYMBOL_REF_P (x)
9308 && CONSTANT_POOL_ADDRESS_P (x))
9309 {
9310 rtx c = get_pool_constant (x);
9311 machine_mode cmode = get_pool_mode (x);
9312 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9313 return true;
9314 }
9315
9316 return false;
9317 }
9318
9319 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9320
9321 static bool
9322 rs6000_legitimate_combined_insn (rtx_insn *insn)
9323 {
9324 int icode = INSN_CODE (insn);
9325
9326 /* Reject creating doloop insns. Combine should not be allowed
9327 to create these for a number of reasons:
9328 1) In a nested loop, if combine creates one of these in an
9329 outer loop and the register allocator happens to allocate ctr
9330 to the outer loop insn, then the inner loop can't use ctr.
9331 Inner loops ought to be more highly optimized.
9332 2) Combine often wants to create one of these from what was
9333 originally a three insn sequence, first combining the three
9334 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9335 allocated ctr, the splitter takes us back to the three insn
9336 sequence. It's better to stop combine at the two insn
9337 sequence.
9338 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9339 insns, the register allocator sometimes uses floating point
9340 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9341 jump insn and output reloads are not implemented for jumps,
9342 the ctrsi/ctrdi splitters need to handle all possible cases.
9343 That's a pain, and it gets to be seriously difficult when a
9344 splitter that runs after reload needs memory to transfer from
9345 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9346 for the difficult case. It's better to not create problems
9347 in the first place. */
9348 if (icode != CODE_FOR_nothing
9349 && (icode == CODE_FOR_bdz_si
9350 || icode == CODE_FOR_bdz_di
9351 || icode == CODE_FOR_bdnz_si
9352 || icode == CODE_FOR_bdnz_di
9353 || icode == CODE_FOR_bdztf_si
9354 || icode == CODE_FOR_bdztf_di
9355 || icode == CODE_FOR_bdnztf_si
9356 || icode == CODE_FOR_bdnztf_di))
9357 return false;
9358
9359 return true;
9360 }
9361
9362 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9363
9364 static GTY(()) rtx rs6000_tls_symbol;
9365 static rtx
9366 rs6000_tls_get_addr (void)
9367 {
9368 if (!rs6000_tls_symbol)
9369 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9370
9371 return rs6000_tls_symbol;
9372 }
9373
9374 /* Construct the SYMBOL_REF for TLS GOT references. */
9375
9376 static GTY(()) rtx rs6000_got_symbol;
9377 rtx
9378 rs6000_got_sym (void)
9379 {
9380 if (!rs6000_got_symbol)
9381 {
9382 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9383 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9384 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9385 }
9386
9387 return rs6000_got_symbol;
9388 }
9389
9390 /* AIX Thread-Local Address support. */
9391
9392 static rtx
9393 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9394 {
9395 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9396 const char *name;
9397 char *tlsname;
9398
9399 /* Place addr into TOC constant pool. */
9400 sym = force_const_mem (GET_MODE (addr), addr);
9401
9402 /* Output the TOC entry and create the MEM referencing the value. */
9403 if (constant_pool_expr_p (XEXP (sym, 0))
9404 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9405 {
9406 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9407 mem = gen_const_mem (Pmode, tocref);
9408 set_mem_alias_set (mem, get_TOC_alias_set ());
9409 }
9410 else
9411 return sym;
9412
9413 /* Use global-dynamic for local-dynamic. */
9414 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9415 || model == TLS_MODEL_LOCAL_DYNAMIC)
9416 {
9417 /* Create new TOC reference for @m symbol. */
9418 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9419 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9420 strcpy (tlsname, "*LCM");
9421 strcat (tlsname, name + 3);
9422 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9423 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9424 tocref = create_TOC_reference (modaddr, NULL_RTX);
9425 rtx modmem = gen_const_mem (Pmode, tocref);
9426 set_mem_alias_set (modmem, get_TOC_alias_set ());
9427
9428 rtx modreg = gen_reg_rtx (Pmode);
9429 emit_insn (gen_rtx_SET (modreg, modmem));
9430
9431 tmpreg = gen_reg_rtx (Pmode);
9432 emit_insn (gen_rtx_SET (tmpreg, mem));
9433
9434 dest = gen_reg_rtx (Pmode);
9435 if (TARGET_32BIT)
9436 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9437 else
9438 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9439 return dest;
9440 }
9441 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9442 else if (TARGET_32BIT)
9443 {
9444 tlsreg = gen_reg_rtx (SImode);
9445 emit_insn (gen_tls_get_tpointer (tlsreg));
9446 }
9447 else
9448 {
9449 tlsreg = gen_rtx_REG (DImode, 13);
9450 xcoff_tls_exec_model_detected = true;
9451 }
9452
9453 /* Load the TOC value into temporary register. */
9454 tmpreg = gen_reg_rtx (Pmode);
9455 emit_insn (gen_rtx_SET (tmpreg, mem));
9456 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9457 gen_rtx_MINUS (Pmode, addr, tlsreg));
9458
9459 /* Add TOC symbol value to TLS pointer. */
9460 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9461
9462 return dest;
9463 }
9464
9465 /* Passes the TLS arg value for the global-dynamic and local-dynamic
9466 emit_library_call_value calls in rs6000_legitimize_tls_address
9467 through to rs6000_call_aix and rs6000_call_sysv. This is used to
9468 emit the marker relocs put on __tls_get_addr calls. */
9469 static rtx global_tlsarg;
9470
9471 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9472 this (thread-local) address. */
9473
9474 static rtx
9475 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9476 {
9477 rtx dest, insn;
9478
9479 if (TARGET_XCOFF)
9480 return rs6000_legitimize_tls_address_aix (addr, model);
9481
9482 dest = gen_reg_rtx (Pmode);
9483 if (model == TLS_MODEL_LOCAL_EXEC
9484 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9485 {
9486 rtx tlsreg;
9487
9488 if (TARGET_64BIT)
9489 {
9490 tlsreg = gen_rtx_REG (Pmode, 13);
9491 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9492 }
9493 else
9494 {
9495 tlsreg = gen_rtx_REG (Pmode, 2);
9496 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9497 }
9498 emit_insn (insn);
9499 }
9500 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9501 {
9502 rtx tlsreg, tmp;
9503
9504 tmp = gen_reg_rtx (Pmode);
9505 if (TARGET_64BIT)
9506 {
9507 tlsreg = gen_rtx_REG (Pmode, 13);
9508 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9509 }
9510 else
9511 {
9512 tlsreg = gen_rtx_REG (Pmode, 2);
9513 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9514 }
9515 emit_insn (insn);
9516 if (TARGET_64BIT)
9517 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9518 else
9519 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9520 emit_insn (insn);
9521 }
9522 else
9523 {
9524 rtx got, tga, tmp1, tmp2;
9525
9526 /* We currently use relocations like @got@tlsgd for tls, which
9527 means the linker will handle allocation of tls entries, placing
9528 them in the .got section. So use a pointer to the .got section,
9529 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9530 or to secondary GOT sections used by 32-bit -fPIC. */
9531 if (rs6000_pcrel_p ())
9532 got = const0_rtx;
9533 else if (TARGET_64BIT)
9534 got = gen_rtx_REG (Pmode, 2);
9535 else
9536 {
9537 if (flag_pic == 1)
9538 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9539 else
9540 {
9541 rtx gsym = rs6000_got_sym ();
9542 got = gen_reg_rtx (Pmode);
9543 if (flag_pic == 0)
9544 rs6000_emit_move (got, gsym, Pmode);
9545 else
9546 {
9547 rtx mem, lab;
9548
9549 tmp1 = gen_reg_rtx (Pmode);
9550 tmp2 = gen_reg_rtx (Pmode);
9551 mem = gen_const_mem (Pmode, tmp1);
9552 lab = gen_label_rtx ();
9553 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9554 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9555 if (TARGET_LINK_STACK)
9556 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9557 emit_move_insn (tmp2, mem);
9558 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9559 set_unique_reg_note (last, REG_EQUAL, gsym);
9560 }
9561 }
9562 }
9563
9564 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9565 {
9566 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9567 UNSPEC_TLSGD);
9568 tga = rs6000_tls_get_addr ();
9569 rtx argreg = gen_rtx_REG (Pmode, 3);
9570 emit_insn (gen_rtx_SET (argreg, arg));
9571 global_tlsarg = arg;
9572 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9573 global_tlsarg = NULL_RTX;
9574
9575 /* Make a note so that the result of this call can be CSEd. */
9576 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9577 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9578 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9579 }
9580 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9581 {
9582 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9583 tga = rs6000_tls_get_addr ();
9584 tmp1 = gen_reg_rtx (Pmode);
9585 rtx argreg = gen_rtx_REG (Pmode, 3);
9586 emit_insn (gen_rtx_SET (argreg, arg));
9587 global_tlsarg = arg;
9588 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9589 global_tlsarg = NULL_RTX;
9590
9591 /* Make a note so that the result of this call can be CSEd. */
9592 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9593 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9594 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9595
9596 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9597 {
9598 if (TARGET_64BIT)
9599 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9600 else
9601 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9602 }
9603 else if (rs6000_tls_size == 32)
9604 {
9605 tmp2 = gen_reg_rtx (Pmode);
9606 if (TARGET_64BIT)
9607 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9608 else
9609 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9610 emit_insn (insn);
9611 if (TARGET_64BIT)
9612 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9613 else
9614 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9615 }
9616 else
9617 {
9618 tmp2 = gen_reg_rtx (Pmode);
9619 if (TARGET_64BIT)
9620 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9621 else
9622 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9623 emit_insn (insn);
9624 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9625 }
9626 emit_insn (insn);
9627 }
9628 else
9629 {
9630 /* IE, or 64-bit offset LE. */
9631 tmp2 = gen_reg_rtx (Pmode);
9632 if (TARGET_64BIT)
9633 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9634 else
9635 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9636 emit_insn (insn);
9637 if (rs6000_pcrel_p ())
9638 {
9639 if (TARGET_64BIT)
9640 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9641 else
9642 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9643 }
9644 else if (TARGET_64BIT)
9645 insn = gen_tls_tls_64 (dest, tmp2, addr);
9646 else
9647 insn = gen_tls_tls_32 (dest, tmp2, addr);
9648 emit_insn (insn);
9649 }
9650 }
9651
9652 return dest;
9653 }
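
/* For reference, the global-dynamic path above corresponds roughly to
   the usual ELF medium-model sequence (a sketch; the exact relocations
   depend on ABI and code model):

       addis r3,r2,x@got@tlsgd@ha
       addi  r3,r3,x@got@tlsgd@l
       bl    __tls_get_addr(x@tlsgd)
       nop

   with the UNSPEC_TLSGD argument carrying the relocation through to
   the call via global_tlsarg.  */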
9654
9655 /* Only create the global variable for the stack protect guard if we are using
9656 the global flavor of that guard. */
9657 static tree
9658 rs6000_init_stack_protect_guard (void)
9659 {
9660 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9661 return default_stack_protect_guard ();
9662
9663 return NULL_TREE;
9664 }
9665
9666 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9667
9668 static bool
9669 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9670 {
9671 if (GET_CODE (x) == HIGH
9672 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9673 return true;
9674
9675 /* A TLS symbol in the TOC cannot contain a sum. */
9676 if (GET_CODE (x) == CONST
9677 && GET_CODE (XEXP (x, 0)) == PLUS
9678 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9679 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9680 return true;
9681
9682 /* Allow AIX TOC TLS symbols in the constant pool,
9683 but not ELF TLS symbols. */
9684 return TARGET_ELF && tls_referenced_p (x);
9685 }
9686
9687 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9688 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9689 can be addressed relative to the toc pointer. */
9690
9691 static bool
9692 use_toc_relative_ref (rtx sym, machine_mode mode)
9693 {
9694 return ((constant_pool_expr_p (sym)
9695 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9696 get_pool_mode (sym)))
9697 || (TARGET_CMODEL == CMODEL_MEDIUM
9698 && SYMBOL_REF_LOCAL_P (sym)
9699 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9700 }
9701
9702 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9703 that is a valid memory address for an instruction.
9704 The MODE argument is the machine mode for the MEM expression
9705 that wants to use this address.
9706
9707 On the RS/6000, there are four valid address forms: a SYMBOL_REF
9708 that refers to a constant pool entry of an address (or the sum of it
9709 plus a constant), a short (16-bit signed) constant plus a register,
9710 the sum of two registers, or a register indirect, possibly with an
9711 auto-increment. For DFmode, DDmode and DImode with a constant plus
9712 register, we must ensure that both words are addressable or PowerPC64
9713 with offset word aligned.
9714
9715 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9716 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9717 because adjacent memory cells are accessed by adding word-sized offsets
9718 during assembly output. */
9719 static bool
9720 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9721 {
9722 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9723 bool quad_offset_p = mode_supports_dq_form (mode);
9724
9725 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9726 return 0;
9727
9728 /* Handle unaligned altivec lvx/stvx type addresses. */
9729 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9730 && GET_CODE (x) == AND
9731 && CONST_INT_P (XEXP (x, 1))
9732 && INTVAL (XEXP (x, 1)) == -16)
9733 {
9734 x = XEXP (x, 0);
9735 return (legitimate_indirect_address_p (x, reg_ok_strict)
9736 || legitimate_indexed_address_p (x, reg_ok_strict)
9737 || virtual_stack_registers_memory_p (x));
9738 }
9739
9740 if (legitimate_indirect_address_p (x, reg_ok_strict))
9741 return 1;
9742 if (TARGET_UPDATE
9743 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9744 && mode_supports_pre_incdec_p (mode)
9745 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9746 return 1;
9747
9748 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9749 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9750 return 1;
9751
9752 /* Handle restricted vector d-form offsets in ISA 3.0. */
9753 if (quad_offset_p)
9754 {
9755 if (quad_address_p (x, mode, reg_ok_strict))
9756 return 1;
9757 }
9758 else if (virtual_stack_registers_memory_p (x))
9759 return 1;
9760
9761 else if (reg_offset_p)
9762 {
9763 if (legitimate_small_data_p (mode, x))
9764 return 1;
9765 if (legitimate_constant_pool_address_p (x, mode,
9766 reg_ok_strict || lra_in_progress))
9767 return 1;
9768 }
9769
9770 /* For TImode, if we have TImode in VSX registers, only allow register
9771 indirect addresses. This will allow the values to go in either GPRs
9772 or VSX registers without reloading. The vector types would tend to
9773 go into VSX registers, so we allow REG+REG, while TImode seems
9774 somewhat split, in that some uses are GPR based, and some VSX based. */
9775 /* FIXME: We could loosen this by changing the following to
9776 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9777 but currently we cannot allow REG+REG addressing for TImode. See
9778 PR72827 for complete details on how this ends up hoodwinking DSE. */
9779 if (mode == TImode && TARGET_VSX)
9780 return 0;
9781 /* If not REG_OK_STRICT (before reload), allow any stack offset. */
9782 if (! reg_ok_strict
9783 && reg_offset_p
9784 && GET_CODE (x) == PLUS
9785 && REG_P (XEXP (x, 0))
9786 && (XEXP (x, 0) == virtual_stack_vars_rtx
9787 || XEXP (x, 0) == arg_pointer_rtx)
9788 && CONST_INT_P (XEXP (x, 1)))
9789 return 1;
9790 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9791 return 1;
9792 if (!FLOAT128_2REG_P (mode)
9793 && (TARGET_HARD_FLOAT
9794 || TARGET_POWERPC64
9795 || (mode != DFmode && mode != DDmode))
9796 && (TARGET_POWERPC64 || mode != DImode)
9797 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9798 && mode != PTImode
9799 && !avoiding_indexed_address_p (mode)
9800 && legitimate_indexed_address_p (x, reg_ok_strict))
9801 return 1;
9802 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9803 && mode_supports_pre_modify_p (mode)
9804 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9805 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9806 reg_ok_strict, false)
9807 || (!avoiding_indexed_address_p (mode)
9808 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9809 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9810 {
9811 /* There is no prefixed version of the load/store with update. */
9812 rtx addr = XEXP (x, 1);
9813 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9814 }
9815 if (reg_offset_p && !quad_offset_p
9816 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9817 return 1;
9818 return 0;
9819 }
9820
9821 /* Debug version of rs6000_legitimate_address_p. */
9822 static bool
9823 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9824 bool reg_ok_strict)
9825 {
9826 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9827 fprintf (stderr,
9828 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9829 "strict = %d, reload = %s, code = %s\n",
9830 ret ? "true" : "false",
9831 GET_MODE_NAME (mode),
9832 reg_ok_strict,
9833 (reload_completed ? "after" : "before"),
9834 GET_RTX_NAME (GET_CODE (x)));
9835 debug_rtx (x);
9836
9837 return ret;
9838 }
9839
9840 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9841
9842 static bool
9843 rs6000_mode_dependent_address_p (const_rtx addr,
9844 addr_space_t as ATTRIBUTE_UNUSED)
9845 {
9846 return rs6000_mode_dependent_address_ptr (addr);
9847 }
9848
9849 /* Return true if ADDR (a legitimate address expression)
9850 has an effect that depends on the machine mode it is used for.
9851
9852 On the RS/6000 this is true of all integral offsets (since AltiVec
9853 and VSX modes don't allow them) and of pre-increment or decrement.
9854
9855 ??? Except that due to conceptual problems in offsettable_address_p
9856 we can't really report the problems of integral offsets. So leave
9857 this assuming that the adjustable offset must be valid for the
9858 sub-words of a TFmode operand, which is what we had before. */
9859
9860 static bool
9861 rs6000_mode_dependent_address (const_rtx addr)
9862 {
9863 switch (GET_CODE (addr))
9864 {
9865 case PLUS:
9866 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9867 is considered a legitimate address before reload, so there
9868 are no offset restrictions in that case. Note that this
9869 condition is safe in strict mode because any address involving
9870 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9871 been rejected as illegitimate. */
9872 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9873 && XEXP (addr, 0) != arg_pointer_rtx
9874 && CONST_INT_P (XEXP (addr, 1)))
9875 {
9876 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9877 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
9878 if (TARGET_PREFIXED)
9879 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9880 else
9881 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9882 }
9883 break;
9884
9885 case LO_SUM:
9886 /* Anything in the constant pool is sufficiently aligned that
9887 all bytes have the same high part address. */
9888 return !legitimate_constant_pool_address_p (addr, QImode, false);
9889
9890 /* Auto-increment cases are now treated generically in recog.cc. */
9891 case PRE_MODIFY:
9892 return TARGET_UPDATE;
9893
9894 /* AND is only allowed in Altivec loads. */
9895 case AND:
9896 return true;
9897
9898 default:
9899 break;
9900 }
9901
9902 return false;
9903 }
9904
9905 /* Debug version of rs6000_mode_dependent_address. */
9906 static bool
9907 rs6000_debug_mode_dependent_address (const_rtx addr)
9908 {
9909 bool ret = rs6000_mode_dependent_address (addr);
9910
9911 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9912 ret ? "true" : "false");
9913 debug_rtx (addr);
9914
9915 return ret;
9916 }
9917
9918 /* Implement FIND_BASE_TERM. */
9919
9920 rtx
9921 rs6000_find_base_term (rtx op)
9922 {
9923 rtx base;
9924
9925 base = op;
9926 if (GET_CODE (base) == CONST)
9927 base = XEXP (base, 0);
9928 if (GET_CODE (base) == PLUS)
9929 base = XEXP (base, 0);
9930 if (GET_CODE (base) == UNSPEC)
9931 switch (XINT (base, 1))
9932 {
9933 case UNSPEC_TOCREL:
9934 case UNSPEC_MACHOPIC_OFFSET:
9935 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9936 for aliasing purposes. */
9937 return XVECEXP (base, 0, 0);
9938 }
9939
9940 return op;
9941 }
9942
9943 /* More elaborate version of recog's offsettable_memref_p predicate
9944 that works around the ??? note of rs6000_mode_dependent_address.
9945 In particular it accepts
9946
9947 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9948
9949 in 32-bit mode, which the recog predicate rejects. */
9950
9951 static bool
9952 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9953 {
9954 bool worst_case;
9955
9956 if (!MEM_P (op))
9957 return false;
9958
9959 /* First mimic offsettable_memref_p. */
9960 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9961 return true;
9962
9963 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9964 the latter predicate knows nothing about the mode of the memory
9965 reference and, therefore, assumes that it is the largest supported
9966 mode (TFmode). As a consequence, legitimate offsettable memory
9967 references are rejected. rs6000_legitimate_offset_address_p contains
9968 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9969 at least with a little bit of help here given that we know the
9970 actual registers used. */
9971 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9972 || GET_MODE_SIZE (reg_mode) == 4);
9973 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9974 strict, worst_case);
9975 }
9976
9977 /* Determine the reassociation width to be used in reassociate_bb.
9978 This takes into account how many parallel operations we
9979 can actually do of a given type, and also the latency.
9980 P8:
9981 int add/sub 6/cycle
9982 mul 2/cycle
9983 vect add/sub/mul 2/cycle
9984 fp add/sub/mul 2/cycle
9985 dfp 1/cycle
9986 */
9987
9988 static int
9989 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9990 machine_mode mode)
9991 {
9992 switch (rs6000_tune)
9993 {
9994 case PROCESSOR_POWER8:
9995 case PROCESSOR_POWER9:
9996 case PROCESSOR_POWER10:
9997 if (DECIMAL_FLOAT_MODE_P (mode))
9998 return 1;
9999 if (VECTOR_MODE_P (mode))
10000 return 4;
10001 if (INTEGRAL_MODE_P (mode))
10002 return 1;
10003 if (FLOAT_MODE_P (mode))
10004 return 4;
10005 break;
10006 default:
10007 break;
10008 }
10009 return 1;
10010 }
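
/* The widths above fold throughput and latency together: with two FP
   pipes and a multi-cycle (roughly half a dozen cycles, as an assumed
   ballpark) FP latency, about four independent chains are needed to
   keep the units busy, hence the value 4 for scalar float and vector
   modes, while the short-latency integer ops stay at 1.  */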
10011
10012 /* Change register usage conditional on target flags. */
10013 static void
10014 rs6000_conditional_register_usage (void)
10015 {
10016 int i;
10017
10018 if (TARGET_DEBUG_TARGET)
10019 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10020
10021 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10022 if (TARGET_64BIT)
10023 fixed_regs[13] = call_used_regs[13] = 1;
10024
10025 /* Conditionally disable FPRs. */
10026 if (TARGET_SOFT_FLOAT)
10027 for (i = 32; i < 64; i++)
10028 fixed_regs[i] = call_used_regs[i] = 1;
10029
10030 /* The TOC register is not killed across calls in a way that is
10031 visible to the compiler. */
10032 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10033 call_used_regs[2] = 0;
10034
10035 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10036 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10037
10038 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10039 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10040 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10041
10042 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10043 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10044 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10045
10046 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10047 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10048
10049 if (!TARGET_ALTIVEC && !TARGET_VSX)
10050 {
10051 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10052 fixed_regs[i] = call_used_regs[i] = 1;
10053 call_used_regs[VRSAVE_REGNO] = 1;
10054 }
10055
10056 if (TARGET_ALTIVEC || TARGET_VSX)
10057 global_regs[VSCR_REGNO] = 1;
10058
10059 if (TARGET_ALTIVEC_ABI)
10060 {
10061 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10062 call_used_regs[i] = 1;
10063
10064 /* AIX reserves VR20:31 in non-extended ABI mode. */
10065 if (TARGET_XCOFF && !rs6000_aix_extabi)
10066 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10067 fixed_regs[i] = call_used_regs[i] = 1;
10068 }
10069 }
10070
10071 \f
10072 /* Output insns to set DEST equal to the constant SOURCE as a series of
10073 lis, ori and shl instructions and return TRUE. */
10074
10075 bool
10076 rs6000_emit_set_const (rtx dest, rtx source)
10077 {
10078 machine_mode mode = GET_MODE (dest);
10079 rtx temp, set;
10080 rtx_insn *insn;
10081 HOST_WIDE_INT c;
10082
10083 gcc_checking_assert (CONST_INT_P (source));
10084 c = INTVAL (source);
10085 switch (mode)
10086 {
10087 case E_QImode:
10088 case E_HImode:
10089 emit_insn (gen_rtx_SET (dest, source));
10090 return true;
10091
10092 case E_SImode:
10093 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10094
10095 emit_insn (gen_rtx_SET (copy_rtx (temp),
10096 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10097 emit_insn (gen_rtx_SET (dest,
10098 gen_rtx_IOR (SImode, copy_rtx (temp),
10099 GEN_INT (c & 0xffff))));
10100 break;
10101
10102 case E_DImode:
10103 if (!TARGET_POWERPC64)
10104 {
10105 rtx hi, lo;
10106
10107 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10108 DImode);
10109 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10110 DImode);
10111 emit_move_insn (hi, GEN_INT (c >> 32));
10112 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10113 emit_move_insn (lo, GEN_INT (c));
10114 }
10115 else
10116 rs6000_emit_set_long_const (dest, c);
10117 break;
10118
10119 default:
10120 gcc_unreachable ();
10121 }
10122
10123 insn = get_last_insn ();
10124 set = single_set (insn);
10125 if (! CONSTANT_P (SET_SRC (set)))
10126 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10127
10128 return true;
10129 }
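
/* For example, the SImode path above loads c = 0x12345678 as

       lis  tmp,0x1234          tmp = 0x12340000
       ori  dest,tmp,0x5678     dest = tmp | 0x5678

   i.e. the high-part SET followed by the IOR of the low 16 bits.  */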
10130
10131 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10132 Output insns to set DEST equal to the constant C as a series of
10133 lis, ori and shl instructions. */
10134
10135 static void
10136 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10137 {
10138 rtx temp;
10139 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10140
10141 ud1 = c & 0xffff;
10142 c = c >> 16;
10143 ud2 = c & 0xffff;
10144 c = c >> 16;
10145 ud3 = c & 0xffff;
10146 c = c >> 16;
10147 ud4 = c & 0xffff;
10148
10149 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10150 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10151 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10152
10153 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10154 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10155 {
10156 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10157
10158 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10159 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10160 if (ud1 != 0)
10161 emit_move_insn (dest,
10162 gen_rtx_IOR (DImode, copy_rtx (temp),
10163 GEN_INT (ud1)));
10164 }
10165 else if (ud3 == 0 && ud4 == 0)
10166 {
10167 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10168
10169 gcc_assert (ud2 & 0x8000);
10170 emit_move_insn (copy_rtx (temp),
10171 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10172 if (ud1 != 0)
10173 emit_move_insn (copy_rtx (temp),
10174 gen_rtx_IOR (DImode, copy_rtx (temp),
10175 GEN_INT (ud1)));
10176 emit_move_insn (dest,
10177 gen_rtx_ZERO_EXTEND (DImode,
10178 gen_lowpart (SImode,
10179 copy_rtx (temp))));
10180 }
10181 else if (ud1 == ud3 && ud2 == ud4)
10182 {
10183 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10184 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10185 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
10186 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10187 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10188 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10189 }
10190 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10191 || (ud4 == 0 && ! (ud3 & 0x8000)))
10192 {
10193 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10194
10195 emit_move_insn (copy_rtx (temp),
10196 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10197 if (ud2 != 0)
10198 emit_move_insn (copy_rtx (temp),
10199 gen_rtx_IOR (DImode, copy_rtx (temp),
10200 GEN_INT (ud2)));
10201 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10202 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10203 GEN_INT (16)));
10204 if (ud1 != 0)
10205 emit_move_insn (dest,
10206 gen_rtx_IOR (DImode, copy_rtx (temp),
10207 GEN_INT (ud1)));
10208 }
10209 else
10210 {
10211 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10212
10213 emit_move_insn (copy_rtx (temp),
10214 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10215 if (ud3 != 0)
10216 emit_move_insn (copy_rtx (temp),
10217 gen_rtx_IOR (DImode, copy_rtx (temp),
10218 GEN_INT (ud3)));
10219
10220 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10221 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10222 GEN_INT (32)));
10223 if (ud2 != 0)
10224 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10225 gen_rtx_IOR (DImode, copy_rtx (temp),
10226 GEN_INT (ud2 << 16)));
10227 if (ud1 != 0)
10228 emit_move_insn (dest,
10229 gen_rtx_IOR (DImode, copy_rtx (temp),
10230 GEN_INT (ud1)));
10231 }
10232 }
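
/* As a worked example, the final case above builds
   c = 0x123456789abcdef0 (ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc,
   ud1 = 0xdef0) as the five-insn sequence

       lis   tmp,0x1234
       ori   tmp,tmp,0x5678
       sldi  tmp,tmp,32
       oris  tmp,tmp,0x9abc
       ori   dest,tmp,0xdef0

   The earlier cases peel off cheaper encodings when some of the
   16-bit groups are zero or sign extensions of each other.  */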
10233
10234 /* Helper for rs6000_emit_move. Get rid of [r+r] memory refs
10235 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
10236
10237 static void
10238 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10239 {
10240 if (MEM_P (operands[0])
10241 && !REG_P (XEXP (operands[0], 0))
10242 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10243 GET_MODE (operands[0]), false))
10244 operands[0]
10245 = replace_equiv_address (operands[0],
10246 copy_addr_to_reg (XEXP (operands[0], 0)));
10247
10248 if (MEM_P (operands[1])
10249 && !REG_P (XEXP (operands[1], 0))
10250 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10251 GET_MODE (operands[1]), false))
10252 operands[1]
10253 = replace_equiv_address (operands[1],
10254 copy_addr_to_reg (XEXP (operands[1], 0)));
10255 }
10256
10257 /* Generate a vector of constants to permute MODE for a little-endian
10258 storage operation by swapping the two halves of a vector. */
10259 static rtvec
10260 rs6000_const_vec (machine_mode mode)
10261 {
10262 int i, subparts;
10263 rtvec v;
10264
10265 switch (mode)
10266 {
10267 case E_V1TImode:
10268 subparts = 1;
10269 break;
10270 case E_V2DFmode:
10271 case E_V2DImode:
10272 subparts = 2;
10273 break;
10274 case E_V4SFmode:
10275 case E_V4SImode:
10276 subparts = 4;
10277 break;
10278 case E_V8HImode:
10279 subparts = 8;
10280 break;
10281 case E_V16QImode:
10282 subparts = 16;
10283 break;
10284 default:
10285 gcc_unreachable();
10286 }
10287
10288 v = rtvec_alloc (subparts);
10289
10290 for (i = 0; i < subparts / 2; ++i)
10291 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10292 for (i = subparts / 2; i < subparts; ++i)
10293 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10294
10295 return v;
10296 }
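
/* For V4SImode this yields the selector {2, 3, 0, 1}: element i of
   the destination takes element (i + 2) mod 4 of the source, i.e. the
   two vector halves are swapped, which is exactly the doubleword swap
   that lxvd2x/stxvd2x leaves to be undone on little-endian.  */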
10297
10298 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10299 store operation. */
10300 void
10301 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10302 {
10303 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10304 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10305
10306 /* Scalar permutations are easier to express in integer modes rather than
10307 floating-point modes, so cast them here. We use V1TImode instead
10308 of TImode to ensure that the values don't go through GPRs. */
10309 if (FLOAT128_VECTOR_P (mode))
10310 {
10311 dest = gen_lowpart (V1TImode, dest);
10312 source = gen_lowpart (V1TImode, source);
10313 mode = V1TImode;
10314 }
10315
10316 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10317 scalar. */
10318 if (mode == TImode || mode == V1TImode)
10319 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10320 GEN_INT (64))));
10321 else
10322 {
10323 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10324 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10325 }
10326 }
10327
10328 /* Emit a little-endian load from vector memory location SOURCE to VSX
10329 register DEST in mode MODE. The load is done with two permuting
10330 insns that represent an lxvd2x and an xxpermdi. */
10331 void
10332 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10333 {
10334 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10335 V1TImode). */
10336 if (mode == TImode || mode == V1TImode)
10337 {
10338 mode = V2DImode;
10339 dest = gen_lowpart (V2DImode, dest);
10340 source = adjust_address (source, V2DImode, 0);
10341 }
10342
10343 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10344 rs6000_emit_le_vsx_permute (tmp, source, mode);
10345 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10346 }
10347
10348 /* Emit a little-endian store to vector memory location DEST from VSX
10349 register SOURCE in mode MODE. The store is done with two permuting
10350 insns that represent an xxpermdi and an stxvd2x. */
10351 void
10352 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10353 {
10354 /* This should never be called after LRA. */
10355 gcc_assert (can_create_pseudo_p ());
10356
10357 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10358 V1TImode). */
10359 if (mode == TImode || mode == V1TImode)
10360 {
10361 mode = V2DImode;
10362 dest = adjust_address (dest, V2DImode, 0);
10363 source = gen_lowpart (V2DImode, source);
10364 }
10365
10366 rtx tmp = gen_reg_rtx_and_attrs (source);
10367 rs6000_emit_le_vsx_permute (tmp, source, mode);
10368 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10369 }
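
/* Note that in both the load and store sequences above the two
   permutes compose to the identity on element order: one is matched
   as the doubleword swap that lxvd2x/stxvd2x performs on
   little-endian, and the other as the xxpermdi that undoes it.
   Emitting both at expand time is what lets later passes spot and
   delete redundant swap pairs, per rs6000_emit_le_vsx_move below.  */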
10370
10371 /* Emit a sequence representing a little-endian VSX load or store,
10372 moving data from SOURCE to DEST in mode MODE. This is done
10373 separately from rs6000_emit_move to ensure it is called only
10374 during expand. LE VSX loads and stores introduced later are
10375 handled with a split. The expand-time RTL generation allows
10376 us to optimize away redundant pairs of register-permutes. */
10377 void
10378 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10379 {
10380 gcc_assert (!BYTES_BIG_ENDIAN
10381 && VECTOR_MEM_VSX_P (mode)
10382 && !TARGET_P9_VECTOR
10383 && !gpr_or_gpr_p (dest, source)
10384 && (MEM_P (source) ^ MEM_P (dest)));
10385
10386 if (MEM_P (source))
10387 {
10388 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10389 rs6000_emit_le_vsx_load (dest, source, mode);
10390 }
10391 else
10392 {
10393 if (!REG_P (source))
10394 source = force_reg (mode, source);
10395 rs6000_emit_le_vsx_store (dest, source, mode);
10396 }
10397 }
10398
10399 /* Return whether an SFmode or SImode move can be done without converting
10400 one mode to another. This arises when we have:
10401
10402 (SUBREG:SF (REG:SI ...))
10403 (SUBREG:SI (REG:SF ...))
10404
10405 and one of the values is in a floating point/vector register, where SFmode
10406 scalars are stored in DFmode format. */
10407
10408 bool
10409 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10410 {
10411 if (TARGET_ALLOW_SF_SUBREG)
10412 return true;
10413
10414 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10415 return true;
10416
10417 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10418 return true;
10419
10420 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10421 if (SUBREG_P (dest))
10422 {
10423 rtx dest_subreg = SUBREG_REG (dest);
10424 rtx src_subreg = SUBREG_REG (src);
10425 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10426 }
10427
10428 return false;
10429 }
10430
10431
10432 /* Helper function to change moves with:
10433
10434 (SUBREG:SF (REG:SI)) and
10435 (SUBREG:SI (REG:SF))
10436
10437 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10438 values are stored as DFmode values in the VSX registers. We need to convert
10439 the bits before we can use a direct move or operate on the bits in the
10440 vector register as an integer type.
10441
10442 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10443
10444 static bool
10445 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10446 {
10447 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10448 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10449 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10450 {
10451 rtx inner_source = SUBREG_REG (source);
10452 machine_mode inner_mode = GET_MODE (inner_source);
10453
10454 if (mode == SImode && inner_mode == SFmode)
10455 {
10456 emit_insn (gen_movsi_from_sf (dest, inner_source));
10457 return true;
10458 }
10459
10460 if (mode == SFmode && inner_mode == SImode)
10461 {
10462 emit_insn (gen_movsf_from_si (dest, inner_source));
10463 return true;
10464 }
10465 }
10466
10467 return false;
10468 }
10469
10470 /* Emit a move from SOURCE to DEST in mode MODE. */
10471 void
10472 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10473 {
10474 rtx operands[2];
10475 operands[0] = dest;
10476 operands[1] = source;
10477
10478 if (TARGET_DEBUG_ADDR)
10479 {
10480 fprintf (stderr,
10481 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10482 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10483 GET_MODE_NAME (mode),
10484 lra_in_progress,
10485 reload_completed,
10486 can_create_pseudo_p ());
10487 debug_rtx (dest);
10488 fprintf (stderr, "source:\n");
10489 debug_rtx (source);
10490 }
10491
10492 /* Check that we get CONST_WIDE_INT only when we should. */
10493 if (CONST_WIDE_INT_P (operands[1])
10494 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10495 gcc_unreachable ();
10496
10497 #ifdef HAVE_AS_GNU_ATTRIBUTE
10498 /* If we use a long double type, set the flags in .gnu_attribute that say
10499 what the long double type is. This is to allow the linker's warning
10500 message for the wrong long double to be useful, even if the function does
10501 not do a call (for example, doing a 128-bit add on power9 if the long
10502 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
10503 are used and they aren't the default long double type. */
10504 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10505 {
10506 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10507 rs6000_passes_float = rs6000_passes_long_double = true;
10508
10509 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10510 rs6000_passes_float = rs6000_passes_long_double = true;
10511 }
10512 #endif
10513
10514 /* See if we need to special case SImode/SFmode SUBREG moves. */
10515 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10516 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10517 return;
10518
10519 /* Check if GCC is setting up a block move that will end up using FP
10520 registers as temporaries. We must make sure this is acceptable. */
10521 if (MEM_P (operands[0])
10522 && MEM_P (operands[1])
10523 && mode == DImode
10524 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10525 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10526 && ! (rs6000_slow_unaligned_access (SImode,
10527 (MEM_ALIGN (operands[0]) > 32
10528 ? 32 : MEM_ALIGN (operands[0])))
10529 || rs6000_slow_unaligned_access (SImode,
10530 (MEM_ALIGN (operands[1]) > 32
10531 ? 32 : MEM_ALIGN (operands[1]))))
10532 && ! MEM_VOLATILE_P (operands [0])
10533 && ! MEM_VOLATILE_P (operands [1]))
10534 {
10535 emit_move_insn (adjust_address (operands[0], SImode, 0),
10536 adjust_address (operands[1], SImode, 0));
10537 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10538 adjust_address (copy_rtx (operands[1]), SImode, 4));
10539 return;
10540 }
10541
10542 if (can_create_pseudo_p () && MEM_P (operands[0])
10543 && !gpc_reg_operand (operands[1], mode))
10544 operands[1] = force_reg (mode, operands[1]);
10545
10546 /* Recognize the case where operands[1] is a reference to thread-local
10547 data and load its address to a register. */
10548 if (tls_referenced_p (operands[1]))
10549 {
10550 enum tls_model model;
10551 rtx tmp = operands[1];
10552 rtx addend = NULL;
10553
10554 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10555 {
10556 addend = XEXP (XEXP (tmp, 0), 1);
10557 tmp = XEXP (XEXP (tmp, 0), 0);
10558 }
10559
10560 gcc_assert (SYMBOL_REF_P (tmp));
10561 model = SYMBOL_REF_TLS_MODEL (tmp);
10562 gcc_assert (model != 0);
10563
10564 tmp = rs6000_legitimize_tls_address (tmp, model);
10565 if (addend)
10566 {
10567 tmp = gen_rtx_PLUS (mode, tmp, addend);
10568 tmp = force_operand (tmp, operands[0]);
10569 }
10570 operands[1] = tmp;
10571 }
10572
10573 /* 128-bit constant floating-point values on Darwin should really be loaded
10574 as two parts. However, this premature splitting is a problem when DFmode
10575 values can go into Altivec registers. */
10576 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10577 && !reg_addr[DFmode].scalar_in_vmx_p)
10578 {
10579 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10580 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10581 DFmode);
10582 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10583 GET_MODE_SIZE (DFmode)),
10584 simplify_gen_subreg (DFmode, operands[1], mode,
10585 GET_MODE_SIZE (DFmode)),
10586 DFmode);
10587 return;
10588 }
10589
10590 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10591 p1:SD) if p1 is not of floating-point class and p0 is spilled,
10592 since we have no analogous movsd_store for this. */
10593 if (lra_in_progress && mode == DDmode
10594 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10595 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10596 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10597 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10598 {
10599 enum reg_class cl;
10600 int regno = REGNO (SUBREG_REG (operands[1]));
10601
10602 if (!HARD_REGISTER_NUM_P (regno))
10603 {
10604 cl = reg_preferred_class (regno);
10605 regno = reg_renumber[regno];
10606 if (regno < 0)
10607 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10608 }
10609 if (regno >= 0 && ! FP_REGNO_P (regno))
10610 {
10611 mode = SDmode;
10612 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10613 operands[1] = SUBREG_REG (operands[1]);
10614 }
10615 }
10616 if (lra_in_progress
10617 && mode == SDmode
10618 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10619 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10620 && (REG_P (operands[1])
10621 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10622 {
10623 int regno = reg_or_subregno (operands[1]);
10624 enum reg_class cl;
10625
10626 if (!HARD_REGISTER_NUM_P (regno))
10627 {
10628 cl = reg_preferred_class (regno);
10629 gcc_assert (cl != NO_REGS);
10630 regno = reg_renumber[regno];
10631 if (regno < 0)
10632 regno = ira_class_hard_regs[cl][0];
10633 }
10634 if (FP_REGNO_P (regno))
10635 {
10636 if (GET_MODE (operands[0]) != DDmode)
10637 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10638 emit_insn (gen_movsd_store (operands[0], operands[1]));
10639 }
10640 else if (INT_REGNO_P (regno))
10641 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10642 else
10643 gcc_unreachable ();
10644 return;
10645 }
10646 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10647 p1:DD)) if p0 is not of floating-point class and p1 is spilled,
10648 since we have no analogous movsd_load for this. */
10649 if (lra_in_progress && mode == DDmode
10650 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10651 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10652 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10653 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10654 {
10655 enum reg_class cl;
10656 int regno = REGNO (SUBREG_REG (operands[0]));
10657
10658 if (!HARD_REGISTER_NUM_P (regno))
10659 {
10660 cl = reg_preferred_class (regno);
10661 regno = reg_renumber[regno];
10662 if (regno < 0)
10663 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10664 }
10665 if (regno >= 0 && ! FP_REGNO_P (regno))
10666 {
10667 mode = SDmode;
10668 operands[0] = SUBREG_REG (operands[0]);
10669 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10670 }
10671 }
10672 if (lra_in_progress
10673 && mode == SDmode
10674 && (REG_P (operands[0])
10675 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10676 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10677 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10678 {
10679 int regno = reg_or_subregno (operands[0]);
10680 enum reg_class cl;
10681
10682 if (!HARD_REGISTER_NUM_P (regno))
10683 {
10684 cl = reg_preferred_class (regno);
10685 gcc_assert (cl != NO_REGS);
10686 regno = reg_renumber[regno];
10687 if (regno < 0)
10688 regno = ira_class_hard_regs[cl][0];
10689 }
10690 if (FP_REGNO_P (regno))
10691 {
10692 if (GET_MODE (operands[1]) != DDmode)
10693 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10694 emit_insn (gen_movsd_load (operands[0], operands[1]));
10695 }
10696 else if (INT_REGNO_P (regno))
10697 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10698 else
10699 gcc_unreachable ();
10700 return;
10701 }
10702
10703 /* FIXME: In the long term, this switch statement should go away
10704 and be replaced by a sequence of tests based on things like
10705 mode == Pmode. */
10706 switch (mode)
10707 {
10708 case E_HImode:
10709 case E_QImode:
10710 if (CONSTANT_P (operands[1])
10711 && !CONST_INT_P (operands[1]))
10712 operands[1] = force_const_mem (mode, operands[1]);
10713 break;
10714
10715 case E_TFmode:
10716 case E_TDmode:
10717 case E_IFmode:
10718 case E_KFmode:
10719 if (FLOAT128_2REG_P (mode))
10720 rs6000_eliminate_indexed_memrefs (operands);
10721 /* fall through */
10722
10723 case E_DFmode:
10724 case E_DDmode:
10725 case E_SFmode:
10726 case E_SDmode:
10727 if (CONSTANT_P (operands[1])
10728 && ! easy_fp_constant (operands[1], mode))
10729 operands[1] = force_const_mem (mode, operands[1]);
10730 break;
10731
10732 case E_V16QImode:
10733 case E_V8HImode:
10734 case E_V4SFmode:
10735 case E_V4SImode:
10736 case E_V2DFmode:
10737 case E_V2DImode:
10738 case E_V1TImode:
10739 if (CONSTANT_P (operands[1])
10740 && !easy_vector_constant (operands[1], mode))
10741 operands[1] = force_const_mem (mode, operands[1]);
10742 break;
10743
10744 case E_OOmode:
10745 case E_XOmode:
10746 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10747 error ("%qs is an opaque type, and you cannot set it to other values",
10748 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10749 break;
10750
10751 case E_SImode:
10752 case E_DImode:
10753 /* Use the default pattern for the address of ELF small data. */
10754 if (TARGET_ELF
10755 && mode == Pmode
10756 && DEFAULT_ABI == ABI_V4
10757 && (SYMBOL_REF_P (operands[1])
10758 || GET_CODE (operands[1]) == CONST)
10759 && small_data_operand (operands[1], mode))
10760 {
10761 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10762 return;
10763 }
10764
10765 /* Use the default pattern for loading up PC-relative addresses. */
10766 if (TARGET_PCREL && mode == Pmode
10767 && pcrel_local_or_external_address (operands[1], Pmode))
10768 {
10769 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10770 return;
10771 }
10772
10773 if (DEFAULT_ABI == ABI_V4
10774 && mode == Pmode && mode == SImode
10775 && flag_pic == 1 && got_operand (operands[1], mode))
10776 {
10777 emit_insn (gen_movsi_got (operands[0], operands[1]));
10778 return;
10779 }
10780
10781 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10782 && TARGET_NO_TOC_OR_PCREL
10783 && ! flag_pic
10784 && mode == Pmode
10785 && CONSTANT_P (operands[1])
10786 && GET_CODE (operands[1]) != HIGH
10787 && !CONST_INT_P (operands[1]))
10788 {
10789 rtx target = (!can_create_pseudo_p ()
10790 ? operands[0]
10791 : gen_reg_rtx (mode));
10792
10793 /* If this is a function address on -mcall-aixdesc,
10794 convert it to the address of the descriptor. */
10795 if (DEFAULT_ABI == ABI_AIX
10796 && SYMBOL_REF_P (operands[1])
10797 && XSTR (operands[1], 0)[0] == '.')
10798 {
10799 const char *name = XSTR (operands[1], 0);
10800 rtx new_ref;
10801 while (*name == '.')
10802 name++;
10803 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10804 CONSTANT_POOL_ADDRESS_P (new_ref)
10805 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10806 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10807 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10808 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10809 operands[1] = new_ref;
10810 }
10811
10812 if (DEFAULT_ABI == ABI_DARWIN)
10813 {
10814 #if TARGET_MACHO
10815 /* This is not PIC code, but could require the subset of
10816 indirections used by mdynamic-no-pic. */
10817 if (MACHO_DYNAMIC_NO_PIC_P)
10818 {
10819 /* Take care of any required data indirection. */
10820 operands[1] = rs6000_machopic_legitimize_pic_address (
10821 operands[1], mode, operands[0]);
10822 if (operands[0] != operands[1])
10823 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10824 return;
10825 }
10826 #endif
10827 emit_insn (gen_macho_high (Pmode, target, operands[1]));
10828 emit_insn (gen_macho_low (Pmode, operands[0],
10829 target, operands[1]));
10830 return;
10831 }
10832
10833 emit_insn (gen_elf_high (target, operands[1]));
10834 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10835 return;
10836 }
10837
10838 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10839 and we have put it in the TOC, we just need to make a TOC-relative
10840 reference to it. */
10841 if (TARGET_TOC
10842 && SYMBOL_REF_P (operands[1])
10843 && use_toc_relative_ref (operands[1], mode))
10844 operands[1] = create_TOC_reference (operands[1], operands[0]);
10845 else if (mode == Pmode
10846 && CONSTANT_P (operands[1])
10847 && GET_CODE (operands[1]) != HIGH
10848 && ((REG_P (operands[0])
10849 && FP_REGNO_P (REGNO (operands[0])))
10850 || !CONST_INT_P (operands[1])
10851 || (num_insns_constant (operands[1], mode)
10852 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10853 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10854 && (TARGET_CMODEL == CMODEL_SMALL
10855 || can_create_pseudo_p ()
10856 || (REG_P (operands[0])
10857 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10858 {
10859
10860 #if TARGET_MACHO
10861 /* Darwin uses a special PIC legitimizer. */
10862 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10863 {
10864 operands[1] =
10865 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10866 operands[0]);
10867 if (operands[0] != operands[1])
10868 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10869 return;
10870 }
10871 #endif
10872
10873 /* If we are to limit the number of things we put in the TOC and
10874 this is a symbol plus a constant we can add in one insn,
10875 just put the symbol in the TOC and add the constant. */
10876 if (GET_CODE (operands[1]) == CONST
10877 && TARGET_NO_SUM_IN_TOC
10878 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10879 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10880 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10881 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10882 && ! side_effects_p (operands[0]))
10883 {
10884 rtx sym =
10885 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10886 rtx other = XEXP (XEXP (operands[1], 0), 1);
10887
10888 sym = force_reg (mode, sym);
10889 emit_insn (gen_add3_insn (operands[0], sym, other));
10890 return;
10891 }
10892
10893 operands[1] = force_const_mem (mode, operands[1]);
10894
10895 if (TARGET_TOC
10896 && SYMBOL_REF_P (XEXP (operands[1], 0))
10897 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10898 {
10899 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10900 operands[0]);
10901 operands[1] = gen_const_mem (mode, tocref);
10902 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10903 }
10904 }
10905 break;
10906
10907 case E_TImode:
10908 if (!VECTOR_MEM_VSX_P (TImode))
10909 rs6000_eliminate_indexed_memrefs (operands);
10910 break;
10911
10912 case E_PTImode:
10913 rs6000_eliminate_indexed_memrefs (operands);
10914 break;
10915
10916 default:
10917 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10918 }
10919
10920 /* Above, we may have called force_const_mem which may have returned
10921 an invalid address. If we can, fix this up; otherwise, reload will
10922 have to deal with it. */
10923 if (MEM_P (operands[1]))
10924 operands[1] = validize_mem (operands[1]);
10925
10926 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10927 }
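
/* Illustrative sketch; the symbol name and assembly are assumptions, not
   taken from this file. On 32-bit ELF with no TOC, no PC-relative
   addressing and no PIC, moving the address of a global `x' goes through
   the gen_elf_high/gen_elf_low path above and assembles roughly as:

   lis r3,x@ha
   la r3,x@l(r3)

   The exact mnemonics and relocations depend on the ABI and the
   assembler; this only shows the HIGH/LO_SUM shape of the code. */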
10928 \f
10929
10930 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10931 static void
10932 init_float128_ibm (machine_mode mode)
10933 {
10934 if (!TARGET_XL_COMPAT)
10935 {
10936 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10937 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10938 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10939 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10940
10941 if (!TARGET_HARD_FLOAT)
10942 {
10943 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10944 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10945 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10946 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10947 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10948 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10949 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10950 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10951
10952 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10953 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10954 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10955 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10956 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10957 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10958 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10959 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10960 }
10961 }
10962 else
10963 {
10964 set_optab_libfunc (add_optab, mode, "_xlqadd");
10965 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10966 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10967 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10968 }
10969
10970 /* Add various conversions for IFmode to use the traditional TFmode
10971 names. */
10972 if (mode == IFmode)
10973 {
10974 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10975 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10976 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10977 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10978 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10979 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10980
10981 if (TARGET_POWERPC64)
10982 {
10983 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10984 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10985 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10986 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10987 }
10988 }
10989 }
10990
10991 /* Create a decl for either complex long double multiply or complex long double
10992 divide when long double is IEEE 128-bit floating point. We can't use
10993 __multc3 and __divtc3 because the original long double using IBM extended
10994 double used those names. The complex multiply/divide functions are encoded
10995 as builtin functions with a complex result and 4 scalar inputs. */
10996
10997 static void
10998 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10999 {
11000 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
11001 name, NULL_TREE);
11002
11003 set_builtin_decl (fncode, fndecl, true);
11004
11005 if (TARGET_DEBUG_BUILTIN)
11006 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
11007
11008 return;
11009 }
11010
11011 /* Set up IEEE 128-bit floating point routines. Use different names if the
11012 arguments can be passed in a vector register. The historical PowerPC
11013 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11014 continue to use that if we aren't using vector registers to pass IEEE
11015 128-bit floating point. */
11016
11017 static void
11018 init_float128_ieee (machine_mode mode)
11019 {
11020 if (FLOAT128_VECTOR_P (mode))
11021 {
11022 static bool complex_muldiv_init_p = false;
11023
11024 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
11025 we have clone or target attributes, this will be called a second
11026 time. We want to create the built-in function only once. */
11027 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
11028 {
11029 complex_muldiv_init_p = true;
11030 built_in_function fncode_mul =
11031 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
11032 - MIN_MODE_COMPLEX_FLOAT);
11033 built_in_function fncode_div =
11034 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
11035 - MIN_MODE_COMPLEX_FLOAT);
11036
11037 tree fntype = build_function_type_list (complex_long_double_type_node,
11038 long_double_type_node,
11039 long_double_type_node,
11040 long_double_type_node,
11041 long_double_type_node,
11042 NULL_TREE);
11043
11044 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
11045 create_complex_muldiv ("__divkc3", fncode_div, fntype);
11046 }
11047
11048 set_optab_libfunc (add_optab, mode, "__addkf3");
11049 set_optab_libfunc (sub_optab, mode, "__subkf3");
11050 set_optab_libfunc (neg_optab, mode, "__negkf2");
11051 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11052 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11053 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11054 set_optab_libfunc (abs_optab, mode, "__abskf2");
11055 set_optab_libfunc (powi_optab, mode, "__powikf2");
11056
11057 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11058 set_optab_libfunc (ne_optab, mode, "__nekf2");
11059 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11060 set_optab_libfunc (ge_optab, mode, "__gekf2");
11061 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11062 set_optab_libfunc (le_optab, mode, "__lekf2");
11063 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11064
11065 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11066 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11067 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11068 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11069
11070 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11071 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11072 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11073
11074 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11075 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11076 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11077
11078 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11079 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11080 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11081 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11082 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11083 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11084
11085 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11086 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11087 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11088 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11089
11090 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11091 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11092 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11093 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11094
11095 if (TARGET_POWERPC64)
11096 {
11097 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11098 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11099 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11100 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11101 }
11102 }
11103
11104 else
11105 {
11106 set_optab_libfunc (add_optab, mode, "_q_add");
11107 set_optab_libfunc (sub_optab, mode, "_q_sub");
11108 set_optab_libfunc (neg_optab, mode, "_q_neg");
11109 set_optab_libfunc (smul_optab, mode, "_q_mul");
11110 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11111 if (TARGET_PPC_GPOPT)
11112 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11113
11114 set_optab_libfunc (eq_optab, mode, "_q_feq");
11115 set_optab_libfunc (ne_optab, mode, "_q_fne");
11116 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11117 set_optab_libfunc (ge_optab, mode, "_q_fge");
11118 set_optab_libfunc (lt_optab, mode, "_q_flt");
11119 set_optab_libfunc (le_optab, mode, "_q_fle");
11120
11121 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11122 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11123 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11124 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11125 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11126 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11127 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11128 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11129 }
11130 }
11131
11132 static void
11133 rs6000_init_libfuncs (void)
11134 {
11135 /* __float128 support. */
11136 if (TARGET_FLOAT128_TYPE)
11137 {
11138 init_float128_ibm (IFmode);
11139 init_float128_ieee (KFmode);
11140 }
11141
11142 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11143 if (TARGET_LONG_DOUBLE_128)
11144 {
11145 if (!TARGET_IEEEQUAD)
11146 init_float128_ibm (TFmode);
11147
11148 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11149 else
11150 init_float128_ieee (TFmode);
11151 }
11152 }
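
/* Example, hedged: the exact libcall depends on the ISA level and
   options. With -mfloat128 on a target without hardware IEEE 128-bit
   arithmetic, the addition in

   __float128 f (__float128 a, __float128 b) { return a + b; }

   becomes a call to __addkf3, registered in init_float128_ieee above,
   while IBM extended double additions call __gcc_qadd from
   init_float128_ibm. */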
11153
11154 /* Emit a potentially record-form instruction, setting DST from SRC.
11155 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11156 signed comparison of DST with zero. If DOT is 1, the generated RTL
11157 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11158 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11159 a separate COMPARE. */
11160
11161 void
11162 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11163 {
11164 if (dot == 0)
11165 {
11166 emit_move_insn (dst, src);
11167 return;
11168 }
11169
11170 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11171 {
11172 emit_move_insn (dst, src);
11173 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11174 return;
11175 }
11176
11177 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11178 if (dot == 1)
11179 {
11180 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11181 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11182 }
11183 else
11184 {
11185 rtx set = gen_rtx_SET (dst, src);
11186 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11187 }
11188 }
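
/* For example, with DOT == 1 and CCREG being CR0, the emitted RTL has
   the shape (register numbers here are illustrative only):

   (parallel [(set (reg:CC 100)
   (compare:CC (and:DI (reg:DI 3) (const_int 240))
   (const_int 0)))
   (clobber (reg:DI 9))])

   which matches a record-form ("dot") pattern in rs6000.md, i.e. a
   single instruction that also sets CR0. */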
11189
11190 \f
11191 /* A validation routine: say whether CODE, a condition code, and MODE
11192 match. The other alternatives either don't make sense or should
11193 never be generated. */
11194
11195 void
11196 validate_condition_mode (enum rtx_code code, machine_mode mode)
11197 {
11198 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11199 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11200 && GET_MODE_CLASS (mode) == MODE_CC);
11201
11202 /* These don't make sense. */
11203 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11204 || mode != CCUNSmode);
11205
11206 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11207 || mode == CCUNSmode);
11208
11209 gcc_assert (mode == CCFPmode
11210 || (code != ORDERED && code != UNORDERED
11211 && code != UNEQ && code != LTGT
11212 && code != UNGT && code != UNLT
11213 && code != UNGE && code != UNLE));
11214
11215 /* These are invalid; the information is not there. */
11216 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11217 }
11218
11219 \f
11220 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11221 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11222 not zero, store there the bit offset (counted from the right) where
11223 the single stretch of 1 bits begins; and similarly for B, the bit
11224 offset where it ends. */
11225
11226 bool
11227 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11228 {
11229 unsigned HOST_WIDE_INT val = INTVAL (mask);
11230 unsigned HOST_WIDE_INT bit;
11231 int nb, ne;
11232 int n = GET_MODE_PRECISION (mode);
11233
11234 if (mode != DImode && mode != SImode)
11235 return false;
11236
11237 if (INTVAL (mask) >= 0)
11238 {
11239 bit = val & -val;
11240 ne = exact_log2 (bit);
11241 nb = exact_log2 (val + bit);
11242 }
11243 else if (val + 1 == 0)
11244 {
11245 nb = n;
11246 ne = 0;
11247 }
11248 else if (val & 1)
11249 {
11250 val = ~val;
11251 bit = val & -val;
11252 nb = exact_log2 (bit);
11253 ne = exact_log2 (val + bit);
11254 }
11255 else
11256 {
11257 bit = val & -val;
11258 ne = exact_log2 (bit);
11259 if (val + bit == 0)
11260 nb = n;
11261 else
11262 nb = 0;
11263 }
11264
11265 nb--;
11266
11267 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11268 return false;
11269
11270 if (b)
11271 *b = nb;
11272 if (e)
11273 *e = ne;
11274
11275 return true;
11276 }
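
/* Worked example (illustrative): MASK == 0x00fff000 in SImode has a
   single stretch of ones in bits 12..23, so the function returns true
   with E = 12 (where the ones begin, counting from the right) and
   B = 23 (where they end). MASK == 0x0ff00ff0 has two stretches of
   ones and is rejected. */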
11277
11278 bool
11279 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11280 {
11281 int nb, ne;
11282 return rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0;
11283 }
11284
11285 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11286 or rldicr instruction, to implement an AND with it in mode MODE. */
11287
11288 bool
11289 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11290 {
11291 int nb, ne;
11292
11293 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11294 return false;
11295
11296 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11297 does not wrap. */
11298 if (mode == DImode)
11299 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11300
11301 /* For SImode, rlwinm can do everything. */
11302 if (mode == SImode)
11303 return (nb < 32 && ne < 32);
11304
11305 return false;
11306 }
11307
11308 /* Return the instruction template for an AND with mask in mode MODE, with
11309 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11310
11311 const char *
11312 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11313 {
11314 int nb, ne;
11315
11316 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11317 gcc_unreachable ();
11318
11319 if (mode == DImode && ne == 0)
11320 {
11321 operands[3] = GEN_INT (63 - nb);
11322 if (dot)
11323 return "rldicl. %0,%1,0,%3";
11324 return "rldicl %0,%1,0,%3";
11325 }
11326
11327 if (mode == DImode && nb == 63)
11328 {
11329 operands[3] = GEN_INT (63 - ne);
11330 if (dot)
11331 return "rldicr. %0,%1,0,%3";
11332 return "rldicr %0,%1,0,%3";
11333 }
11334
11335 if (nb < 32 && ne < 32)
11336 {
11337 operands[3] = GEN_INT (31 - nb);
11338 operands[4] = GEN_INT (31 - ne);
11339 if (dot)
11340 return "rlwinm. %0,%1,0,%3,%4";
11341 return "rlwinm %0,%1,0,%3,%4";
11342 }
11343
11344 gcc_unreachable ();
11345 }
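
/* Worked examples (illustrative operands): an SImode AND with
   0x00ff0000 has nb = 23, ne = 16 and yields

   rlwinm %0,%1,0,8,15

   while a DImode AND with 0xffffffff00000000 has nb = 63, ne = 32 and
   yields

   rldicr %0,%1,0,31

   both plain ANDs expressed as a rotate by zero under a mask. */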
11346
11347 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11348 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11349 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11350
11351 bool
11352 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11353 {
11354 int nb, ne;
11355
11356 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11357 return false;
11358
11359 int n = GET_MODE_PRECISION (mode);
11360 int sh = -1;
11361
11362 if (CONST_INT_P (XEXP (shift, 1)))
11363 {
11364 sh = INTVAL (XEXP (shift, 1));
11365 if (sh < 0 || sh >= n)
11366 return false;
11367 }
11368
11369 rtx_code code = GET_CODE (shift);
11370
11371 /* Convert any shift by 0 to a rotate, to simplify the code below. */
11372 if (sh == 0)
11373 code = ROTATE;
11374
11375 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11376 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11377 code = ASHIFT;
11378 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11379 {
11380 code = LSHIFTRT;
11381 sh = n - sh;
11382 }
11383
11384 /* DImode rotates need rld*. */
11385 if (mode == DImode && code == ROTATE)
11386 return (nb == 63 || ne == 0 || ne == sh);
11387
11388 /* SImode rotates need rlw*. */
11389 if (mode == SImode && code == ROTATE)
11390 return (nb < 32 && ne < 32 && sh < 32);
11391
11392 /* Wrap-around masks are only okay for rotates. */
11393 if (ne > nb)
11394 return false;
11395
11396 /* Variable shifts are only okay for rotates. */
11397 if (sh < 0)
11398 return false;
11399
11400 /* Don't allow ASHIFT if the mask is wrong for that. */
11401 if (code == ASHIFT && ne < sh)
11402 return false;
11403
11404 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11405 if the mask is wrong for that. */
11406 if (nb < 32 && ne < 32 && sh < 32
11407 && !(code == LSHIFTRT && nb >= 32 - sh))
11408 return true;
11409
11410 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11411 if the mask is wrong for that. */
11412 if (code == LSHIFTRT)
11413 sh = 64 - sh;
11414 if (nb == 63 || ne == 0 || ne == sh)
11415 return !(code == LSHIFTRT && nb >= sh);
11416
11417 return false;
11418 }
11419
11420 /* Return the instruction template for a shift with mask in mode MODE, with
11421 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11422
11423 const char *
11424 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11425 {
11426 int nb, ne;
11427
11428 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11429 gcc_unreachable ();
11430
11431 if (mode == DImode && ne == 0)
11432 {
11433 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11434 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11435 operands[3] = GEN_INT (63 - nb);
11436 if (dot)
11437 return "rld%I2cl. %0,%1,%2,%3";
11438 return "rld%I2cl %0,%1,%2,%3";
11439 }
11440
11441 if (mode == DImode && nb == 63)
11442 {
11443 operands[3] = GEN_INT (63 - ne);
11444 if (dot)
11445 return "rld%I2cr. %0,%1,%2,%3";
11446 return "rld%I2cr %0,%1,%2,%3";
11447 }
11448
11449 if (mode == DImode
11450 && GET_CODE (operands[4]) != LSHIFTRT
11451 && CONST_INT_P (operands[2])
11452 && ne == INTVAL (operands[2]))
11453 {
11454 operands[3] = GEN_INT (63 - nb);
11455 if (dot)
11456 return "rld%I2c. %0,%1,%2,%3";
11457 return "rld%I2c %0,%1,%2,%3";
11458 }
11459
11460 if (nb < 32 && ne < 32)
11461 {
11462 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11463 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11464 operands[3] = GEN_INT (31 - nb);
11465 operands[4] = GEN_INT (31 - ne);
11466 /* This insn can also be a 64-bit rotate with mask that really makes
11467 it just a shift right (with mask); the %h below are to adjust for
11468 that situation (shift count is >= 32 in that case). */
11469 if (dot)
11470 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11471 return "rlw%I2nm %0,%1,%h2,%3,%4";
11472 }
11473
11474 gcc_unreachable ();
11475 }
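
/* Worked example (illustrative): (x << 4) & 0xf0 in SImode is an
   ASHIFT with sh = 4 and mask bits 4..7 (nb = 7, ne = 4). The mask
   does not wrap, ne >= sh, and everything fits in 32 bits, so
   rs6000_is_valid_shift_mask accepts it and the template is

   rlwinm %0,%1,4,24,27

   i.e. rotate left by 4 with mask begin/end 24..27 in big-endian bit
   numbering. */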
11476
11477 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11478 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11479 ASHIFT, or LSHIFTRT) in mode MODE. */
11480
11481 bool
11482 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11483 {
11484 int nb, ne;
11485
11486 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11487 return false;
11488
11489 int n = GET_MODE_PRECISION (mode);
11490
11491 int sh = INTVAL (XEXP (shift, 1));
11492 if (sh < 0 || sh >= n)
11493 return false;
11494
11495 rtx_code code = GET_CODE (shift);
11496
11497 /* Convert any shift by 0 to a rotate, to simplify the code below. */
11498 if (sh == 0)
11499 code = ROTATE;
11500
11501 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11502 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11503 code = ASHIFT;
11504 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11505 {
11506 code = LSHIFTRT;
11507 sh = n - sh;
11508 }
11509
11510 /* DImode rotates need rldimi. */
11511 if (mode == DImode && code == ROTATE)
11512 return (ne == sh);
11513
11514 /* SImode rotates need rlwimi. */
11515 if (mode == SImode && code == ROTATE)
11516 return (nb < 32 && ne < 32 && sh < 32);
11517
11518 /* Wrap-around masks are only okay for rotates. */
11519 if (ne > nb)
11520 return false;
11521
11522 /* Don't allow ASHIFT if the mask is wrong for that. */
11523 if (code == ASHIFT && ne < sh)
11524 return false;
11525
11526 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11527 if the mask is wrong for that. */
11528 if (nb < 32 && ne < 32 && sh < 32
11529 && !(code == LSHIFTRT && nb >= 32 - sh))
11530 return true;
11531
11532 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11533 if the mask is wrong for that. */
11534 if (code == LSHIFTRT)
11535 sh = 64 - sh;
11536 if (ne == sh)
11537 return !(code == LSHIFTRT && nb >= sh);
11538
11539 return false;
11540 }
11541
11542 /* Return the instruction template for an insert with mask in mode MODE, with
11543 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11544
11545 const char *
11546 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11547 {
11548 int nb, ne;
11549
11550 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11551 gcc_unreachable ();
11552
11553 /* Prefer rldimi because rlwimi is cracked. */
11554 if (TARGET_POWERPC64
11555 && (!dot || mode == DImode)
11556 && GET_CODE (operands[4]) != LSHIFTRT
11557 && ne == INTVAL (operands[2]))
11558 {
11559 operands[3] = GEN_INT (63 - nb);
11560 if (dot)
11561 return "rldimi. %0,%1,%2,%3";
11562 return "rldimi %0,%1,%2,%3";
11563 }
11564
11565 if (nb < 32 && ne < 32)
11566 {
11567 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11568 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11569 operands[3] = GEN_INT (31 - nb);
11570 operands[4] = GEN_INT (31 - ne);
11571 if (dot)
11572 return "rlwimi. %0,%1,%2,%3,%4";
11573 return "rlwimi %0,%1,%2,%3,%4";
11574 }
11575
11576 gcc_unreachable ();
11577 }
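
/* Worked example (illustrative): inserting %1 shifted left by 8 under
   mask 0x0000ff00 (nb = 15, ne = 8, sh = 8) is accepted by
   rs6000_is_valid_insert_mask. On a 32-bit target the template is

   rlwimi %0,%1,8,16,23

   while the non-dot form on a 64-bit target prefers the equivalent

   rldimi %0,%1,8,48

   because, as noted above, rlwimi is cracked. */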
11578
11579 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11580 using two machine instructions. */
11581
11582 bool
11583 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11584 {
11585 /* There are two kinds of AND we can handle with two insns:
11586 1) those we can do with two rl* insns;
11587 2) ori[s]; xori[s].
11588
11589 We do not handle that last case yet. */
11590
11591 /* If there is just one stretch of ones, we can do it. */
11592 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11593 return true;
11594
11595 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11596 one insn, we can do the whole thing with two. */
11597 unsigned HOST_WIDE_INT val = INTVAL (c);
11598 unsigned HOST_WIDE_INT bit1 = val & -val;
11599 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11600 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11601 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11602 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11603 }
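
/* Worked example (illustrative): C == 0x00ff00ff has two stretches of
   ones, so it is not a single valid mask. Filling the lowest hole:
   bit1 = 1, bit2 = 0x100, val1 = 0x00ff0000, bit3 = 0x10000, and
   val + bit3 - bit2 == 0x00ffffff, a single stretch of ones that is a
   valid AND mask, so this AND can be done in two instructions (the
   masks themselves are computed in rs6000_emit_2insn_and below). */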
11604
11605 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11606 If EXPAND is true, split rotate-and-mask instructions we generate to
11607 their constituent parts as well (this is used during expand); if DOT
11608 is 1, make the last insn a record-form instruction clobbering the
11609 destination GPR and setting the CC reg (from operands[3]); if 2, set
11610 that GPR as well as the CC reg. */
11611
11612 void
11613 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11614 {
11615 gcc_assert (!(expand && dot));
11616
11617 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11618
11619 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11620 shift right. This generates better code than doing the masks without
11621 shifts, or shifting first right and then left. */
11622 int nb, ne;
11623 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11624 {
11625 gcc_assert (mode == DImode);
11626
11627 int shift = 63 - nb;
11628 if (expand)
11629 {
11630 rtx tmp1 = gen_reg_rtx (DImode);
11631 rtx tmp2 = gen_reg_rtx (DImode);
11632 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11633 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11634 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11635 }
11636 else
11637 {
11638 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11639 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11640 emit_move_insn (operands[0], tmp);
11641 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11642 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11643 }
11644 return;
11645 }
11646
11647 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11648 that does the rest. */
11649 unsigned HOST_WIDE_INT bit1 = val & -val;
11650 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11651 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11652 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11653
11654 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11655 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
11656
11657 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11658
11659 /* Two "no-rotate"-and-mask instructions, for SImode. */
11660 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11661 {
11662 gcc_assert (mode == SImode);
11663
11664 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11665 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11666 emit_move_insn (reg, tmp);
11667 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11668 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11669 return;
11670 }
11671
11672 gcc_assert (mode == DImode);
11673
11674 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11675 insns; we have to do the first in SImode, because it wraps. */
11676 if (mask2 <= 0xffffffff
11677 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11678 {
11679 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11680 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11681 GEN_INT (mask1));
11682 rtx reg_low = gen_lowpart (SImode, reg);
11683 emit_move_insn (reg_low, tmp);
11684 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11685 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11686 return;
11687 }
11688
11689 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11690 at the top end), rotate back and clear the other hole. */
11691 int right = exact_log2 (bit3);
11692 int left = 64 - right;
11693
11694 /* Rotate the mask too. */
11695 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11696
11697 if (expand)
11698 {
11699 rtx tmp1 = gen_reg_rtx (DImode);
11700 rtx tmp2 = gen_reg_rtx (DImode);
11701 rtx tmp3 = gen_reg_rtx (DImode);
11702 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11703 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11704 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11705 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11706 }
11707 else
11708 {
11709 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11710 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11711 emit_move_insn (operands[0], tmp);
11712 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11713 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11714 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11715 }
11716 }
11717 \f
11718 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
11719 for lfq and stfq insns iff the registers are hard registers. */
11720
11721 int
11722 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11723 {
11724 /* We might have been passed a SUBREG. */
11725 if (!REG_P (reg1) || !REG_P (reg2))
11726 return 0;
11727
11728 /* We might have been passed non-floating-point registers. */
11729 if (!FP_REGNO_P (REGNO (reg1))
11730 || !FP_REGNO_P (REGNO (reg2)))
11731 return 0;
11732
11733 return (REGNO (reg1) == REGNO (reg2) - 1);
11734 }
11735
11736 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11737 addr1 and addr2 must be in consecutive memory locations
11738 (addr2 == addr1 + 8). */
11739
11740 int
11741 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11742 {
11743 rtx addr1, addr2;
11744 unsigned int reg1, reg2;
11745 int offset1, offset2;
11746
11747 /* The mems cannot be volatile. */
11748 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11749 return 0;
11750
11751 addr1 = XEXP (mem1, 0);
11752 addr2 = XEXP (mem2, 0);
11753
11754 /* Extract an offset (if used) from the first addr. */
11755 if (GET_CODE (addr1) == PLUS)
11756 {
11757 /* If not a REG, return zero. */
11758 if (!REG_P (XEXP (addr1, 0)))
11759 return 0;
11760 else
11761 {
11762 reg1 = REGNO (XEXP (addr1, 0));
11763 /* The offset must be constant! */
11764 if (!CONST_INT_P (XEXP (addr1, 1)))
11765 return 0;
11766 offset1 = INTVAL (XEXP (addr1, 1));
11767 }
11768 }
11769 else if (!REG_P (addr1))
11770 return 0;
11771 else
11772 {
11773 reg1 = REGNO (addr1);
11774 /* This was a simple (mem (reg)) expression. Offset is 0. */
11775 offset1 = 0;
11776 }
11777
11778 /* And now for the second addr. */
11779 if (GET_CODE (addr2) == PLUS)
11780 {
11781 /* If not a REG, return zero. */
11782 if (!REG_P (XEXP (addr2, 0)))
11783 return 0;
11784 else
11785 {
11786 reg2 = REGNO (XEXP (addr2, 0));
11787 /* The offset must be constant. */
11788 if (!CONST_INT_P (XEXP (addr2, 1)))
11789 return 0;
11790 offset2 = INTVAL (XEXP (addr2, 1));
11791 }
11792 }
11793 else if (!REG_P (addr2))
11794 return 0;
11795 else
11796 {
11797 reg2 = REGNO (addr2);
11798 /* This was a simple (mem (reg)) expression. Offset is 0. */
11799 offset2 = 0;
11800 }
11801
11802 /* Both of these must have the same base register. */
11803 if (reg1 != reg2)
11804 return 0;
11805
11806 /* The offset for the second addr must be 8 more than the first addr. */
11807 if (offset2 != offset1 + 8)
11808 return 0;
11809
11810 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11811 instructions. */
11812 return 1;
11813 }
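
/* Example (illustrative addresses): (mem (plus (reg 9) (const_int 8)))
   and (mem (plus (reg 9) (const_int 16))) share a base register and
   the offsets differ by exactly 8, so the pair qualifies. Swapping
   the two offsets, using different base registers, or marking either
   MEM volatile disqualifies it. */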
11814 \f
11815 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
11816 need to use DDmode, in all other cases we can use the same mode. */
11817 static machine_mode
11818 rs6000_secondary_memory_needed_mode (machine_mode mode)
11819 {
11820 if (lra_in_progress && mode == SDmode)
11821 return DDmode;
11822 return mode;
11823 }
11824
11825 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11826 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11827 only work on the traditional altivec registers, note if an altivec register
11828 was chosen. */
11829
11830 static enum rs6000_reg_type
11831 register_to_reg_type (rtx reg, bool *is_altivec)
11832 {
11833 HOST_WIDE_INT regno;
11834 enum reg_class rclass;
11835
11836 if (SUBREG_P (reg))
11837 reg = SUBREG_REG (reg);
11838
11839 if (!REG_P (reg))
11840 return NO_REG_TYPE;
11841
11842 regno = REGNO (reg);
11843 if (!HARD_REGISTER_NUM_P (regno))
11844 {
11845 if (!lra_in_progress && !reload_completed)
11846 return PSEUDO_REG_TYPE;
11847
11848 regno = true_regnum (reg);
11849 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
11850 return PSEUDO_REG_TYPE;
11851 }
11852
11853 gcc_assert (regno >= 0);
11854
11855 if (is_altivec && ALTIVEC_REGNO_P (regno))
11856 *is_altivec = true;
11857
11858 rclass = rs6000_regno_regclass[regno];
11859 return reg_class_to_reg_type[(int)rclass];
11860 }
11861
11862 /* Helper function to return the cost of adding a TOC entry address. */
11863
11864 static inline int
11865 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
11866 {
11867 int ret;
11868
11869 if (TARGET_CMODEL != CMODEL_SMALL)
11870 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
11871
11872 else
11873 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
11874
11875 return ret;
11876 }
11877
11878 /* Helper function for rs6000_secondary_reload to determine whether the memory
11879 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11880 needs reloading. Return negative if the memory is not handled by the memory
11881 helper functions and a different reload method should be tried, 0 if no
11882 additional instructions are needed, and positive to give the extra cost
11883 for the memory. */
11884
11885 static int
11886 rs6000_secondary_reload_memory (rtx addr,
11887 enum reg_class rclass,
11888 machine_mode mode)
11889 {
11890 int extra_cost = 0;
11891 rtx reg, and_arg, plus_arg0, plus_arg1;
11892 addr_mask_type addr_mask;
11893 const char *type = NULL;
11894 const char *fail_msg = NULL;
11895
11896 if (GPR_REG_CLASS_P (rclass))
11897 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11898
11899 else if (rclass == FLOAT_REGS)
11900 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11901
11902 else if (rclass == ALTIVEC_REGS)
11903 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11904
11905 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11906 else if (rclass == VSX_REGS)
11907 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
11908 & ~RELOAD_REG_AND_M16);
11909
11910 /* If the register allocator hasn't made up its mind yet on the register
11911 class to use, settle on defaults. */
11912 else if (rclass == NO_REGS)
11913 {
11914 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11915 & ~RELOAD_REG_AND_M16);
11916
11917 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11918 addr_mask &= ~(RELOAD_REG_INDEXED
11919 | RELOAD_REG_PRE_INCDEC
11920 | RELOAD_REG_PRE_MODIFY);
11921 }
11922
11923 else
11924 addr_mask = 0;
11925
11926 /* If the register isn't valid in this register class, just return now. */
11927 if ((addr_mask & RELOAD_REG_VALID) == 0)
11928 {
11929 if (TARGET_DEBUG_ADDR)
11930 {
11931 fprintf (stderr,
11932 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11933 "not valid in class\n",
11934 GET_MODE_NAME (mode), reg_class_names[rclass]);
11935 debug_rtx (addr);
11936 }
11937
11938 return -1;
11939 }
11940
11941 switch (GET_CODE (addr))
11942 {
11943 /* Does the register class support auto-update forms for this mode? We
11944 don't need a scratch register, since PowerPC only supports
11945 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11946 case PRE_INC:
11947 case PRE_DEC:
11948 reg = XEXP (addr, 0);
11949 if (!base_reg_operand (addr, GET_MODE (reg)))
11950 {
11951 fail_msg = "no base register #1";
11952 extra_cost = -1;
11953 }
11954
11955 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11956 {
11957 extra_cost = 1;
11958 type = "update";
11959 }
11960 break;
11961
11962 case PRE_MODIFY:
11963 reg = XEXP (addr, 0);
11964 plus_arg1 = XEXP (addr, 1);
11965 if (!base_reg_operand (reg, GET_MODE (reg))
11966 || GET_CODE (plus_arg1) != PLUS
11967 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11968 {
11969 fail_msg = "bad PRE_MODIFY";
11970 extra_cost = -1;
11971 }
11972
11973 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11974 {
11975 extra_cost = 1;
11976 type = "update";
11977 }
11978 break;
11979
11980 /* Do we need to simulate AND -16 to clear the bottom address bits used
11981 in VMX load/stores? Only allow the AND for vector sizes. */
11982 case AND:
11983 and_arg = XEXP (addr, 0);
11984 if (GET_MODE_SIZE (mode) != 16
11985 || !CONST_INT_P (XEXP (addr, 1))
11986 || INTVAL (XEXP (addr, 1)) != -16)
11987 {
11988 fail_msg = "bad Altivec AND #1";
11989 extra_cost = -1;
11990 }
11991
11992 if (rclass != ALTIVEC_REGS)
11993 {
11994 if (legitimate_indirect_address_p (and_arg, false))
11995 extra_cost = 1;
11996
11997 else if (legitimate_indexed_address_p (and_arg, false))
11998 extra_cost = 2;
11999
12000 else
12001 {
12002 fail_msg = "bad Altivec AND #2";
12003 extra_cost = -1;
12004 }
12005
12006 type = "and";
12007 }
12008 break;
12009
12010 /* If this is an indirect address, make sure it is a base register. */
12011 case REG:
12012 case SUBREG:
12013 if (!legitimate_indirect_address_p (addr, false))
12014 {
12015 extra_cost = 1;
12016 type = "move";
12017 }
12018 break;
12019
12020 /* If this is an indexed address, make sure the register class can handle
12021 indexed addresses for this mode. */
12022 case PLUS:
12023 plus_arg0 = XEXP (addr, 0);
12024 plus_arg1 = XEXP (addr, 1);
12025
12026 /* (plus (plus (reg) (constant)) (constant)) is generated during
12027 push_reload processing, so handle it now. */
12028 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12029 {
12030 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12031 {
12032 extra_cost = 1;
12033 type = "offset";
12034 }
12035 }
12036
12037 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12038 push_reload processing, so handle it now. */
12039 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12040 {
12041 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12042 {
12043 extra_cost = 1;
12044 type = "indexed #2";
12045 }
12046 }
12047
12048 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12049 {
12050 fail_msg = "no base register #2";
12051 extra_cost = -1;
12052 }
12053
12054 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12055 {
12056 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12057 || !legitimate_indexed_address_p (addr, false))
12058 {
12059 extra_cost = 1;
12060 type = "indexed";
12061 }
12062 }
12063
12064 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12065 && CONST_INT_P (plus_arg1))
12066 {
12067 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12068 {
12069 extra_cost = 1;
12070 type = "vector d-form offset";
12071 }
12072 }
12073
12074 /* Make sure the register class can handle offset addresses. */
12075 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12076 {
12077 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12078 {
12079 extra_cost = 1;
12080 type = "offset #2";
12081 }
12082 }
12083
12084 else
12085 {
12086 fail_msg = "bad PLUS";
12087 extra_cost = -1;
12088 }
12089
12090 break;
12091
12092 case LO_SUM:
12093 /* Quad offsets are restricted and can't handle normal addresses. */
12094 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12095 {
12096 extra_cost = -1;
12097 type = "vector d-form lo_sum";
12098 }
12099
12100 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12101 {
12102 fail_msg = "bad LO_SUM";
12103 extra_cost = -1;
12104 }
12105
12106 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12107 {
12108 extra_cost = 1;
12109 type = "lo_sum";
12110 }
12111 break;
12112
12113 /* Static addresses need to create a TOC entry. */
12114 case CONST:
12115 case SYMBOL_REF:
12116 case LABEL_REF:
12117 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12118 {
12119 extra_cost = -1;
12120 type = "vector d-form lo_sum #2";
12121 }
12122
12123 else
12124 {
12125 type = "address";
12126 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12127 }
12128 break;
12129
12130 /* TOC references look like offsettable memory. */
12131 case UNSPEC:
12132 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12133 {
12134 fail_msg = "bad UNSPEC";
12135 extra_cost = -1;
12136 }
12137
12138 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12139 {
12140 extra_cost = -1;
12141 type = "vector d-form lo_sum #3";
12142 }
12143
12144 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12145 {
12146 extra_cost = 1;
12147 type = "toc reference";
12148 }
12149 break;
12150
12151 default:
12152 {
12153 fail_msg = "bad address";
12154 extra_cost = -1;
12155 }
12156 }
12157
12158 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12159 {
12160 if (extra_cost < 0)
12161 fprintf (stderr,
12162 "rs6000_secondary_reload_memory error: mode = %s, "
12163 "class = %s, addr_mask = '%s', %s\n",
12164 GET_MODE_NAME (mode),
12165 reg_class_names[rclass],
12166 rs6000_debug_addr_mask (addr_mask, false),
12167 (fail_msg != NULL) ? fail_msg : "<bad address>");
12168
12169 else
12170 fprintf (stderr,
12171 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12172 "addr_mask = '%s', extra cost = %d, %s\n",
12173 GET_MODE_NAME (mode),
12174 reg_class_names[rclass],
12175 rs6000_debug_addr_mask (addr_mask, false),
12176 extra_cost,
12177 (type) ? type : "<none>");
12178
12179 debug_rtx (addr);
12180 }
12181
12182 return extra_cost;
12183 }
12184
12185 /* Helper function for rs6000_secondary_reload to return true if a move to a
12186    different register class is really a simple move.  */
12187
12188 static bool
12189 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12190 enum rs6000_reg_type from_type,
12191 machine_mode mode)
12192 {
12193 int size = GET_MODE_SIZE (mode);
12194
12195 /* Add support for various direct moves available. In this function, we only
12196 look at cases where we don't need any extra registers, and one or more
12197      simple move insns are issued.  Historically, small integers were not allowed
12198      in FPR/VSX registers.  Single precision binary floating point is not a
12199      simple move because we need to convert to the single precision memory layout.
12200 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12201 need special direct move handling, which we do not support yet. */
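  /* Quick reference for the cases accepted below (descriptive only; the
     tests that follow are authoritative):
       GPR <-> VSX, 8 bytes, 64-bit:    mtvsrd / mfvsrd (ISA 2.07)
       GPR <-> VSX, 16 bytes, 64-bit:   mtvsrdd / mfvsrd + mfvsrld (ISA 3.0)
       GPR <-> VSX, SImode:             mtvsrwz / mfvsrwz (ISA 2.07)
       GPR <-> VSX, QImode/HImode:      ISA 3.0
       GPR <-> VSX, SDmode:             mtvsrwz / mfvsrwz
       GPR <-> SPR, word-sized:         mtspr / mfspr style moves.  */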
12202 if (TARGET_DIRECT_MOVE
12203 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12204 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12205 {
12206 if (TARGET_POWERPC64)
12207 {
12208 	  /* ISA 2.07: MTVSRD or MFVSRD.  */
12209 if (size == 8)
12210 return true;
12211
12212 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12213 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12214 return true;
12215 }
12216
12217 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12218 if (TARGET_P8_VECTOR)
12219 {
12220 if (mode == SImode)
12221 return true;
12222
12223 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12224 return true;
12225 }
12226
12227 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12228 if (mode == SDmode)
12229 return true;
12230 }
12231
12232 /* Move to/from SPR. */
12233 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12234 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12235 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12236 return true;
12237
12238 return false;
12239 }
12240
12241 /* Direct move helper function for rs6000_secondary_reload.  Handle all of the
12242    special direct moves that involve allocating an extra register.  Return
12243    true if there is such a helper function, recording its insn code and
12244    extra cost in SRI, or false if not.  */
12245
12246 static bool
12247 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12248 enum rs6000_reg_type from_type,
12249 machine_mode mode,
12250 secondary_reload_info *sri,
12251 bool altivec_p)
12252 {
12253 bool ret = false;
12254 enum insn_code icode = CODE_FOR_nothing;
12255 int cost = 0;
12256 int size = GET_MODE_SIZE (mode);
12257
12258 if (TARGET_POWERPC64 && size == 16)
12259 {
12260       /* Handle moving 128-bit values from GPRs to VSX registers on
12261 ISA 2.07 (power8, power9) when running in 64-bit mode using
12262 XXPERMDI to glue the two 64-bit values back together. */
12263 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12264 {
12265 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12266 icode = reg_addr[mode].reload_vsx_gpr;
12267 }
12268
12269       /* Handle moving 128-bit values from VSX registers to GPRs on ISA
12270 	 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12271 bottom 64-bit value. */
12272 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12273 {
12274 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12275 icode = reg_addr[mode].reload_gpr_vsx;
12276 }
12277 }
12278
12279 else if (TARGET_POWERPC64 && mode == SFmode)
12280 {
12281 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12282 {
12283 cost = 3; /* xscvdpspn, mfvsrd, and. */
12284 icode = reg_addr[mode].reload_gpr_vsx;
12285 }
12286
12287 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12288 {
12289 	  cost = 2;			/* mtvsrwz, xscvspdpn.  */
12290 icode = reg_addr[mode].reload_vsx_gpr;
12291 }
12292 }
12293
12294 else if (!TARGET_POWERPC64 && size == 8)
12295 {
12296 /* Handle moving 64-bit values from GPRs to floating point registers on
12297 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12298 32-bit values back together. Altivec register classes must be handled
12299 specially since a different instruction is used, and the secondary
12300 	 reload support requires a single register class in the scratch
12301 register constraint. However, right now TFmode is not allowed in
12302 Altivec registers, so the pattern will never match. */
12303 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12304 {
12305 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12306 icode = reg_addr[mode].reload_fpr_gpr;
12307 }
12308 }
12309
12310 if (icode != CODE_FOR_nothing)
12311 {
12312 ret = true;
12313 if (sri)
12314 {
12315 sri->icode = icode;
12316 sri->extra_cost = cost;
12317 }
12318 }
12319
12320 return ret;
12321 }
12322
12323 /* Return whether a move between two register classes can be done either
12324 directly (simple move) or via a pattern that uses a single extra temporary
12325    (using ISA 2.07's direct move in this case).  */
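/* For example, before register allocation a DImode move between two pseudos
   is assumed to be simple; after allocation, a DImode GPR <-> VSX move on a
   64-bit ISA 2.07 target is a simple mtvsrd/mfvsrd, while a TImode
   GPR -> VSX move on a power8 needs the reload_vsx_gpr helper and a scratch
   register (an illustrative summary of the checks below).  */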
12326
12327 static bool
12328 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12329 enum rs6000_reg_type from_type,
12330 machine_mode mode,
12331 secondary_reload_info *sri,
12332 bool altivec_p)
12333 {
12334 /* Fall back to load/store reloads if either type is not a register. */
12335 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12336 return false;
12337
12338 /* If we haven't allocated registers yet, assume the move can be done for the
12339 standard register types. */
12340 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12341 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12342 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12343 return true;
12344
12345   /* A move within the same set of registers is a simple move for non-specialized
12346 registers. */
12347 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12348 return true;
12349
12350 /* Check whether a simple move can be done directly. */
12351 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12352 {
12353 if (sri)
12354 {
12355 sri->icode = CODE_FOR_nothing;
12356 sri->extra_cost = 0;
12357 }
12358 return true;
12359 }
12360
12361 /* Now check if we can do it in a few steps. */
12362 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12363 altivec_p);
12364 }
12365
12366 /* Inform reload about cases where moving X with a mode MODE to a register in
12367 RCLASS requires an extra scratch or immediate register. Return the class
12368 needed for the immediate register.
12369
12370 For VSX and Altivec, we may need a register to convert sp+offset into
12371 reg+sp.
12372
12373 For misaligned 64-bit gpr loads and stores we need a register to
12374 convert an offset address to indirect. */
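/* For example, a V4SImode Altivec class load from (plus sp const) cannot use
   the offset directly (no d-form addressing), so we return NO_REGS but hand
   back one of the reload_<RELOAD:mode>_<P:mptrsize>_load patterns in SRI
   with a nonzero extra cost; reload then allocates the scratch register that
   pattern needs to build a reg+reg address.  */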
12375
12376 static reg_class_t
12377 rs6000_secondary_reload (bool in_p,
12378 rtx x,
12379 reg_class_t rclass_i,
12380 machine_mode mode,
12381 secondary_reload_info *sri)
12382 {
12383 enum reg_class rclass = (enum reg_class) rclass_i;
12384 reg_class_t ret = ALL_REGS;
12385 enum insn_code icode;
12386 bool default_p = false;
12387 bool done_p = false;
12388
12389 /* Allow subreg of memory before/during reload. */
12390 bool memory_p = (MEM_P (x)
12391 || (!reload_completed && SUBREG_P (x)
12392 && MEM_P (SUBREG_REG (x))));
12393
12394 sri->icode = CODE_FOR_nothing;
12395 sri->t_icode = CODE_FOR_nothing;
12396 sri->extra_cost = 0;
12397 icode = ((in_p)
12398 ? reg_addr[mode].reload_load
12399 : reg_addr[mode].reload_store);
12400
12401 if (REG_P (x) || register_operand (x, mode))
12402 {
12403 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12404 bool altivec_p = (rclass == ALTIVEC_REGS);
12405 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12406
12407 if (!in_p)
12408 std::swap (to_type, from_type);
12409
12410 /* Can we do a direct move of some sort? */
12411 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12412 altivec_p))
12413 {
12414 icode = (enum insn_code)sri->icode;
12415 default_p = false;
12416 done_p = true;
12417 ret = NO_REGS;
12418 }
12419 }
12420
12421 /* Make sure 0.0 is not reloaded or forced into memory. */
12422 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12423 {
12424 ret = NO_REGS;
12425 default_p = false;
12426 done_p = true;
12427 }
12428
12429   /* If this is a scalar floating point value and we want to load it into the
12430      traditional Altivec registers, move it through a traditional floating
12431      point register, unless we have D-form addressing.  Also make sure that
12432      non-zero constants use an FPR.  */
12433 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12434 && !mode_supports_vmx_dform (mode)
12435 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12436 && (memory_p || CONST_DOUBLE_P (x)))
12437 {
12438 ret = FLOAT_REGS;
12439 default_p = false;
12440 done_p = true;
12441 }
12442
12443 /* Handle reload of load/stores if we have reload helper functions. */
12444 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12445 {
12446 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12447 mode);
12448
12449 if (extra_cost >= 0)
12450 {
12451 done_p = true;
12452 ret = NO_REGS;
12453 if (extra_cost > 0)
12454 {
12455 sri->extra_cost = extra_cost;
12456 sri->icode = icode;
12457 }
12458 }
12459 }
12460
12461 /* Handle unaligned loads and stores of integer registers. */
12462 if (!done_p && TARGET_POWERPC64
12463 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12464 && memory_p
12465 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12466 {
12467 rtx addr = XEXP (x, 0);
12468 rtx off = address_offset (addr);
12469
12470 if (off != NULL_RTX)
12471 {
12472 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12473 unsigned HOST_WIDE_INT offset = INTVAL (off);
12474
12475 /* We need a secondary reload when our legitimate_address_p
12476 says the address is good (as otherwise the entire address
12477 will be reloaded), and the offset is not a multiple of
12478 four or we have an address wrap. Address wrap will only
12479 occur for LO_SUMs since legitimate_offset_address_p
12480 rejects addresses for 16-byte mems that will wrap. */
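	      /* For example, a DImode access (extra = 0) at a lo_sum offset
		 of 0x7ffe fails the (offset & 3) test, since the DS-form
		 ld/std cannot encode it; a TImode access (extra = 8) at
		 offset 0x7ffc has (0x7ffc ^ 0x8000) = 0xfffc >= 0xfff8, so
		 the second doubleword would wrap past the displacement.  */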
12481 if (GET_CODE (addr) == LO_SUM
12482 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12483 && ((offset & 3) != 0
12484 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12485 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12486 && (offset & 3) != 0))
12487 {
12488 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12489 if (in_p)
12490 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12491 : CODE_FOR_reload_di_load);
12492 else
12493 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12494 : CODE_FOR_reload_di_store);
12495 sri->extra_cost = 2;
12496 ret = NO_REGS;
12497 done_p = true;
12498 }
12499 else
12500 default_p = true;
12501 }
12502 else
12503 default_p = true;
12504 }
12505
12506 if (!done_p && !TARGET_POWERPC64
12507 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12508 && memory_p
12509 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12510 {
12511 rtx addr = XEXP (x, 0);
12512 rtx off = address_offset (addr);
12513
12514 if (off != NULL_RTX)
12515 {
12516 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12517 unsigned HOST_WIDE_INT offset = INTVAL (off);
12518
12519 /* We need a secondary reload when our legitimate_address_p
12520 says the address is good (as otherwise the entire address
12521 will be reloaded), and we have a wrap.
12522
12523 legitimate_lo_sum_address_p allows LO_SUM addresses to
12524 have any offset so test for wrap in the low 16 bits.
12525
12526 legitimate_offset_address_p checks for the range
12527 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12528 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12529 [0x7ff4,0x7fff] respectively, so test for the
12530 intersection of these ranges, [0x7ffc,0x7fff] and
12531 [0x7ff4,0x7ff7] respectively.
12532
12533 Note that the address we see here may have been
12534 manipulated by legitimize_reload_address. */
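	      /* For example, DFmode here has extra = 4, so the offset test
		 reduces to offset - 0x7ffc < 4, accepting exactly the wrap
		 range [0x7ffc,0x7fff] where the second lwz/stw displacement
		 would exceed 0x7fff.  */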
12535 if (GET_CODE (addr) == LO_SUM
12536 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12537 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12538 {
12539 if (in_p)
12540 sri->icode = CODE_FOR_reload_si_load;
12541 else
12542 sri->icode = CODE_FOR_reload_si_store;
12543 sri->extra_cost = 2;
12544 ret = NO_REGS;
12545 done_p = true;
12546 }
12547 else
12548 default_p = true;
12549 }
12550 else
12551 default_p = true;
12552 }
12553
12554 if (!done_p)
12555 default_p = true;
12556
12557 if (default_p)
12558 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12559
12560 gcc_assert (ret != ALL_REGS);
12561
12562 if (TARGET_DEBUG_ADDR)
12563 {
12564 fprintf (stderr,
12565 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12566 "mode = %s",
12567 reg_class_names[ret],
12568 in_p ? "true" : "false",
12569 reg_class_names[rclass],
12570 GET_MODE_NAME (mode));
12571
12572 if (reload_completed)
12573 fputs (", after reload", stderr);
12574
12575 if (!done_p)
12576 fputs (", done_p not set", stderr);
12577
12578 if (default_p)
12579 fputs (", default secondary reload", stderr);
12580
12581 if (sri->icode != CODE_FOR_nothing)
12582 fprintf (stderr, ", reload func = %s, extra cost = %d",
12583 insn_data[sri->icode].name, sri->extra_cost);
12584
12585 else if (sri->extra_cost > 0)
12586 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12587
12588 fputs ("\n", stderr);
12589 debug_rtx (x);
12590 }
12591
12592 return ret;
12593 }
12594
12595 /* Better tracing for rs6000_secondary_reload_inner. */
12596
12597 static void
12598 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12599 bool store_p)
12600 {
12601 rtx set, clobber;
12602
12603 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12604
12605 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12606 store_p ? "store" : "load");
12607
12608 if (store_p)
12609 set = gen_rtx_SET (mem, reg);
12610 else
12611 set = gen_rtx_SET (reg, mem);
12612
12613 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12614 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12615 }
12616
12617 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12618 ATTRIBUTE_NORETURN;
12619
12620 static void
12621 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12622 bool store_p)
12623 {
12624 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12625 gcc_unreachable ();
12626 }
12627
12628 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12629 reload helper functions. These were identified in
12630 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12631 reload, it calls the insns:
12632 reload_<RELOAD:mode>_<P:mptrsize>_store
12633 reload_<RELOAD:mode>_<P:mptrsize>_load
12634
12635 which in turn calls this function, to do whatever is necessary to create
12636 valid addresses. */
12637
12638 void
12639 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12640 {
12641 int regno = true_regnum (reg);
12642 machine_mode mode = GET_MODE (reg);
12643 addr_mask_type addr_mask;
12644 rtx addr;
12645 rtx new_addr;
12646 rtx op_reg, op0, op1;
12647 rtx and_op;
12648 rtx cc_clobber;
12649 rtvec rv;
12650
12651 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12652 || !base_reg_operand (scratch, GET_MODE (scratch)))
12653 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12654
12655 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12656 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12657
12658 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12659 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12660
12661 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12662 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12663
12664 else
12665 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12666
12667 /* Make sure the mode is valid in this register class. */
12668 if ((addr_mask & RELOAD_REG_VALID) == 0)
12669 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12670
12671 if (TARGET_DEBUG_ADDR)
12672 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12673
12674 new_addr = addr = XEXP (mem, 0);
12675 switch (GET_CODE (addr))
12676 {
12677 /* Does the register class support auto update forms for this mode? If
12678 not, do the update now. We don't need a scratch register, since the
12679 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12680 case PRE_INC:
12681 case PRE_DEC:
12682 op_reg = XEXP (addr, 0);
12683 if (!base_reg_operand (op_reg, Pmode))
12684 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12685
12686 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12687 {
12688 int delta = GET_MODE_SIZE (mode);
12689 if (GET_CODE (addr) == PRE_DEC)
12690 delta = -delta;
12691 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12692 new_addr = op_reg;
12693 }
12694 break;
12695
12696 case PRE_MODIFY:
12697 op0 = XEXP (addr, 0);
12698 op1 = XEXP (addr, 1);
12699 if (!base_reg_operand (op0, Pmode)
12700 || GET_CODE (op1) != PLUS
12701 || !rtx_equal_p (op0, XEXP (op1, 0)))
12702 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12703
12704 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12705 {
12706 emit_insn (gen_rtx_SET (op0, op1));
12707 	  new_addr = op0;	/* Use the base register just updated.  */
12708 }
12709 break;
12710
12711 /* Do we need to simulate AND -16 to clear the bottom address bits used
12712 in VMX load/stores? */
12713 case AND:
12714 op0 = XEXP (addr, 0);
12715 op1 = XEXP (addr, 1);
12716 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12717 {
12718 if (REG_P (op0) || SUBREG_P (op0))
12719 op_reg = op0;
12720
12721 else if (GET_CODE (op1) == PLUS)
12722 {
12723 emit_insn (gen_rtx_SET (scratch, op1));
12724 op_reg = scratch;
12725 }
12726
12727 else
12728 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12729
12730 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12731 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12732 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12733 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12734 new_addr = scratch;
12735 }
12736 break;
12737
12738 /* If this is an indirect address, make sure it is a base register. */
12739 case REG:
12740 case SUBREG:
12741 if (!base_reg_operand (addr, GET_MODE (addr)))
12742 {
12743 emit_insn (gen_rtx_SET (scratch, addr));
12744 new_addr = scratch;
12745 }
12746 break;
12747
12748 /* If this is an indexed address, make sure the register class can handle
12749 indexed addresses for this mode. */
12750 case PLUS:
12751 op0 = XEXP (addr, 0);
12752 op1 = XEXP (addr, 1);
12753 if (!base_reg_operand (op0, Pmode))
12754 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12755
12756 else if (int_reg_operand (op1, Pmode))
12757 {
12758 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12759 {
12760 emit_insn (gen_rtx_SET (scratch, addr));
12761 new_addr = scratch;
12762 }
12763 }
12764
12765 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12766 {
12767 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12768 || !quad_address_p (addr, mode, false))
12769 {
12770 emit_insn (gen_rtx_SET (scratch, addr));
12771 new_addr = scratch;
12772 }
12773 }
12774
12775 /* Make sure the register class can handle offset addresses. */
12776 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12777 {
12778 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12779 {
12780 emit_insn (gen_rtx_SET (scratch, addr));
12781 new_addr = scratch;
12782 }
12783 }
12784
12785 else
12786 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12787
12788 break;
12789
12790 case LO_SUM:
12791 op0 = XEXP (addr, 0);
12792 op1 = XEXP (addr, 1);
12793 if (!base_reg_operand (op0, Pmode))
12794 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12795
12796 else if (int_reg_operand (op1, Pmode))
12797 {
12798 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12799 {
12800 emit_insn (gen_rtx_SET (scratch, addr));
12801 new_addr = scratch;
12802 }
12803 }
12804
12805 /* Quad offsets are restricted and can't handle normal addresses. */
12806 else if (mode_supports_dq_form (mode))
12807 {
12808 emit_insn (gen_rtx_SET (scratch, addr));
12809 new_addr = scratch;
12810 }
12811
12812 /* Make sure the register class can handle offset addresses. */
12813 else if (legitimate_lo_sum_address_p (mode, addr, false))
12814 {
12815 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12816 {
12817 emit_insn (gen_rtx_SET (scratch, addr));
12818 new_addr = scratch;
12819 }
12820 }
12821
12822 else
12823 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12824
12825 break;
12826
12827 case SYMBOL_REF:
12828 case CONST:
12829 case LABEL_REF:
12830 rs6000_emit_move (scratch, addr, Pmode);
12831 new_addr = scratch;
12832 break;
12833
12834 default:
12835 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12836 }
12837
12838 /* Adjust the address if it changed. */
12839 if (addr != new_addr)
12840 {
12841 mem = replace_equiv_address_nv (mem, new_addr);
12842 if (TARGET_DEBUG_ADDR)
12843 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12844 }
12845
12846 /* Now create the move. */
12847 if (store_p)
12848 emit_insn (gen_rtx_SET (mem, reg));
12849 else
12850 emit_insn (gen_rtx_SET (reg, mem));
12851
12852 return;
12853 }
12854
12855 /* Convert reloads involving 64-bit gprs and misaligned offset
12856 addressing, or multiple 32-bit gprs and offsets that are too large,
12857 to use indirect addressing. */
12858
12859 void
12860 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
12861 {
12862 int regno = true_regnum (reg);
12863 enum reg_class rclass;
12864 rtx addr;
12865 rtx scratch_or_premodify = scratch;
12866
12867 if (TARGET_DEBUG_ADDR)
12868 {
12869 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
12870 store_p ? "store" : "load");
12871 fprintf (stderr, "reg:\n");
12872 debug_rtx (reg);
12873 fprintf (stderr, "mem:\n");
12874 debug_rtx (mem);
12875 fprintf (stderr, "scratch:\n");
12876 debug_rtx (scratch);
12877 }
12878
12879 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
12880 gcc_assert (MEM_P (mem));
12881 rclass = REGNO_REG_CLASS (regno);
12882 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
12883 addr = XEXP (mem, 0);
12884
12885 if (GET_CODE (addr) == PRE_MODIFY)
12886 {
12887 gcc_assert (REG_P (XEXP (addr, 0))
12888 && GET_CODE (XEXP (addr, 1)) == PLUS
12889 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
12890 scratch_or_premodify = XEXP (addr, 0);
12891 addr = XEXP (addr, 1);
12892 }
12893 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
12894
12895 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
12896
12897 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
12898
12899 /* Now create the move. */
12900 if (store_p)
12901 emit_insn (gen_rtx_SET (mem, reg));
12902 else
12903 emit_insn (gen_rtx_SET (reg, mem));
12904
12905 return;
12906 }
12907
12908 /* Given an rtx X being reloaded into a reg required to be
12909 in class CLASS, return the class of reg to actually use.
12910 In general this is just CLASS; but on some machines
12911 in some cases it is preferable to use a more restrictive class.
12912
12913 On the RS/6000, we have to return NO_REGS when we want to reload a
12914 floating-point CONST_DOUBLE to force it to be copied to memory.
12915
12916 We also don't want to reload integer values into floating-point
12917 registers if we can at all help it. In fact, this can
12918 cause reload to die, if it tries to generate a reload of CTR
12919 into a FP register and discovers it doesn't have the memory location
12920 required.
12921
12922 ??? Would it be a good idea to have reload do the converse, that is
12923 try to reload floating modes into FP registers if possible?
12924 */
12925
12926 static enum reg_class
12927 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12928 {
12929 machine_mode mode = GET_MODE (x);
12930 bool is_constant = CONSTANT_P (x);
12931
12932 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12933 reload class for it. */
12934 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12935 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12936 return NO_REGS;
12937
12938 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12939 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12940 return NO_REGS;
12941
12942 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12943 the reloading of address expressions using PLUS into floating point
12944 registers. */
12945 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12946 {
12947 if (is_constant)
12948 {
12949 /* Zero is always allowed in all VSX registers. */
12950 if (x == CONST0_RTX (mode))
12951 return rclass;
12952
12953 /* If this is a vector constant that can be formed with a few Altivec
12954 instructions, we want altivec registers. */
12955 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12956 return ALTIVEC_REGS;
12957
12958 /* If this is an integer constant that can easily be loaded into
12959 vector registers, allow it. */
12960 if (CONST_INT_P (x))
12961 {
12962 HOST_WIDE_INT value = INTVAL (x);
12963
12964 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12965 2.06 can generate it in the Altivec registers with
12966 VSPLTI<x>. */
12967 if (value == -1)
12968 {
12969 if (TARGET_P8_VECTOR)
12970 return rclass;
12971 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12972 return ALTIVEC_REGS;
12973 else
12974 return NO_REGS;
12975 }
12976
12977 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12978 a sign extend in the Altivec registers. */
12979 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12980 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12981 return ALTIVEC_REGS;
12982 }
12983
12984 /* Force constant to memory. */
12985 return NO_REGS;
12986 }
12987
12988 /* D-form addressing can easily reload the value. */
12989 if (mode_supports_vmx_dform (mode)
12990 || mode_supports_dq_form (mode))
12991 return rclass;
12992
12993 /* If this is a scalar floating point value and we don't have D-form
12994 addressing, prefer the traditional floating point registers so that we
12995 can use D-form (register+offset) addressing. */
12996 if (rclass == VSX_REGS
12997 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12998 return FLOAT_REGS;
12999
13000 /* Prefer the Altivec registers if Altivec is handling the vector
13001 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13002 loads. */
13003 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13004 || mode == V1TImode)
13005 return ALTIVEC_REGS;
13006
13007 return rclass;
13008 }
13009
13010 if (is_constant || GET_CODE (x) == PLUS)
13011 {
13012 if (reg_class_subset_p (GENERAL_REGS, rclass))
13013 return GENERAL_REGS;
13014 if (reg_class_subset_p (BASE_REGS, rclass))
13015 return BASE_REGS;
13016 return NO_REGS;
13017 }
13018
13019 /* For the vector pair and vector quad modes, prefer their natural register
13020 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13021 the GPR registers. */
13022 if (rclass == GEN_OR_FLOAT_REGS)
13023 {
13024 if (mode == OOmode)
13025 return VSX_REGS;
13026
13027 if (mode == XOmode)
13028 return FLOAT_REGS;
13029
13030 if (GET_MODE_CLASS (mode) == MODE_INT)
13031 return GENERAL_REGS;
13032 }
13033
13034 return rclass;
13035 }
13036
13037 /* Debug version of rs6000_preferred_reload_class. */
13038 static enum reg_class
13039 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13040 {
13041 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13042
13043 fprintf (stderr,
13044 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13045 "mode = %s, x:\n",
13046 reg_class_names[ret], reg_class_names[rclass],
13047 GET_MODE_NAME (GET_MODE (x)));
13048 debug_rtx (x);
13049
13050 return ret;
13051 }
13052
13053 /* If we are copying between FP or AltiVec registers and anything else, we need
13054    a memory location.  The exception is when we are targeting ppc64 and the direct
13055    move instructions between FPRs and GPRs are available.  Also, under VSX, you
13056 can copy vector registers from the FP register set to the Altivec register
13057 set and vice versa. */
13058
13059 static bool
13060 rs6000_secondary_memory_needed (machine_mode mode,
13061 reg_class_t from_class,
13062 reg_class_t to_class)
13063 {
13064 enum rs6000_reg_type from_type, to_type;
13065 bool altivec_p = ((from_class == ALTIVEC_REGS)
13066 || (to_class == ALTIVEC_REGS));
13067
13068   /* If a simple/direct move is available, we don't need secondary memory.  */
13069 from_type = reg_class_to_reg_type[(int)from_class];
13070 to_type = reg_class_to_reg_type[(int)to_class];
13071
13072 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13073 (secondary_reload_info *)0, altivec_p))
13074 return false;
13075
13076 /* If we have a floating point or vector register class, we need to use
13077 memory to transfer the data. */
13078 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13079 return true;
13080
13081 return false;
13082 }
13083
13084 /* Debug version of rs6000_secondary_memory_needed. */
13085 static bool
13086 rs6000_debug_secondary_memory_needed (machine_mode mode,
13087 reg_class_t from_class,
13088 reg_class_t to_class)
13089 {
13090 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13091
13092 fprintf (stderr,
13093 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13094 "to_class = %s, mode = %s\n",
13095 ret ? "true" : "false",
13096 reg_class_names[from_class],
13097 reg_class_names[to_class],
13098 GET_MODE_NAME (mode));
13099
13100 return ret;
13101 }
13102
13103 /* Return the register class of a scratch register needed to copy IN into
13104 or out of a register in RCLASS in MODE. If it can be done directly,
13105 NO_REGS is returned. */
13106
13107 static enum reg_class
13108 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13109 rtx in)
13110 {
13111 int regno;
13112
13113 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13114 #if TARGET_MACHO
13115 && MACHOPIC_INDIRECT
13116 #endif
13117 ))
13118 {
13119 /* We cannot copy a symbolic operand directly into anything
13120 other than BASE_REGS for TARGET_ELF. So indicate that a
13121 register from BASE_REGS is needed as an intermediate
13122 register.
13123
13124 On Darwin, pic addresses require a load from memory, which
13125 needs a base register. */
13126 if (rclass != BASE_REGS
13127 && (SYMBOL_REF_P (in)
13128 || GET_CODE (in) == HIGH
13129 || GET_CODE (in) == LABEL_REF
13130 || GET_CODE (in) == CONST))
13131 return BASE_REGS;
13132 }
13133
13134 if (REG_P (in))
13135 {
13136 regno = REGNO (in);
13137 if (!HARD_REGISTER_NUM_P (regno))
13138 {
13139 regno = true_regnum (in);
13140 if (!HARD_REGISTER_NUM_P (regno))
13141 regno = -1;
13142 }
13143 }
13144 else if (SUBREG_P (in))
13145 {
13146 regno = true_regnum (in);
13147 if (!HARD_REGISTER_NUM_P (regno))
13148 regno = -1;
13149 }
13150 else
13151 regno = -1;
13152
13153 /* If we have VSX register moves, prefer moving scalar values between
13154 Altivec registers and GPR by going via an FPR (and then via memory)
13155 instead of reloading the secondary memory address for Altivec moves. */
13156 if (TARGET_VSX
13157 && GET_MODE_SIZE (mode) < 16
13158 && !mode_supports_vmx_dform (mode)
13159 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13160 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13161 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13162 && (regno >= 0 && INT_REGNO_P (regno)))))
13163 return FLOAT_REGS;
13164
13165 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13166 into anything. */
13167 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13168 || (regno >= 0 && INT_REGNO_P (regno)))
13169 return NO_REGS;
13170
13171 /* Constants, memory, and VSX registers can go into VSX registers (both the
13172 traditional floating point and the altivec registers). */
13173 if (rclass == VSX_REGS
13174 && (regno == -1 || VSX_REGNO_P (regno)))
13175 return NO_REGS;
13176
13177 /* Constants, memory, and FP registers can go into FP registers. */
13178 if ((regno == -1 || FP_REGNO_P (regno))
13179 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13180 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13181
13182   /* Memory and AltiVec registers can go into AltiVec registers.  */
13183 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13184 && rclass == ALTIVEC_REGS)
13185 return NO_REGS;
13186
13187 /* We can copy among the CR registers. */
13188 if ((rclass == CR_REGS || rclass == CR0_REGS)
13189 && regno >= 0 && CR_REGNO_P (regno))
13190 return NO_REGS;
13191
13192 /* Otherwise, we need GENERAL_REGS. */
13193 return GENERAL_REGS;
13194 }
13195
13196 /* Debug version of rs6000_secondary_reload_class. */
13197 static enum reg_class
13198 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13199 machine_mode mode, rtx in)
13200 {
13201 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13202 fprintf (stderr,
13203 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13204 "mode = %s, input rtx:\n",
13205 reg_class_names[ret], reg_class_names[rclass],
13206 GET_MODE_NAME (mode));
13207 debug_rtx (in);
13208
13209 return ret;
13210 }
13211
13212 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13213
13214 static bool
13215 rs6000_can_change_mode_class (machine_mode from,
13216 machine_mode to,
13217 reg_class_t rclass)
13218 {
13219 unsigned from_size = GET_MODE_SIZE (from);
13220 unsigned to_size = GET_MODE_SIZE (to);
13221
13222 if (from_size != to_size)
13223 {
13224 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13225
13226 if (reg_classes_intersect_p (xclass, rclass))
13227 {
13228 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13229 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13230 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13231 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13232
13233 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13234 single register under VSX because the scalar part of the register
13235 is in the upper 64-bits, and not the lower 64-bits. Types like
13236 	     TFmode/TDmode that take 2 scalar registers can overlap.  128-bit
13237 IEEE floating point can't overlap, and neither can small
13238 values. */
13239
13240 if (to_float128_vector_p && from_float128_vector_p)
13241 return true;
13242
13243 else if (to_float128_vector_p || from_float128_vector_p)
13244 return false;
13245
13246 	  /* TDmode in floating-point registers must always go into a register
13247 pair with the most significant word in the even-numbered register
13248 to match ISA requirements. In little-endian mode, this does not
13249 match subreg numbering, so we cannot allow subregs. */
13250 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13251 return false;
13252
13253 /* Allow SD<->DD changes, since SDmode values are stored in
13254 the low half of the DDmode, just like target-independent
13255 code expects. We need to allow at least SD->DD since
13256 rs6000_secondary_memory_needed_mode asks for that change
13257 to be made for SD reloads. */
13258 if ((to == DDmode && from == SDmode)
13259 || (to == SDmode && from == DDmode))
13260 return true;
13261
13262 if (from_size < 8 || to_size < 8)
13263 return false;
13264
13265 if (from_size == 8 && (8 * to_nregs) != to_size)
13266 return false;
13267
13268 if (to_size == 8 && (8 * from_nregs) != from_size)
13269 return false;
13270
13271 return true;
13272 }
13273 else
13274 return true;
13275 }
13276
13277 /* Since the VSX register set includes traditional floating point registers
13278 and altivec registers, just check for the size being different instead of
13279 trying to check whether the modes are vector modes. Otherwise it won't
13280 allow say DF and DI to change classes. For types like TFmode and TDmode
13281 that take 2 64-bit registers, rather than a single 128-bit register, don't
13282    allow subregs of those types to other 128-bit types.  */
13283 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13284 {
13285 unsigned num_regs = (from_size + 15) / 16;
13286 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13287 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13288 return false;
13289
13290 return (from_size == 8 || from_size == 16);
13291 }
13292
13293 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13294 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13295 return false;
13296
13297 return true;
13298 }
13299
13300 /* Debug version of rs6000_can_change_mode_class. */
13301 static bool
13302 rs6000_debug_can_change_mode_class (machine_mode from,
13303 machine_mode to,
13304 reg_class_t rclass)
13305 {
13306 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13307
13308 fprintf (stderr,
13309 "rs6000_can_change_mode_class, return %s, from = %s, "
13310 "to = %s, rclass = %s\n",
13311 ret ? "true" : "false",
13312 GET_MODE_NAME (from), GET_MODE_NAME (to),
13313 reg_class_names[rclass]);
13314
13315 return ret;
13316 }
13317 \f
13318 /* Return a string to do a move operation of 128 bits of data. */
13319
13320 const char *
13321 rs6000_output_move_128bit (rtx operands[])
13322 {
13323 rtx dest = operands[0];
13324 rtx src = operands[1];
13325 machine_mode mode = GET_MODE (dest);
13326 int dest_regno;
13327 int src_regno;
13328 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13329 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13330
13331 if (REG_P (dest))
13332 {
13333 dest_regno = REGNO (dest);
13334 dest_gpr_p = INT_REGNO_P (dest_regno);
13335 dest_fp_p = FP_REGNO_P (dest_regno);
13336 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13337 dest_vsx_p = dest_fp_p | dest_vmx_p;
13338 }
13339 else
13340 {
13341 dest_regno = -1;
13342 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13343 }
13344
13345 if (REG_P (src))
13346 {
13347 src_regno = REGNO (src);
13348 src_gpr_p = INT_REGNO_P (src_regno);
13349 src_fp_p = FP_REGNO_P (src_regno);
13350 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13351 src_vsx_p = src_fp_p | src_vmx_p;
13352 }
13353 else
13354 {
13355 src_regno = -1;
13356 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13357 }
13358
13359 /* Register moves. */
13360 if (dest_regno >= 0 && src_regno >= 0)
13361 {
13362 if (dest_gpr_p)
13363 {
13364 if (src_gpr_p)
13365 return "#";
13366
13367 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13368 return (WORDS_BIG_ENDIAN
13369 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13370 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13371
13372 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13373 return "#";
13374 }
13375
13376 else if (TARGET_VSX && dest_vsx_p)
13377 {
13378 if (src_vsx_p)
13379 return "xxlor %x0,%x1,%x1";
13380
13381 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13382 return (WORDS_BIG_ENDIAN
13383 ? "mtvsrdd %x0,%1,%L1"
13384 : "mtvsrdd %x0,%L1,%1");
13385
13386 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13387 return "#";
13388 }
13389
13390 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13391 return "vor %0,%1,%1";
13392
13393 else if (dest_fp_p && src_fp_p)
13394 return "#";
13395 }
13396
13397 /* Loads. */
13398 else if (dest_regno >= 0 && MEM_P (src))
13399 {
13400 if (dest_gpr_p)
13401 {
13402 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13403 return "lq %0,%1";
13404 else
13405 return "#";
13406 }
13407
13408 else if (TARGET_ALTIVEC && dest_vmx_p
13409 && altivec_indexed_or_indirect_operand (src, mode))
13410 return "lvx %0,%y1";
13411
13412 else if (TARGET_VSX && dest_vsx_p)
13413 {
13414 if (mode_supports_dq_form (mode)
13415 && quad_address_p (XEXP (src, 0), mode, true))
13416 return "lxv %x0,%1";
13417
13418 else if (TARGET_P9_VECTOR)
13419 return "lxvx %x0,%y1";
13420
13421 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13422 return "lxvw4x %x0,%y1";
13423
13424 else
13425 return "lxvd2x %x0,%y1";
13426 }
13427
13428 else if (TARGET_ALTIVEC && dest_vmx_p)
13429 return "lvx %0,%y1";
13430
13431 else if (dest_fp_p)
13432 return "#";
13433 }
13434
13435 /* Stores. */
13436 else if (src_regno >= 0 && MEM_P (dest))
13437 {
13438 if (src_gpr_p)
13439 {
13440 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13441 return "stq %1,%0";
13442 else
13443 return "#";
13444 }
13445
13446 else if (TARGET_ALTIVEC && src_vmx_p
13447 && altivec_indexed_or_indirect_operand (dest, mode))
13448 return "stvx %1,%y0";
13449
13450 else if (TARGET_VSX && src_vsx_p)
13451 {
13452 if (mode_supports_dq_form (mode)
13453 && quad_address_p (XEXP (dest, 0), mode, true))
13454 return "stxv %x1,%0";
13455
13456 else if (TARGET_P9_VECTOR)
13457 return "stxvx %x1,%y0";
13458
13459 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13460 return "stxvw4x %x1,%y0";
13461
13462 else
13463 return "stxvd2x %x1,%y0";
13464 }
13465
13466 else if (TARGET_ALTIVEC && src_vmx_p)
13467 return "stvx %1,%y0";
13468
13469 else if (src_fp_p)
13470 return "#";
13471 }
13472
13473 /* Constants. */
13474 else if (dest_regno >= 0
13475 && (CONST_INT_P (src)
13476 || CONST_WIDE_INT_P (src)
13477 || CONST_DOUBLE_P (src)
13478 || GET_CODE (src) == CONST_VECTOR))
13479 {
13480 if (dest_gpr_p)
13481 return "#";
13482
13483 else if ((dest_vmx_p && TARGET_ALTIVEC)
13484 || (dest_vsx_p && TARGET_VSX))
13485 return output_vec_const_move (operands);
13486 }
13487
13488 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13489 }
13490
13491 /* Validate a 128-bit move. */
13492 bool
13493 rs6000_move_128bit_ok_p (rtx operands[])
13494 {
13495 machine_mode mode = GET_MODE (operands[0]);
13496 return (gpc_reg_operand (operands[0], mode)
13497 || gpc_reg_operand (operands[1], mode));
13498 }
13499
13500 /* Return true if a 128-bit move needs to be split. */
13501 bool
13502 rs6000_split_128bit_ok_p (rtx operands[])
13503 {
13504 if (!reload_completed)
13505 return false;
13506
13507 if (!gpr_or_gpr_p (operands[0], operands[1]))
13508 return false;
13509
13510 if (quad_load_store_p (operands[0], operands[1]))
13511 return false;
13512
13513 return true;
13514 }
13515
13516 \f
13517 /* Given a comparison operation, return the bit number in CCR to test. We
13518 know this is a valid comparison.
13519
13520 SCC_P is 1 if this is for an scc. That means that %D will have been
13521 used instead of %C, so the bits will be in different places.
13522
13523 Return -1 if OP isn't a valid comparison for some reason. */
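/* For example, a GT comparison in CR2 gives BASE_BIT = 8 and tests bit 9
   (BASE_BIT + 1); GE with SCC_P set tests bit 11, the unordered position
   filled in by a preceding cror, while GE without SCC_P tests bit 8,
   i.e. ! LT.  */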
13524
13525 int
13526 ccr_bit (rtx op, int scc_p)
13527 {
13528 enum rtx_code code = GET_CODE (op);
13529 machine_mode cc_mode;
13530 int cc_regnum;
13531 int base_bit;
13532 rtx reg;
13533
13534 if (!COMPARISON_P (op))
13535 return -1;
13536
13537 reg = XEXP (op, 0);
13538
13539 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13540 return -1;
13541
13542 cc_mode = GET_MODE (reg);
13543 cc_regnum = REGNO (reg);
13544 base_bit = 4 * (cc_regnum - CR0_REGNO);
13545
13546 validate_condition_mode (code, cc_mode);
13547
13548 /* When generating a sCOND operation, only positive conditions are
13549 allowed. */
13550 if (scc_p)
13551 switch (code)
13552 {
13553 case EQ:
13554 case GT:
13555 case LT:
13556 case UNORDERED:
13557 case GTU:
13558 case LTU:
13559 break;
13560 default:
13561 return -1;
13562 }
13563
13564 switch (code)
13565 {
13566 case NE:
13567 return scc_p ? base_bit + 3 : base_bit + 2;
13568 case EQ:
13569 return base_bit + 2;
13570 case GT: case GTU: case UNLE:
13571 return base_bit + 1;
13572 case LT: case LTU: case UNGE:
13573 return base_bit;
13574 case ORDERED: case UNORDERED:
13575 return base_bit + 3;
13576
13577 case GE: case GEU:
13578 /* If scc, we will have done a cror to put the bit in the
13579 unordered position. So test that bit. For integer, this is ! LT
13580 unless this is an scc insn. */
13581 return scc_p ? base_bit + 3 : base_bit;
13582
13583 case LE: case LEU:
13584 return scc_p ? base_bit + 3 : base_bit + 1;
13585
13586 default:
13587 return -1;
13588 }
13589 }
13590 \f
13591 /* Return the GOT register. */
13592
13593 rtx
13594 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13595 {
13596 /* The second flow pass currently (June 1999) can't update
13597 regs_ever_live without disturbing other parts of the compiler, so
13598 update it here to make the prolog/epilogue code happy. */
13599 if (!can_create_pseudo_p ()
13600 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13601 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13602
13603 crtl->uses_pic_offset_table = 1;
13604
13605 return pic_offset_table_rtx;
13606 }
13607 \f
13608 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13609
13610 /* Write out a function code label. */
13611
13612 void
13613 rs6000_output_function_entry (FILE *file, const char *fname)
13614 {
13615 if (fname[0] != '.')
13616 {
13617 switch (DEFAULT_ABI)
13618 {
13619 default:
13620 gcc_unreachable ();
13621
13622 case ABI_AIX:
13623 if (DOT_SYMBOLS)
13624 putc ('.', file);
13625 else
13626 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13627 break;
13628
13629 case ABI_ELFv2:
13630 case ABI_V4:
13631 case ABI_DARWIN:
13632 break;
13633 }
13634 }
13635
13636 RS6000_OUTPUT_BASENAME (file, fname);
13637 }
13638
13639 /* Print an operand. Recognize special options, documented below. */
13640
13641 #if TARGET_ELF
13642 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13643 only introduced by the linker, when applying the sda21
13644 relocation. */
13645 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13646 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13647 #else
13648 #define SMALL_DATA_RELOC "sda21"
13649 #define SMALL_DATA_REG 0
13650 #endif
13651
13652 void
13653 print_operand (FILE *file, rtx x, int code)
13654 {
13655 int i;
13656 unsigned HOST_WIDE_INT uval;
13657
13658 switch (code)
13659 {
13660 /* %a is output_address. */
13661
13662 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13663 output_operand. */
13664
13665 case 'A':
13666 /* Write the MMA accumulator number associated with VSX register X. */
13667 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13668 output_operand_lossage ("invalid %%A value");
13669 else
13670 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13671 return;
13672
13673 case 'D':
13674 /* Like 'J' but get to the GT bit only. */
13675 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13676 {
13677 output_operand_lossage ("invalid %%D value");
13678 return;
13679 }
13680
13681 /* Bit 1 is GT bit. */
13682 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13683
13684 /* Add one for shift count in rlinm for scc. */
13685 fprintf (file, "%d", i + 1);
13686 return;
13687
13688 case 'e':
13689 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13690 if (! INT_P (x))
13691 {
13692 output_operand_lossage ("invalid %%e value");
13693 return;
13694 }
13695
13696 uval = INTVAL (x);
13697 if ((uval & 0xffff) == 0 && uval != 0)
13698 putc ('s', file);
13699 return;
13700
13701 case 'E':
13702       /* X is a CR register.  Print the number of the EQ bit of the CR.  */
13703 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13704 output_operand_lossage ("invalid %%E value");
13705 else
13706 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13707 return;
13708
13709 case 'f':
13710 /* X is a CR register. Print the shift count needed to move it
13711 to the high-order four bits. */
13712 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13713 output_operand_lossage ("invalid %%f value");
13714 else
13715 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13716 return;
13717
13718 case 'F':
13719 /* Similar, but print the count for the rotate in the opposite
13720 direction. */
13721 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13722 output_operand_lossage ("invalid %%F value");
13723 else
13724 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13725 return;
13726
13727 case 'G':
13728 /* X is a constant integer. If it is negative, print "m",
13729 otherwise print "z". This is to make an aze or ame insn. */
13730 if (!CONST_INT_P (x))
13731 output_operand_lossage ("invalid %%G value");
13732 else if (INTVAL (x) >= 0)
13733 putc ('z', file);
13734 else
13735 putc ('m', file);
13736 return;
13737
13738 case 'h':
13739 /* If constant, output low-order five bits. Otherwise, write
13740 normally. */
13741 if (INT_P (x))
13742 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13743 else
13744 print_operand (file, x, 0);
13745 return;
13746
13747 case 'H':
13748 /* If constant, output low-order six bits. Otherwise, write
13749 normally. */
13750 if (INT_P (x))
13751 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13752 else
13753 print_operand (file, x, 0);
13754 return;
13755
13756 case 'I':
13757 /* Print `i' if this is a constant, else nothing. */
13758 if (INT_P (x))
13759 putc ('i', file);
13760 return;
13761
13762 case 'j':
13763 /* Write the bit number in CCR for jump. */
13764 i = ccr_bit (x, 0);
13765 if (i == -1)
13766 output_operand_lossage ("invalid %%j code");
13767 else
13768 fprintf (file, "%d", i);
13769 return;
13770
13771 case 'J':
13772 /* Similar, but add one for shift count in rlinm for scc and pass
13773 scc flag to `ccr_bit'. */
13774 i = ccr_bit (x, 1);
13775 if (i == -1)
13776 output_operand_lossage ("invalid %%J code");
13777 else
13778 /* If we want bit 31, write a shift count of zero, not 32. */
13779 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13780 return;
13781
13782 case 'k':
13783 /* X must be a constant. Write the 1's complement of the
13784 constant. */
13785 if (! INT_P (x))
13786 output_operand_lossage ("invalid %%k value");
13787 else
13788 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13789 return;
13790
13791 case 'K':
13792 /* X must be a symbolic constant on ELF. Write an
13793 expression suitable for an 'addi' that adds in the low 16
13794 bits of the MEM. */
13795 if (GET_CODE (x) == CONST)
13796 {
13797 if (GET_CODE (XEXP (x, 0)) != PLUS
13798 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13799 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13800 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13801 output_operand_lossage ("invalid %%K value");
13802 }
13803 print_operand_address (file, x);
13804 fputs ("@l", file);
13805 return;
13806
13807 /* %l is output_asm_label. */
13808
13809 case 'L':
13810 /* Write second word of DImode or DFmode reference. Works on register
13811 or non-indexed memory only. */
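      /* E.g. for (reg:DI 5) this prints the name of register 6.  */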
13812 if (REG_P (x))
13813 fputs (reg_names[REGNO (x) + 1], file);
13814 else if (MEM_P (x))
13815 {
13816 machine_mode mode = GET_MODE (x);
13817 /* Handle possible auto-increment. Since it is pre-increment and
13818 we have already done it, we can just use an offset of word. */
13819 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13820 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13821 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13822 UNITS_PER_WORD));
13823 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13824 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13825 UNITS_PER_WORD));
13826 else
13827 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13828 UNITS_PER_WORD),
13829 0));
13830
13831 if (small_data_operand (x, GET_MODE (x)))
13832 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13833 reg_names[SMALL_DATA_REG]);
13834 }
13835 return;
13836
13837 case 'N': /* Unused */
13838 /* Write the number of elements in the vector times 4. */
13839 if (GET_CODE (x) != PARALLEL)
13840 output_operand_lossage ("invalid %%N value");
13841 else
13842 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13843 return;
13844
13845 case 'O': /* Unused */
13846 /* Similar, but subtract 1 first. */
13847 if (GET_CODE (x) != PARALLEL)
13848 output_operand_lossage ("invalid %%O value");
13849 else
13850 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13851 return;
13852
13853 case 'p':
13854 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13855 if (! INT_P (x)
13856 || INTVAL (x) < 0
13857 || (i = exact_log2 (INTVAL (x))) < 0)
13858 output_operand_lossage ("invalid %%p value");
13859 else
13860 fprintf (file, "%d", i);
13861 return;
13862
13863 case 'P':
13864 /* The operand must be an indirect memory reference. The result
13865 is the register name. */
13866 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
13867 || REGNO (XEXP (x, 0)) >= 32)
13868 output_operand_lossage ("invalid %%P value");
13869 else
13870 fputs (reg_names[REGNO (XEXP (x, 0))], file);
13871 return;
13872
13873 case 'q':
13874 /* This outputs the logical code corresponding to a boolean
13875 expression. The expression may have one or both operands
13876 negated (if one, only the first one). For condition register
13877 logical operations, it will also treat the negated
13878 CR codes as NOTs, but not handle NOTs of them. */
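      /* For example, (ior (not (reg)) (reg)) prints "orc", and
	 (and (not (reg)) (not (reg))) prints "nor".  */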
13879 {
13880 const char *const *t = 0;
13881 const char *s;
13882 enum rtx_code code = GET_CODE (x);
13883 static const char * const tbl[3][3] = {
13884 { "and", "andc", "nor" },
13885 { "or", "orc", "nand" },
13886 { "xor", "eqv", "xor" } };
13887
13888 if (code == AND)
13889 t = tbl[0];
13890 else if (code == IOR)
13891 t = tbl[1];
13892 else if (code == XOR)
13893 t = tbl[2];
13894 else
13895 output_operand_lossage ("invalid %%q value");
13896
13897 if (GET_CODE (XEXP (x, 0)) != NOT)
13898 s = t[0];
13899 else
13900 {
13901 if (GET_CODE (XEXP (x, 1)) == NOT)
13902 s = t[2];
13903 else
13904 s = t[1];
13905 }
13906
13907 fputs (s, file);
13908 }
13909 return;
13910
13911 case 'Q':
13912 if (! TARGET_MFCRF)
13913 return;
13914 fputc (',', file);
13915 /* FALLTHRU */
13916
13917 case 'R':
13918 /* X is a CR register. Print the mask for `mtcrf'. */
13919 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13920 output_operand_lossage ("invalid %%R value");
13921 else
13922 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
13923 return;
13924
13925 case 's':
13926 /* Low 5 bits of 32 - value */
13927 if (! INT_P (x))
13928 output_operand_lossage ("invalid %%s value");
13929 else
13930 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13931 return;
13932
13933 case 't':
13934 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13935 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13936 {
13937 output_operand_lossage ("invalid %%t value");
13938 return;
13939 }
13940
13941 /* Bit 3 is OV bit. */
13942 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13943
13944 /* If we want bit 31, write a shift count of zero, not 32. */
13945 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13946 return;
13947
13948 case 'T':
13949 /* Print the symbolic name of a branch target register. */
13950 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13951 x = XVECEXP (x, 0, 0);
13952 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13953 && REGNO (x) != CTR_REGNO))
13954 output_operand_lossage ("invalid %%T value");
13955 else if (REGNO (x) == LR_REGNO)
13956 fputs ("lr", file);
13957 else
13958 fputs ("ctr", file);
13959 return;
13960
13961 case 'u':
13962 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13963 for use in unsigned operand. */
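      /* E.g. 0x12340000 and 0x1234 both print 0x1234.  */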
13964 if (! INT_P (x))
13965 {
13966 output_operand_lossage ("invalid %%u value");
13967 return;
13968 }
13969
13970 uval = INTVAL (x);
13971 if ((uval & 0xffff) == 0)
13972 uval >>= 16;
13973
13974 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13975 return;
13976
13977 case 'v':
13978 /* High-order 16 bits of constant for use in signed operand. */
13979 if (! INT_P (x))
13980 output_operand_lossage ("invalid %%v value");
13981 else
13982 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13983 (INTVAL (x) >> 16) & 0xffff);
13984 return;
13985
13986 case 'U':
13987 /* Print `u' if this has an auto-increment or auto-decrement. */
13988 if (MEM_P (x)
13989 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13990 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13991 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13992 putc ('u', file);
13993 return;
13994
13995 case 'V':
13996 /* Print the trap code for this operand. */
13997 switch (GET_CODE (x))
13998 {
13999 case EQ:
14000 fputs ("eq", file); /* 4 */
14001 break;
14002 case NE:
14003 fputs ("ne", file); /* 24 */
14004 break;
14005 case LT:
14006 fputs ("lt", file); /* 16 */
14007 break;
14008 case LE:
14009 fputs ("le", file); /* 20 */
14010 break;
14011 case GT:
14012 fputs ("gt", file); /* 8 */
14013 break;
14014 case GE:
14015 fputs ("ge", file); /* 12 */
14016 break;
14017 case LTU:
14018 fputs ("llt", file); /* 2 */
14019 break;
14020 case LEU:
14021 fputs ("lle", file); /* 6 */
14022 break;
14023 case GTU:
14024 fputs ("lgt", file); /* 1 */
14025 break;
14026 case GEU:
14027 fputs ("lge", file); /* 5 */
14028 break;
14029 default:
14030 output_operand_lossage ("invalid %%V value");
14031 }
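      /* The numbers in the comments above are the corresponding TO-field
	 encodings for the tw/td trap instructions, e.g. "lt" is 16 and
	 "ne" is lt|gt == 24.  */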
14032 break;
14033
14034 case 'w':
14035 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14036 normally. */
14037 if (INT_P (x))
14038 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
14039 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
14040 else
14041 print_operand (file, x, 0);
14042 return;
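      /* The xor/subtract idiom above sign-extends the low 16 bits, so
	 e.g. INTVAL 0x18000 prints as -32768 and 0x7fff as 32767.  */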
14043
14044 case 'x':
14045 /* X is a FPR or Altivec register used in a VSX context. */
14046 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14047 output_operand_lossage ("invalid %%x value");
14048 else
14049 {
14050 int reg = REGNO (x);
14051 int vsx_reg = (FP_REGNO_P (reg)
14052 ? reg - 32
14053 : reg - FIRST_ALTIVEC_REGNO + 32);
14054
14055 #ifdef TARGET_REGNAMES
14056 if (TARGET_REGNAMES)
14057 fprintf (file, "%%vs%d", vsx_reg);
14058 else
14059 #endif
14060 fprintf (file, "%d", vsx_reg);
14061 }
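      /* Illustrative numbering: FPRs map to vs0..vs31 (f2 prints as 2, or
	 %vs2 with -mregnames) and Altivec registers to vs32..vs63
	 (v3 prints as 35).  */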
14062 return;
14063
14064 case 'X':
14065 if (MEM_P (x)
14066 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14067 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14068 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14069 putc ('x', file);
14070 return;
14071
14072 case 'Y':
14073 /* Like 'L', for third word of TImode/PTImode. */
14074 if (REG_P (x))
14075 fputs (reg_names[REGNO (x) + 2], file);
14076 else if (MEM_P (x))
14077 {
14078 machine_mode mode = GET_MODE (x);
14079 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14080 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14081 output_address (mode, plus_constant (Pmode,
14082 XEXP (XEXP (x, 0), 0), 8));
14083 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14084 output_address (mode, plus_constant (Pmode,
14085 XEXP (XEXP (x, 0), 0), 8));
14086 else
14087 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14088 if (small_data_operand (x, GET_MODE (x)))
14089 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14090 reg_names[SMALL_DATA_REG]);
14091 }
14092 return;
14093
14094 case 'z':
14095 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14096 x = XVECEXP (x, 0, 1);
14097 /* X is a SYMBOL_REF. Write out the name preceded by a
14098 period and without any trailing data in brackets. Used for function
14099 names. If we are configured for System V (or the embedded ABI) on
14100 the PowerPC, do not emit the period, since those systems do not use
14101 TOCs and the like. */
14102 if (!SYMBOL_REF_P (x))
14103 {
14104 output_operand_lossage ("invalid %%z value");
14105 return;
14106 }
14107
14108 /* For macho, check to see if we need a stub. */
14109 if (TARGET_MACHO)
14110 {
14111 const char *name = XSTR (x, 0);
14112 #if TARGET_MACHO
14113 if (darwin_symbol_stubs
14114 && MACHOPIC_INDIRECT
14115 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14116 name = machopic_indirection_name (x, /*stub_p=*/true);
14117 #endif
14118 assemble_name (file, name);
14119 }
14120 else if (!DOT_SYMBOLS)
14121 assemble_name (file, XSTR (x, 0));
14122 else
14123 rs6000_output_function_entry (file, XSTR (x, 0));
14124 return;
14125
14126 case 'Z':
14127 /* Like 'L', for last word of TImode/PTImode. */
14128 if (REG_P (x))
14129 fputs (reg_names[REGNO (x) + 3], file);
14130 else if (MEM_P (x))
14131 {
14132 machine_mode mode = GET_MODE (x);
14133 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14134 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14135 output_address (mode, plus_constant (Pmode,
14136 XEXP (XEXP (x, 0), 0), 12));
14137 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14138 output_address (mode, plus_constant (Pmode,
14139 XEXP (XEXP (x, 0), 0), 12));
14140 else
14141 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14142 if (small_data_operand (x, GET_MODE (x)))
14143 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14144 reg_names[SMALL_DATA_REG]);
14145 }
14146 return;
14147
14148 /* Print AltiVec memory operand. */
14149 case 'y':
14150 {
14151 rtx tmp;
14152
14153 gcc_assert (MEM_P (x));
14154
14155 tmp = XEXP (x, 0);
14156
14157 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14158 && GET_CODE (tmp) == AND
14159 && CONST_INT_P (XEXP (tmp, 1))
14160 && INTVAL (XEXP (tmp, 1)) == -16)
14161 tmp = XEXP (tmp, 0);
14162 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14163 && GET_CODE (tmp) == PRE_MODIFY)
14164 tmp = XEXP (tmp, 1);
14165 if (REG_P (tmp))
14166 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14167 else
14168 {
14169 if (GET_CODE (tmp) != PLUS
14170 || !REG_P (XEXP (tmp, 0))
14171 || !REG_P (XEXP (tmp, 1)))
14172 {
14173 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14174 break;
14175 }
14176
14177 if (REGNO (XEXP (tmp, 0)) == 0)
14178 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14179 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14180 else
14181 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14182 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14183 }
14184 break;
14185 }
14186
14187 case 0:
14188 if (REG_P (x))
14189 fprintf (file, "%s", reg_names[REGNO (x)]);
14190 else if (MEM_P (x))
14191 {
14192 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14193 know the width from the mode. */
14194 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14195 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14196 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14197 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14198 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14199 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14200 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14201 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14202 else
14203 output_address (GET_MODE (x), XEXP (x, 0));
14204 }
14205 else if (toc_relative_expr_p (x, false,
14206 &tocrel_base_oac, &tocrel_offset_oac))
14207 /* This hack along with a corresponding hack in
14208 rs6000_output_addr_const_extra arranges to output addends
14209 where the assembler expects to find them. eg.
14210 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14211 without this hack would be output as "x@toc+4". We
14212 want "x+4@toc". */
14213 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14214 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14215 output_addr_const (file, XVECEXP (x, 0, 0));
14216 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14217 output_addr_const (file, XVECEXP (x, 0, 1));
14218 else
14219 output_addr_const (file, x);
14220 return;
14221
14222 case '&':
14223 if (const char *name = get_some_local_dynamic_name ())
14224 assemble_name (file, name);
14225 else
14226 output_operand_lossage ("'%%&' used without any "
14227 "local dynamic TLS references");
14228 return;
14229
14230 default:
14231 output_operand_lossage ("invalid %%xn code");
14232 }
14233 }
14234 \f
14235 /* Print the address of an operand. */
14236
14237 void
14238 print_operand_address (FILE *file, rtx x)
14239 {
14240 if (REG_P (x))
14241 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14242
14243 /* Is it a PC-relative address? */
14244 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14245 {
14246 HOST_WIDE_INT offset;
14247
14248 if (GET_CODE (x) == CONST)
14249 x = XEXP (x, 0);
14250
14251 if (GET_CODE (x) == PLUS)
14252 {
14253 offset = INTVAL (XEXP (x, 1));
14254 x = XEXP (x, 0);
14255 }
14256 else
14257 offset = 0;
14258
14259 output_addr_const (file, x);
14260
14261 if (offset)
14262 fprintf (file, "%+" PRId64, offset);
14263
14264 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14265 fprintf (file, "@got");
14266
14267 fprintf (file, "@pcrel");
14268 }
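  /* Illustrative output: a local symbol x with offset 8 prints as
     "x+8@pcrel"; an external symbol additionally gets "@got", giving
     "y@got@pcrel".  */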
14269 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14270 || GET_CODE (x) == LABEL_REF)
14271 {
14272 output_addr_const (file, x);
14273 if (small_data_operand (x, GET_MODE (x)))
14274 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14275 reg_names[SMALL_DATA_REG]);
14276 else
14277 gcc_assert (!TARGET_TOC);
14278 }
14279 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14280 && REG_P (XEXP (x, 1)))
14281 {
14282 if (REGNO (XEXP (x, 0)) == 0)
14283 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14284 reg_names[ REGNO (XEXP (x, 0)) ]);
14285 else
14286 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14287 reg_names[ REGNO (XEXP (x, 1)) ]);
14288 }
14289 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14290 && CONST_INT_P (XEXP (x, 1)))
14291 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14292 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14293 #if TARGET_MACHO
14294 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14295 && CONSTANT_P (XEXP (x, 1)))
14296 {
14297 fprintf (file, "lo16(");
14298 output_addr_const (file, XEXP (x, 1));
14299 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14300 }
14301 #endif
14302 #if TARGET_ELF
14303 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14304 && CONSTANT_P (XEXP (x, 1)))
14305 {
14306 output_addr_const (file, XEXP (x, 1));
14307 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14308 }
14309 #endif
14310 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14311 {
14312 /* This hack along with a corresponding hack in
14313 rs6000_output_addr_const_extra arranges to output addends
14314 where the assembler expects to find them. eg.
14315 (lo_sum (reg 9)
14316 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14317 without this hack would be output as "x@toc+8@l(9)". We
14318 want "x+8@toc@l(9)". */
14319 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14320 if (GET_CODE (x) == LO_SUM)
14321 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14322 else
14323 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14324 }
14325 else
14326 output_addr_const (file, x);
14327 }
14328 \f
14329 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14330
14331 bool
14332 rs6000_output_addr_const_extra (FILE *file, rtx x)
14333 {
14334 if (GET_CODE (x) == UNSPEC)
14335 switch (XINT (x, 1))
14336 {
14337 case UNSPEC_TOCREL:
14338 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14339 && REG_P (XVECEXP (x, 0, 1))
14340 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14341 output_addr_const (file, XVECEXP (x, 0, 0));
14342 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14343 {
14344 if (INTVAL (tocrel_offset_oac) >= 0)
14345 fprintf (file, "+");
14346 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14347 }
14348 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14349 {
14350 putc ('-', file);
14351 assemble_name (file, toc_label_name);
14352 need_toc_init = 1;
14353 }
14354 else if (TARGET_ELF)
14355 fputs ("@toc", file);
14356 return true;
14357
14358 #if TARGET_MACHO
14359 case UNSPEC_MACHOPIC_OFFSET:
14360 output_addr_const (file, XVECEXP (x, 0, 0));
14361 putc ('-', file);
14362 machopic_output_function_base_name (file);
14363 return true;
14364 #endif
14365 }
14366 return false;
14367 }
14368 \f
14369 /* Target hook for assembling integer objects. The PowerPC version has
14370 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14371 is defined. It also needs to handle DI-mode objects on 64-bit
14372 targets. */
14373
14374 static bool
14375 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14376 {
14377 #ifdef RELOCATABLE_NEEDS_FIXUP
14378 /* Special handling for SI values. */
14379 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14380 {
14381 static int recurse = 0;
14382
14383 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14384 the .fixup section. Since the TOC section is already relocated, we
14385 don't need to mark it here. We used to skip the text section, but it
14386 should never be valid for relocated addresses to be placed in the text
14387 section. */
14388 if (DEFAULT_ABI == ABI_V4
14389 && (TARGET_RELOCATABLE || flag_pic > 1)
14390 && in_section != toc_section
14391 && !recurse
14392 && !CONST_SCALAR_INT_P (x)
14393 && CONSTANT_P (x))
14394 {
14395 char buf[256];
14396
14397 recurse = 1;
14398 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14399 fixuplabelno++;
14400 ASM_OUTPUT_LABEL (asm_out_file, buf);
14401 fprintf (asm_out_file, "\t.long\t(");
14402 output_addr_const (asm_out_file, x);
14403 fprintf (asm_out_file, ")@fixup\n");
14404 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14405 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14406 fprintf (asm_out_file, "\t.long\t");
14407 assemble_name (asm_out_file, buf);
14408 fprintf (asm_out_file, "\n\t.previous\n");
14409 recurse = 0;
14410 return true;
14411 }
14412 /* Remove initial .'s to turn a -mcall-aixdesc function
14413 address into the address of the descriptor, not the function
14414 itself. */
14415 else if (SYMBOL_REF_P (x)
14416 && XSTR (x, 0)[0] == '.'
14417 && DEFAULT_ABI == ABI_AIX)
14418 {
14419 const char *name = XSTR (x, 0);
14420 while (*name == '.')
14421 name++;
14422
14423 fprintf (asm_out_file, "\t.long\t%s\n", name);
14424 return true;
14425 }
14426 }
14427 #endif /* RELOCATABLE_NEEDS_FIXUP */
14428 return default_assemble_integer (x, size, aligned_p);
14429 }
14430
14431 /* Return a template string for assembly to emit when making an
14432 external call. FUNOP is the call mem argument operand number. */
14433
14434 static const char *
14435 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14436 {
14437 /* -Wformat-overflow workaround, without which gcc thinks that %u
14438 might produce 10 digits. */
14439 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14440
14441 char arg[12];
14442 arg[0] = 0;
14443 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14444 {
14445 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14446 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14447 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14448 sprintf (arg, "(%%&@tlsld)");
14449 }
14450
14451 /* The magic 32768 offset here corresponds to the offset of
14452 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14453 char z[11];
14454 sprintf (z, "%%z%u%s", funop,
14455 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14456 ? "+32768" : ""));
14457
14458 static char str[32]; /* 1 spare */
14459 if (rs6000_pcrel_p ())
14460 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14461 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14462 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14463 sibcall ? "" : "\n\tnop");
14464 else if (DEFAULT_ABI == ABI_V4)
14465 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14466 flag_pic ? "@plt" : "");
14467 #if TARGET_MACHO
14468 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14469 else if (DEFAULT_ABI == ABI_DARWIN)
14470 {
14471 /* The cookie is in operand func+2. */
14472 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14473 int cookie = INTVAL (operands[funop + 2]);
14474 if (cookie & CALL_LONG)
14475 {
14476 tree funname = get_identifier (XSTR (operands[funop], 0));
14477 tree labelname = get_prev_label (funname);
14478 gcc_checking_assert (labelname && !sibcall);
14479
14480 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14481 instruction will reach 'foo', otherwise link as 'bl L42'".
14482 "L42" should be a 'branch island', that will do a far jump to
14483 'foo'. Branch islands are generated in
14484 macho_branch_islands(). */
14485 sprintf (str, "jbsr %%z%u,%.10s", funop,
14486 IDENTIFIER_POINTER (labelname));
14487 }
14488 else
14489 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14490 after the call. */
14491 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14492 }
14493 #endif
14494 else
14495 gcc_unreachable ();
14496 return str;
14497 }
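/* Illustrative templates returned above, assuming FUNOP == 0: a normal
   call on ELFv2 without pcrel yields "bl %z0\n\tnop" (the nop fills the
   TOC-restore slot), while on V4 with -fPIC it yields "bl %z0@plt".  */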
14498
14499 const char *
14500 rs6000_call_template (rtx *operands, unsigned int funop)
14501 {
14502 return rs6000_call_template_1 (operands, funop, false);
14503 }
14504
14505 const char *
14506 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14507 {
14508 return rs6000_call_template_1 (operands, funop, true);
14509 }
14510
14511 /* As above, for indirect calls. */
14512
14513 static const char *
14514 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14515 bool sibcall)
14516 {
14517 /* -Wformat-overflow workaround, without which gcc thinks that %u
14518 might produce 10 digits. Note that -Wformat-overflow will not
14519 currently warn here for str[], so do not rely on a warning to
14520 ensure str[] is correctly sized. */
14521 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14522
14523 /* Currently, funop is either 0 or 1. The maximum string is always
14524 a !speculate 64-bit __tls_get_addr call.
14525
14526 ABI_ELFv2, pcrel:
14527 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14528 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14529 . 9 crset 2\n\t
14530 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14531 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14532 . 8 beq%T1l-
14533 .---
14534 .142
14535
14536 ABI_AIX:
14537 . 9 ld 2,%3\n\t
14538 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14539 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14540 . 9 crset 2\n\t
14541 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14542 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14543 . 10 beq%T1l-\n\t
14544 . 10 ld 2,%4(1)
14545 .---
14546 .151
14547
14548 ABI_ELFv2:
14549 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14550 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14551 . 9 crset 2\n\t
14552 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14553 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14554 . 10 beq%T1l-\n\t
14555 . 10 ld 2,%3(1)
14556 .---
14557 .142
14558
14559 ABI_V4:
14560 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14561 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14562 . 9 crset 2\n\t
14563 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14564 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14565 . 8 beq%T1l-
14566 .---
14567 .141 */
14568 static char str[160]; /* 8 spare */
14569 char *s = str;
14570 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14571
14572 if (DEFAULT_ABI == ABI_AIX)
14573 s += sprintf (s,
14574 "l%s 2,%%%u\n\t",
14575 ptrload, funop + 3);
14576
14577 /* We don't need the extra code to stop indirect call speculation if
14578 calling via LR. */
14579 bool speculate = (TARGET_MACHO
14580 || rs6000_speculate_indirect_jumps
14581 || (REG_P (operands[funop])
14582 && REGNO (operands[funop]) == LR_REGNO));
14583
14584 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14585 {
14586 const char *rel64 = TARGET_64BIT ? "64" : "";
14587 char tls[29];
14588 tls[0] = 0;
14589 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14590 {
14591 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14592 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14593 rel64, funop + 1);
14594 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14595 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14596 rel64);
14597 }
14598
14599 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14600 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14601 && flag_pic == 2 ? "+32768" : "");
14602 if (!speculate)
14603 {
14604 s += sprintf (s,
14605 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14606 tls, rel64, notoc, funop, addend);
14607 s += sprintf (s, "crset 2\n\t");
14608 }
14609 s += sprintf (s,
14610 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14611 tls, rel64, notoc, funop, addend);
14612 }
14613 else if (!speculate)
14614 s += sprintf (s, "crset 2\n\t");
14615
14616 if (rs6000_pcrel_p ())
14617 {
14618 if (speculate)
14619 sprintf (s, "b%%T%ul", funop);
14620 else
14621 sprintf (s, "beq%%T%ul-", funop);
14622 }
14623 else if (DEFAULT_ABI == ABI_AIX)
14624 {
14625 if (speculate)
14626 sprintf (s,
14627 "b%%T%ul\n\t"
14628 "l%s 2,%%%u(1)",
14629 funop, ptrload, funop + 4);
14630 else
14631 sprintf (s,
14632 "beq%%T%ul-\n\t"
14633 "l%s 2,%%%u(1)",
14634 funop, ptrload, funop + 4);
14635 }
14636 else if (DEFAULT_ABI == ABI_ELFv2)
14637 {
14638 if (speculate)
14639 sprintf (s,
14640 "b%%T%ul\n\t"
14641 "l%s 2,%%%u(1)",
14642 funop, ptrload, funop + 3);
14643 else
14644 sprintf (s,
14645 "beq%%T%ul-\n\t"
14646 "l%s 2,%%%u(1)",
14647 funop, ptrload, funop + 3);
14648 }
14649 else
14650 {
14651 if (speculate)
14652 sprintf (s,
14653 "b%%T%u%s",
14654 funop, sibcall ? "" : "l");
14655 else
14656 sprintf (s,
14657 "beq%%T%u%s-%s",
14658 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14659 }
14660 return str;
14661 }
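/* Illustrative output: with -mno-speculate-indirect-jumps a call through
   CTR emits "crset 2" followed by "beqctrl-"; forcing CR0.EQ to 1 and
   using a conditional branch-and-link acts as a barrier against
   speculative execution of the indirect call.  */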
14662
14663 const char *
14664 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14665 {
14666 return rs6000_indirect_call_template_1 (operands, funop, false);
14667 }
14668
14669 const char *
14670 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14671 {
14672 return rs6000_indirect_call_template_1 (operands, funop, true);
14673 }
14674
14675 #if HAVE_AS_PLTSEQ
14676 /* Output indirect call insns. WHICH identifies the type of sequence. */
14677 const char *
14678 rs6000_pltseq_template (rtx *operands, int which)
14679 {
14680 const char *rel64 = TARGET_64BIT ? "64" : "";
14681 char tls[30];
14682 tls[0] = 0;
14683 if (GET_CODE (operands[3]) == UNSPEC)
14684 {
14685 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14686 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14687 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14688 off, rel64);
14689 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14690 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14691 off, rel64);
14692 }
14693
14694 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14695 static char str[96]; /* 10 spare */
14696 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14697 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14698 && flag_pic == 2 ? "+32768" : "");
14699 switch (which)
14700 {
14701 case RS6000_PLTSEQ_TOCSAVE:
14702 sprintf (str,
14703 "st%s\n\t"
14704 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14705 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14706 tls, rel64);
14707 break;
14708 case RS6000_PLTSEQ_PLT16_HA:
14709 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14710 sprintf (str,
14711 "lis %%0,0\n\t"
14712 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14713 tls, off, rel64);
14714 else
14715 sprintf (str,
14716 "addis %%0,%%1,0\n\t"
14717 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14718 tls, off, rel64, addend);
14719 break;
14720 case RS6000_PLTSEQ_PLT16_LO:
14721 sprintf (str,
14722 "l%s %%0,0(%%1)\n\t"
14723 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14724 TARGET_64BIT ? "d" : "wz",
14725 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14726 break;
14727 case RS6000_PLTSEQ_MTCTR:
14728 sprintf (str,
14729 "mtctr %%1\n\t"
14730 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14731 tls, rel64, addend);
14732 break;
14733 case RS6000_PLTSEQ_PLT_PCREL34:
14734 sprintf (str,
14735 "pl%s %%0,0(0),1\n\t"
14736 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14737 TARGET_64BIT ? "d" : "wz",
14738 tls, rel64);
14739 break;
14740 default:
14741 gcc_unreachable ();
14742 }
14743 return str;
14744 }
14745 #endif
14746 \f
14747 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14748 /* Emit an assembler directive to set symbol visibility for DECL to
14749 VISIBILITY_TYPE. */
14750
14751 static void
14752 rs6000_assemble_visibility (tree decl, int vis)
14753 {
14754 if (TARGET_XCOFF)
14755 return;
14756
14757 /* Functions need to have their entry point symbol visibility set as
14758 well as their descriptor symbol visibility. */
14759 if (DEFAULT_ABI == ABI_AIX
14760 && DOT_SYMBOLS
14761 && TREE_CODE (decl) == FUNCTION_DECL)
14762 {
14763 static const char * const visibility_types[] = {
14764 NULL, "protected", "hidden", "internal"
14765 };
14766
14767 const char *name, *type;
14768
14769 name = ((* targetm.strip_name_encoding)
14770 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14771 type = visibility_types[vis];
14772
14773 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14774 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14775 }
14776 else
14777 default_assemble_visibility (decl, vis);
14778 }
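/* E.g. for a hidden function foo on AIX-style ABIs this emits both
   ".hidden foo" for the descriptor symbol and ".hidden .foo" for the
   code entry point.  */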
14779 #endif
14780 \f
14781 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
14782 entry. If RECORD_P is true and the target supports named sections,
14783 the location of the NOPs will be recorded in a special object section
14784 called "__patchable_function_entries". This routine may be called
14785 twice per function to put NOPs before and after the function
14786 entry. */
14787
14788 void
14789 rs6000_print_patchable_function_entry (FILE *file,
14790 unsigned HOST_WIDE_INT patch_area_size,
14791 bool record_p)
14792 {
14793 unsigned int flags = SECTION_WRITE | SECTION_RELRO;
14794 /* When an .opd section is emitted, the function symbol is emitted into
14795 the .opd section while default_print_patchable_function_entry_1 emits
14796 the patchable area into the function section.
14797 Don't use SECTION_LINK_ORDER in that case. */
14798 if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
14799 && HAVE_GAS_SECTION_LINK_ORDER)
14800 flags |= SECTION_LINK_ORDER;
14801 default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
14802 flags);
14803 }
14804 \f
14805 enum rtx_code
14806 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14807 {
14808 /* Reversing an FP compare needs care -- an ordered compare
14809 becomes an unordered compare and vice versa. */
14810 if (mode == CCFPmode
14811 && (!flag_finite_math_only
14812 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14813 || code == UNEQ || code == LTGT))
14814 return reverse_condition_maybe_unordered (code);
14815 else
14816 return reverse_condition (code);
14817 }
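/* For example, reversing GT in CCFPmode yields UNLE rather than LE: when
   either operand is a NaN, GT is false, so the reversed condition must
   be true.  */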
14818
14819 /* Generate a compare for CODE. Return a brand-new rtx that
14820 represents the result of the compare. */
14821
14822 static rtx
14823 rs6000_generate_compare (rtx cmp, machine_mode mode)
14824 {
14825 machine_mode comp_mode;
14826 rtx compare_result;
14827 enum rtx_code code = GET_CODE (cmp);
14828 rtx op0 = XEXP (cmp, 0);
14829 rtx op1 = XEXP (cmp, 1);
14830
14831 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14832 comp_mode = CCmode;
14833 else if (FLOAT_MODE_P (mode))
14834 comp_mode = CCFPmode;
14835 else if (code == GTU || code == LTU
14836 || code == GEU || code == LEU)
14837 comp_mode = CCUNSmode;
14838 else if ((code == EQ || code == NE)
14839 && unsigned_reg_p (op0)
14840 && (unsigned_reg_p (op1)
14841 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
14842 /* These are unsigned values; perhaps there will be a later
14843 ordering compare that can be shared with this one. */
14844 comp_mode = CCUNSmode;
14845 else
14846 comp_mode = CCmode;
14847
14848 /* If we have an unsigned compare, make sure we don't have a signed value as
14849 an immediate. */
14850 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14851 && INTVAL (op1) < 0)
14852 {
14853 op0 = copy_rtx_if_shared (op0);
14854 op1 = force_reg (GET_MODE (op0), op1);
14855 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14856 }
14857
14858 /* First, the compare. */
14859 compare_result = gen_reg_rtx (comp_mode);
14860
14861 /* IEEE 128-bit support in VSX registers when we do not have hardware
14862 support. */
14863 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14864 {
14865 rtx libfunc = NULL_RTX;
14866 bool check_nan = false;
14867 rtx dest;
14868
14869 switch (code)
14870 {
14871 case EQ:
14872 case NE:
14873 libfunc = optab_libfunc (eq_optab, mode);
14874 break;
14875
14876 case GT:
14877 case GE:
14878 libfunc = optab_libfunc (ge_optab, mode);
14879 break;
14880
14881 case LT:
14882 case LE:
14883 libfunc = optab_libfunc (le_optab, mode);
14884 break;
14885
14886 case UNORDERED:
14887 case ORDERED:
14888 libfunc = optab_libfunc (unord_optab, mode);
14889 code = (code == UNORDERED) ? NE : EQ;
14890 break;
14891
14892 case UNGE:
14893 case UNGT:
14894 check_nan = true;
14895 libfunc = optab_libfunc (ge_optab, mode);
14896 code = (code == UNGE) ? GE : GT;
14897 break;
14898
14899 case UNLE:
14900 case UNLT:
14901 check_nan = true;
14902 libfunc = optab_libfunc (le_optab, mode);
14903 code = (code == UNLE) ? LE : LT;
14904 break;
14905
14906 case UNEQ:
14907 case LTGT:
14908 check_nan = true;
14909 libfunc = optab_libfunc (eq_optab, mode);
14910 code = (code == UNEQ) ? EQ : NE;
14911 break;
14912
14913 default:
14914 gcc_unreachable ();
14915 }
14916
14917 gcc_assert (libfunc);
14918
14919 if (!check_nan)
14920 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14921 SImode, op0, mode, op1, mode);
14922
14923 /* The library signals an exception for signalling NaNs, so we need to
14924 handle isgreater, etc. by first checking isordered. */
14925 else
14926 {
14927 rtx ne_rtx, normal_dest, unord_dest;
14928 rtx unord_func = optab_libfunc (unord_optab, mode);
14929 rtx join_label = gen_label_rtx ();
14930 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14931 rtx unord_cmp = gen_reg_rtx (comp_mode);
14932
14933
14934 /* Test for either value being a NaN. */
14935 gcc_assert (unord_func);
14936 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14937 SImode, op0, mode, op1, mode);
14938
14939 /* Set the result to 1; if either value is a NaN, jump straight to
14940 the join label with that value. */
14941 dest = gen_reg_rtx (SImode);
14942 emit_move_insn (dest, const1_rtx);
14943 emit_insn (gen_rtx_SET (unord_cmp,
14944 gen_rtx_COMPARE (comp_mode, unord_dest,
14945 const0_rtx)));
14946
14947 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14948 emit_jump_insn (gen_rtx_SET (pc_rtx,
14949 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14950 join_ref,
14951 pc_rtx)));
14952
14953 /* Do the normal comparison, knowing that the values are not
14954 NaNs. */
14955 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14956 SImode, op0, mode, op1, mode);
14957
14958 emit_insn (gen_cstoresi4 (dest,
14959 gen_rtx_fmt_ee (code, SImode, normal_dest,
14960 const0_rtx),
14961 normal_dest, const0_rtx));
14962
14963 /* Join NaN and non-NaN paths. Compare dest against 0. */
14964 emit_label (join_label);
14965 code = NE;
14966 }
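	  /* The emitted sequence is roughly (sketch; KFmode libfunc names
	     assumed):
		dest = 1;
		if (__unordkf2 (a, b) != 0) goto join;
		dest = (__gekf2 (a, b) >= 0);   -- or <=, == per CODE
	     join:
		CC = compare (dest, 0);         -- tested with NE  */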
14967
14968 emit_insn (gen_rtx_SET (compare_result,
14969 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14970 }
14971
14972 else
14973 {
14974 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14975 CLOBBERs to match cmptf_internal2 pattern. */
14976 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14977 && FLOAT128_IBM_P (GET_MODE (op0))
14978 && TARGET_HARD_FLOAT)
14979 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14980 gen_rtvec (10,
14981 gen_rtx_SET (compare_result,
14982 gen_rtx_COMPARE (comp_mode, op0, op1)),
14983 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14984 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14985 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14986 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14987 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14988 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14989 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14990 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14991 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14992 else if (GET_CODE (op1) == UNSPEC
14993 && XINT (op1, 1) == UNSPEC_SP_TEST)
14994 {
14995 rtx op1b = XVECEXP (op1, 0, 0);
14996 comp_mode = CCEQmode;
14997 compare_result = gen_reg_rtx (CCEQmode);
14998 if (TARGET_64BIT)
14999 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15000 else
15001 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15002 }
15003 else
15004 emit_insn (gen_rtx_SET (compare_result,
15005 gen_rtx_COMPARE (comp_mode, op0, op1)));
15006 }
15007
15008 validate_condition_mode (code, GET_MODE (compare_result));
15009
15010 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15011 }
15012
15013 \f
15014 /* Return the diagnostic message string if the binary operation OP is
15015 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15016
15017 static const char*
15018 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15019 const_tree type1,
15020 const_tree type2)
15021 {
15022 machine_mode mode1 = TYPE_MODE (type1);
15023 machine_mode mode2 = TYPE_MODE (type2);
15024
15025 /* For complex modes, use the inner type. */
15026 if (COMPLEX_MODE_P (mode1))
15027 mode1 = GET_MODE_INNER (mode1);
15028
15029 if (COMPLEX_MODE_P (mode2))
15030 mode2 = GET_MODE_INNER (mode2);
15031
15032 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15033 double to intermix unless -mfloat128-convert. */
15034 if (mode1 == mode2)
15035 return NULL;
15036
15037 if (!TARGET_FLOAT128_CVT)
15038 {
15039 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15040 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15041 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15042 "point types");
15043 }
15044
15045 return NULL;
15046 }
15047
15048 \f
15049 /* Expand floating point conversion to/from __float128 and __ibm128. */
15050
15051 void
15052 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15053 {
15054 machine_mode dest_mode = GET_MODE (dest);
15055 machine_mode src_mode = GET_MODE (src);
15056 convert_optab cvt = unknown_optab;
15057 bool do_move = false;
15058 rtx libfunc = NULL_RTX;
15059 rtx dest2;
15060 typedef rtx (*rtx_2func_t) (rtx, rtx);
15061 rtx_2func_t hw_convert = (rtx_2func_t)0;
15062 size_t kf_or_tf;
15063
15064 struct hw_conv_t {
15065 rtx_2func_t from_df;
15066 rtx_2func_t from_sf;
15067 rtx_2func_t from_si_sign;
15068 rtx_2func_t from_si_uns;
15069 rtx_2func_t from_di_sign;
15070 rtx_2func_t from_di_uns;
15071 rtx_2func_t to_df;
15072 rtx_2func_t to_sf;
15073 rtx_2func_t to_si_sign;
15074 rtx_2func_t to_si_uns;
15075 rtx_2func_t to_di_sign;
15076 rtx_2func_t to_di_uns;
15077 } hw_conversions[2] = {
15078 /* conversions to/from KFmode */
15079 {
15080 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15081 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15082 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15083 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15084 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15085 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15086 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15087 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15088 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15089 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15090 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15091 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15092 },
15093
15094 /* conversions to/from TFmode */
15095 {
15096 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15097 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15098 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15099 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15100 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15101 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15102 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15103 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15104 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15105 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15106 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15107 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15108 },
15109 };
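  /* E.g. a DFmode -> KFmode conversion uses gen_extenddfkf2_hw when
     TARGET_FLOAT128_HW, and otherwise falls back to the sext_optab
     libfunc (__extenddfkf2 in libgcc).  */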
15110
15111 if (dest_mode == src_mode)
15112 gcc_unreachable ();
15113
15114 /* Eliminate memory operations. */
15115 if (MEM_P (src))
15116 src = force_reg (src_mode, src);
15117
15118 if (MEM_P (dest))
15119 {
15120 rtx tmp = gen_reg_rtx (dest_mode);
15121 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15122 rs6000_emit_move (dest, tmp, dest_mode);
15123 return;
15124 }
15125
15126 /* Convert to IEEE 128-bit floating point. */
15127 if (FLOAT128_IEEE_P (dest_mode))
15128 {
15129 if (dest_mode == KFmode)
15130 kf_or_tf = 0;
15131 else if (dest_mode == TFmode)
15132 kf_or_tf = 1;
15133 else
15134 gcc_unreachable ();
15135
15136 switch (src_mode)
15137 {
15138 case E_DFmode:
15139 cvt = sext_optab;
15140 hw_convert = hw_conversions[kf_or_tf].from_df;
15141 break;
15142
15143 case E_SFmode:
15144 cvt = sext_optab;
15145 hw_convert = hw_conversions[kf_or_tf].from_sf;
15146 break;
15147
15148 case E_KFmode:
15149 case E_IFmode:
15150 case E_TFmode:
15151 if (FLOAT128_IBM_P (src_mode))
15152 cvt = sext_optab;
15153 else
15154 do_move = true;
15155 break;
15156
15157 case E_SImode:
15158 if (unsigned_p)
15159 {
15160 cvt = ufloat_optab;
15161 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15162 }
15163 else
15164 {
15165 cvt = sfloat_optab;
15166 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15167 }
15168 break;
15169
15170 case E_DImode:
15171 if (unsigned_p)
15172 {
15173 cvt = ufloat_optab;
15174 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15175 }
15176 else
15177 {
15178 cvt = sfloat_optab;
15179 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15180 }
15181 break;
15182
15183 default:
15184 gcc_unreachable ();
15185 }
15186 }
15187
15188 /* Convert from IEEE 128-bit floating point. */
15189 else if (FLOAT128_IEEE_P (src_mode))
15190 {
15191 if (src_mode == KFmode)
15192 kf_or_tf = 0;
15193 else if (src_mode == TFmode)
15194 kf_or_tf = 1;
15195 else
15196 gcc_unreachable ();
15197
15198 switch (dest_mode)
15199 {
15200 case E_DFmode:
15201 cvt = trunc_optab;
15202 hw_convert = hw_conversions[kf_or_tf].to_df;
15203 break;
15204
15205 case E_SFmode:
15206 cvt = trunc_optab;
15207 hw_convert = hw_conversions[kf_or_tf].to_sf;
15208 break;
15209
15210 case E_KFmode:
15211 case E_IFmode:
15212 case E_TFmode:
15213 if (FLOAT128_IBM_P (dest_mode))
15214 cvt = trunc_optab;
15215 else
15216 do_move = true;
15217 break;
15218
15219 case E_SImode:
15220 if (unsigned_p)
15221 {
15222 cvt = ufix_optab;
15223 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15224 }
15225 else
15226 {
15227 cvt = sfix_optab;
15228 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15229 }
15230 break;
15231
15232 case E_DImode:
15233 if (unsigned_p)
15234 {
15235 cvt = ufix_optab;
15236 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15237 }
15238 else
15239 {
15240 cvt = sfix_optab;
15241 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15242 }
15243 break;
15244
15245 default:
15246 gcc_unreachable ();
15247 }
15248 }
15249
15250 /* Both IBM format. */
15251 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15252 do_move = true;
15253
15254 else
15255 gcc_unreachable ();
15256
15257 /* Handle conversion between TFmode/KFmode/IFmode. */
15258 if (do_move)
15259 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15260
15261 /* Handle conversion if we have hardware support. */
15262 else if (TARGET_FLOAT128_HW && hw_convert)
15263 emit_insn ((hw_convert) (dest, src));
15264
15265 /* Call an external function to do the conversion. */
15266 else if (cvt != unknown_optab)
15267 {
15268 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15269 gcc_assert (libfunc != NULL_RTX);
15270
15271 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15272 src, src_mode);
15273
15274 gcc_assert (dest2 != NULL_RTX);
15275 if (!rtx_equal_p (dest, dest2))
15276 emit_move_insn (dest, dest2);
15277 }
15278
15279 else
15280 gcc_unreachable ();
15281
15282 return;
15283 }
15284
15285 \f
15286 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15287 can be used as that dest register. Return the dest register. */
15288
15289 rtx
15290 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15291 {
15292 if (op2 == const0_rtx)
15293 return op1;
15294
15295 if (GET_CODE (scratch) == SCRATCH)
15296 scratch = gen_reg_rtx (mode);
15297
15298 if (logical_operand (op2, mode))
15299 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15300 else
15301 emit_insn (gen_rtx_SET (scratch,
15302 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15303
15304 return scratch;
15305 }
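/* For example, for a comparison of r3 against the constant 5, this emits
   scratch = r3 ^ 5; the caller then tests scratch against zero, which is
   equivalent to testing r3 == 5.  */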
15306
15307 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15308 requires this. The result is mode MODE. */
15309 rtx
15310 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15311 {
15312 rtx cond[2];
15313 int n = 0;
15314 if (code == LTGT || code == LE || code == UNLT)
15315 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15316 if (code == LTGT || code == GE || code == UNGT)
15317 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15318 if (code == LE || code == GE || code == UNEQ)
15319 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15320 if (code == UNLT || code == UNGT || code == UNEQ)
15321 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15322
15323 gcc_assert (n == 2);
15324
15325 rtx cc = gen_reg_rtx (CCEQmode);
15326 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15327 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15328
15329 return cc;
15330 }
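/* E.g. for CODE == LE the two conditions built above are LT and EQ; the
   emitted cror ORs those two CR bits into a single CCEQmode bit.  */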
15331
15332 void
15333 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15334 {
15335 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15336 rtx_code cond_code = GET_CODE (condition_rtx);
15337
15338 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15339 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15340 ;
15341 else if (cond_code == NE
15342 || cond_code == GE || cond_code == LE
15343 || cond_code == GEU || cond_code == LEU
15344 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15345 {
15346 rtx not_result = gen_reg_rtx (CCEQmode);
15347 rtx not_op, rev_cond_rtx;
15348 machine_mode cc_mode;
15349
15350 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15351
15352 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15353 SImode, XEXP (condition_rtx, 0), const0_rtx);
15354 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15355 emit_insn (gen_rtx_SET (not_result, not_op));
15356 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15357 }
15358
15359 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15360 if (op_mode == VOIDmode)
15361 op_mode = GET_MODE (XEXP (operands[1], 1));
15362
15363 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15364 {
15365 PUT_MODE (condition_rtx, DImode);
15366 convert_move (operands[0], condition_rtx, 0);
15367 }
15368 else
15369 {
15370 PUT_MODE (condition_rtx, SImode);
15371 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15372 }
15373 }
15374
15375 /* Emit a conditional branch to the label in operands[3], testing the
comparison in operands[0]. */
15376
15377 void
15378 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15379 {
15380 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15381 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15382 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15383 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15384 }
15385
15386 /* Return the string to output a conditional branch to LABEL, which is
15387 the operand template of the label, or NULL if the branch is really a
15388 conditional return.
15389
15390 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15391 condition code register and its mode specifies what kind of
15392 comparison we made.
15393
15394 REVERSED is nonzero if we should reverse the sense of the comparison.
15395
15396 INSN is the insn. */
15397
15398 char *
15399 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15400 {
15401 static char string[64];
15402 enum rtx_code code = GET_CODE (op);
15403 rtx cc_reg = XEXP (op, 0);
15404 machine_mode mode = GET_MODE (cc_reg);
15405 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15406 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15407 int really_reversed = reversed ^ need_longbranch;
15408 char *s = string;
15409 const char *ccode;
15410 const char *pred;
15411 rtx note;
15412
15413 validate_condition_mode (code, mode);
15414
15415 /* Work out which way this really branches. We could use
15416 reverse_condition_maybe_unordered here always but this
15417 makes the resulting assembler clearer. */
15418 if (really_reversed)
15419 {
15420 /* Reversing an FP compare needs care -- an ordered compare
15421 becomes an unordered compare and vice versa. */
15422 if (mode == CCFPmode)
15423 code = reverse_condition_maybe_unordered (code);
15424 else
15425 code = reverse_condition (code);
15426 }
15427
15428 switch (code)
15429 {
15430 /* Not all of these are actually distinct opcodes, but
15431 we distinguish them for clarity of the resulting assembler. */
15432 case NE: case LTGT:
15433 ccode = "ne"; break;
15434 case EQ: case UNEQ:
15435 ccode = "eq"; break;
15436 case GE: case GEU:
15437 ccode = "ge"; break;
15438 case GT: case GTU: case UNGT:
15439 ccode = "gt"; break;
15440 case LE: case LEU:
15441 ccode = "le"; break;
15442 case LT: case LTU: case UNLT:
15443 ccode = "lt"; break;
15444 case UNORDERED: ccode = "un"; break;
15445 case ORDERED: ccode = "nu"; break;
15446 case UNGE: ccode = "nl"; break;
15447 case UNLE: ccode = "ng"; break;
15448 default:
15449 gcc_unreachable ();
15450 }
15451
15452 /* Maybe we have a guess as to how likely the branch is. */
15453 pred = "";
15454 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15455 if (note != NULL_RTX)
15456 {
15457 /* PROB is the difference from 50%. */
15458 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15459 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15460
15461 /* Only hint for highly probable/improbable branches on newer cpus when
15462 we have real profile data, as static prediction overrides processor
15463 dynamic prediction. For older cpus we may as well always hint, but
15464 assume not taken for branches that are very close to 50% as a
15465 mispredicted taken branch is more expensive than a
15466 mispredicted not-taken branch. */
15467 if (rs6000_always_hint
15468 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15469 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15470 && br_prob_note_reliable_p (note)))
15471 {
15472 if (abs (prob) > REG_BR_PROB_BASE / 20
15473 && ((prob > 0) ^ need_longbranch))
15474 pred = "+";
15475 else
15476 pred = "-";
15477 }
15478 }
15479
15480 if (label == NULL)
15481 s += sprintf (s, "b%slr%s ", ccode, pred);
15482 else
15483 s += sprintf (s, "b%s%s ", ccode, pred);
15484
15485 /* We need to escape any '%' characters in the reg_names string.
15486 Assume they'd only be the first character.... */
15487 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15488 *s++ = '%';
15489 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15490
15491 if (label != NULL)
15492 {
15493 /* If the branch distance was too far, we may have to use an
15494 unconditional branch to go the distance. */
15495 if (need_longbranch)
15496 s += sprintf (s, ",$+8\n\tb %s", label);
15497 else
15498 s += sprintf (s, ",%s", label);
15499 }
15500
15501 return string;
15502 }
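/* Illustrative results: a likely EQ branch on cr0 returns "beq+ 0,.L5",
   while an out-of-range branch is inverted and emitted as
   "bne 0,$+8\n\tb .L5".  */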
15503
15504 /* Return insn for VSX or Altivec comparisons. */
15505
15506 static rtx
15507 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15508 {
15509 rtx mask;
15510 machine_mode mode = GET_MODE (op0);
15511
15512 switch (code)
15513 {
15514 default:
15515 break;
15516
15517 case GE:
15518 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15519 return NULL_RTX;
15520 /* FALLTHRU */
15521
15522 case EQ:
15523 case GT:
15524 case GTU:
15525 case ORDERED:
15526 case UNORDERED:
15527 case UNEQ:
15528 case LTGT:
15529 mask = gen_reg_rtx (mode);
15530 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15531 return mask;
15532 }
15533
15534 return NULL_RTX;
15535 }
15536
15537 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15538 DMODE is the expected destination mode. This is a recursive function. */
15539
15540 static rtx
15541 rs6000_emit_vector_compare (enum rtx_code rcode,
15542 rtx op0, rtx op1,
15543 machine_mode dmode)
15544 {
15545 rtx mask;
15546 bool swap_operands = false;
15547 bool try_again = false;
15548
15549 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15550 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15551
15552 /* See if the comparison works as is. */
15553 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15554 if (mask)
15555 return mask;
15556
15557 switch (rcode)
15558 {
15559 case LT:
15560 rcode = GT;
15561 swap_operands = true;
15562 try_again = true;
15563 break;
15564 case LTU:
15565 rcode = GTU;
15566 swap_operands = true;
15567 try_again = true;
15568 break;
15569 case NE:
15570 case UNLE:
15571 case UNLT:
15572 case UNGE:
15573 case UNGT:
15574 /* Invert condition and try again.
15575 e.g., A != B becomes ~(A==B). */
15576 {
15577 enum rtx_code rev_code;
15578 enum insn_code nor_code;
15579 rtx mask2;
15580
15581 rev_code = reverse_condition_maybe_unordered (rcode);
15582 if (rev_code == UNKNOWN)
15583 return NULL_RTX;
15584
15585 nor_code = optab_handler (one_cmpl_optab, dmode);
15586 if (nor_code == CODE_FOR_nothing)
15587 return NULL_RTX;
15588
15589 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15590 if (!mask2)
15591 return NULL_RTX;
15592
15593 mask = gen_reg_rtx (dmode);
15594 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15595 return mask;
15596 }
15597 break;
15598 case GE:
15599 case GEU:
15600 case LE:
15601 case LEU:
15602 /* Try GT/GTU/LT/LTU OR EQ */
15603 {
15604 rtx c_rtx, eq_rtx;
15605 enum insn_code ior_code;
15606 enum rtx_code new_code;
15607
15608 switch (rcode)
15609 {
15610 case GE:
15611 new_code = GT;
15612 break;
15613
15614 case GEU:
15615 new_code = GTU;
15616 break;
15617
15618 case LE:
15619 new_code = LT;
15620 break;
15621
15622 case LEU:
15623 new_code = LTU;
15624 break;
15625
15626 default:
15627 gcc_unreachable ();
15628 }
15629
15630 ior_code = optab_handler (ior_optab, dmode);
15631 if (ior_code == CODE_FOR_nothing)
15632 return NULL_RTX;
15633
15634 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15635 if (!c_rtx)
15636 return NULL_RTX;
15637
15638 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15639 if (!eq_rtx)
15640 return NULL_RTX;
15641
15642 mask = gen_reg_rtx (dmode);
15643 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15644 return mask;
15645 }
15646 break;
15647 default:
15648 return NULL_RTX;
15649 }
15650
15651 if (try_again)
15652 {
15653 if (swap_operands)
15654 std::swap (op0, op1);
15655
15656 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15657 if (mask)
15658 return mask;
15659 }
15660
15661 /* You only get two chances. */
15662 return NULL_RTX;
15663 }
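/* For example, on a target with only vcmpgtsw/vcmpequw for V4SImode, a
   GE comparison is emitted above as (GT OR EQ), and NE as the one's
   complement of EQ.  */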
15664
15665 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15666 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15667 operands for the relation operation COND. */
15668
15669 int
15670 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15671 rtx cond, rtx cc_op0, rtx cc_op1)
15672 {
15673 machine_mode dest_mode = GET_MODE (dest);
15674 machine_mode mask_mode = GET_MODE (cc_op0);
15675 enum rtx_code rcode = GET_CODE (cond);
15676 rtx mask;
15677 bool invert_move = false;
15678
15679 if (VECTOR_UNIT_NONE_P (dest_mode))
15680 return 0;
15681
15682 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15683 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15684
15685 switch (rcode)
15686 {
15687 /* These codes have no direct vector instruction; invert the
15688 condition and swap the true/false arms to compensate. */
15689 case NE:
15690 case UNLE:
15691 case UNLT:
15692 case UNGE:
15693 case UNGT:
15694 /* Invert condition and try again.
15695 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15696 invert_move = true;
15697 rcode = reverse_condition_maybe_unordered (rcode);
15698 if (rcode == UNKNOWN)
15699 return 0;
15700 break;
15701
15702 case GE:
15703 case LE:
15704 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15705 {
15706 /* Invert condition to avoid compound test. */
15707 invert_move = true;
15708 rcode = reverse_condition (rcode);
15709 }
15710 break;
15711
15712 case GTU:
15713 case GEU:
15714 case LTU:
15715 case LEU:
15716
15717 /* Invert condition to avoid compound test if necessary. */
15718 if (rcode == GEU || rcode == LEU)
15719 {
15720 invert_move = true;
15721 rcode = reverse_condition (rcode);
15722 }
15723 break;
15724
15725 default:
15726 break;
15727 }
15728
15729 /* Get the vector mask for the given relational operations. */
15730 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15731
15732 if (!mask)
15733 return 0;
15734
15735 if (mask_mode != dest_mode)
15736 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
15737
15738 if (invert_move)
15739 std::swap (op_true, op_false);
15740
15741 /* The compare mask is known to be -1/0 per element; use it directly when the arms are constant -1/0 vectors. */
15742 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15743 && (GET_CODE (op_true) == CONST_VECTOR
15744 || GET_CODE (op_false) == CONST_VECTOR))
15745 {
15746 rtx constant_0 = CONST0_RTX (dest_mode);
15747 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15748
15749 if (op_true == constant_m1 && op_false == constant_0)
15750 {
15751 emit_move_insn (dest, mask);
15752 return 1;
15753 }
15754
15755 else if (op_true == constant_0 && op_false == constant_m1)
15756 {
15757 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15758 return 1;
15759 }
15760
15761 /* If we can't use the vector comparison directly, perhaps we can use
15762 the mask for the true or false fields, instead of loading up a
15763 constant. */
15764 if (op_true == constant_m1)
15765 op_true = mask;
15766
15767 if (op_false == constant_0)
15768 op_false = mask;
15769 }
15770
15771 if (!REG_P (op_true) && !SUBREG_P (op_true))
15772 op_true = force_reg (dest_mode, op_true);
15773
15774 if (!REG_P (op_false) && !SUBREG_P (op_false))
15775 op_false = force_reg (dest_mode, op_false);
15776
15777 rtx tmp = gen_rtx_IOR (dest_mode,
15778 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
15779 op_false),
15780 gen_rtx_AND (dest_mode, mask, op_true));
15781 emit_insn (gen_rtx_SET (dest, tmp));
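  /* The IOR/AND combination above is the canonical
     (op_false & ~mask) | (op_true & mask) bit-select, matching the
     vsel/xxsel patterns.  */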
15782 return 1;
15783 }
15784
15785 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to
15786 implement a maximum or minimum with "C" semantics.
15787
15788 Unless you use -ffast-math, you can't use these instructions to replace
15789 conditions that implicitly reverse the condition because the comparison
15790 might generate a NaN or signed zero.
15791
15792 I.e. the following can always be replaced:
15793 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15794 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15795 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15796 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15797
15798 The following can be replaced only if -ffast-math is used:
15799 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15800 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15801 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15802 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15803
15804 Move TRUE_COND to DEST if OP applied to the operands of the last
15805 comparison is nonzero/true, FALSE_COND if it is zero/false.
15806
15807 Return false if we can't generate the appropriate minimum or maximum, and
15808 true if we did emit the minimum or maximum. */
15809
15810 static bool
15811 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15812 {
15813 enum rtx_code code = GET_CODE (op);
15814 rtx op0 = XEXP (op, 0);
15815 rtx op1 = XEXP (op, 1);
15816 machine_mode compare_mode = GET_MODE (op0);
15817 machine_mode result_mode = GET_MODE (dest);
15818 bool max_p = false;
15819
15820 if (result_mode != compare_mode)
15821 return false;
15822
15823 if (code == GE || code == GT)
15824 max_p = true;
15825 else if (code == LE || code == LT)
15826 max_p = false;
15827 else
15828 return false;
15829
15830 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15831 ;
15832
15833 /* Only when NaNs and signed-zeros are not in effect, smax could be
15834 used for `op0 < op1 ? op1 : op0`, and smin could be used for
15835 `op0 > op1 ? op1 : op0`. */
15836 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15837 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15838 max_p = !max_p;
15839
15840 else
15841 return false;
15842
15843 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15844 return true;
15845 }
15846
15847 /* Possibly emit a floating point conditional move by generating a
15848 compare-and-set-mask instruction followed by an XXSEL select instruction.
15849
15850 Move TRUE_COND to DEST if OP applied to the operands of the last
15851 comparison is nonzero/true, FALSE_COND if it is zero/false.
15852
15853 Return false if the operation cannot be generated, and true if we could
15854 generate the instruction. */
15855
15856 static bool
15857 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15858 {
15859 enum rtx_code code = GET_CODE (op);
15860 rtx op0 = XEXP (op, 0);
15861 rtx op1 = XEXP (op, 1);
15862 machine_mode compare_mode = GET_MODE (op0);
15863 machine_mode result_mode = GET_MODE (dest);
15864 rtx compare_rtx;
15865 rtx cmove_rtx;
15866 rtx clobber_rtx;
15867
15868 if (!can_create_pseudo_p ())
15869 return false;
15870
15871 /* We allow the comparison to be either SFmode or DFmode and the true/false
15872 condition to be either SFmode or DFmode. I.e. we allow:
15873
15874 float a, b;
15875 double c, d, r;
15876
15877 r = (a == b) ? c : d;
15878
15879 and:
15880
15881 double a, b;
15882 float c, d, r;
15883
15884 r = (a == b) ? c : d;
15885
15886 but we don't allow intermixing the IEEE 128-bit floating point types with
15887 the 32/64-bit scalar types. */
15888
15889 if (!(compare_mode == result_mode
15890 || (compare_mode == SFmode && result_mode == DFmode)
15891 || (compare_mode == DFmode && result_mode == SFmode)))
15892 return false;
15893
15894 switch (code)
15895 {
15896 case EQ:
15897 case GE:
15898 case GT:
15899 break;
15900
15901 case NE:
15902 case LT:
15903 case LE:
15904 code = swap_condition (code);
15905 std::swap (op0, op1);
15906 break;
15907
15908 default:
15909 return false;
15910 }
15911
15912 /* Generate: [(parallel [(set (dest)
15913 (if_then_else (op (cmp1) (cmp2))
15914 (true)
15915 (false)))
15916 (clobber (scratch))])]. */
15917
15918 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15919 cmove_rtx = gen_rtx_SET (dest,
15920 gen_rtx_IF_THEN_ELSE (result_mode,
15921 compare_rtx,
15922 true_cond,
15923 false_cond));
15924
15925 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
15926 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15927 gen_rtvec (2, cmove_rtx, clobber_rtx)));
15928
15929 return true;
15930 }
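
/* As a sketch (illustrative; the actual instructions come from the insn
   patterns this expands to), `r = (a == b) ? c : d` in DFmode on power9
   is expected to become

	xscmpeqdp vsT,vsA,vsB		# vsT <- all-ones/all-zeros mask
	xxsel	  vsR,vsD,vsC,vsT	# select c where the mask is set

   with the clobbered V2DImode scratch holding the intermediate mask.  */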
15931
15932 /* Helper function to return true if the target has a compare-and-set-mask
15933 instruction that can be used with XXSEL to implement a conditional move.
15934 It is assumed that such a target also supports the "C" minimum and
15935 maximum instructions. */
15936
15937 static bool
15938 have_compare_and_set_mask (machine_mode mode)
15939 {
15940 switch (mode)
15941 {
15942 case E_SFmode:
15943 case E_DFmode:
15944 return TARGET_P9_MINMAX;
15945
15946 case E_KFmode:
15947 case E_TFmode:
15948 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
15949
15950 default:
15951 break;
15952 }
15953
15954 return false;
15955 }
15956
15957 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
15958 operands of the last comparison is nonzero/true, FALSE_COND if it
15959 is zero/false. Return false if the hardware has no such operation. */
15960
15961 bool
15962 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15963 {
15964 enum rtx_code code = GET_CODE (op);
15965 rtx op0 = XEXP (op, 0);
15966 rtx op1 = XEXP (op, 1);
15967 machine_mode compare_mode = GET_MODE (op0);
15968 machine_mode result_mode = GET_MODE (dest);
15969 rtx temp;
15970 bool is_against_zero;
15971
15972 /* These modes should always match. */
15973 if (GET_MODE (op1) != compare_mode
15974 /* In the isel case however, we can use a compare immediate, so
15975 op1 may be a small constant. */
15976 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
15977 return false;
15978 if (GET_MODE (true_cond) != result_mode)
15979 return false;
15980 if (GET_MODE (false_cond) != result_mode)
15981 return false;
15982
15983 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
15984 instructions. */
15985 if (have_compare_and_set_mask (compare_mode)
15986 && have_compare_and_set_mask (result_mode))
15987 {
15988 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
15989 return true;
15990
15991 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
15992 return true;
15993 }
15994
15995 /* Don't allow using floating point comparisons for integer results for
15996 now. */
15997 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
15998 return false;
15999
16000 /* First, work out if the hardware can do this at all, or
16001 if it's too slow.... */
16002 if (!FLOAT_MODE_P (compare_mode))
16003 {
16004 if (TARGET_ISEL)
16005 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16006 return false;
16007 }
16008
16009 is_against_zero = op1 == CONST0_RTX (compare_mode);
16010
16011 /* A floating-point subtract might overflow, underflow, or produce
16012 an inexact result, thus changing the floating-point flags, so it
16013 can't be generated if we care about that. It's safe if one side
16014 of the construct is zero, since then no subtract will be
16015 generated. */
16016 if (SCALAR_FLOAT_MODE_P (compare_mode)
16017 && flag_trapping_math && ! is_against_zero)
16018 return false;
16019
16020 /* Eliminate half of the comparisons by switching operands, this
16021 makes the remaining code simpler. */
16022 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16023 || code == LTGT || code == LT || code == UNLE)
16024 {
16025 code = reverse_condition_maybe_unordered (code);
16026 temp = true_cond;
16027 true_cond = false_cond;
16028 false_cond = temp;
16029 }
16030
16031 /* UNEQ and LTGT take four instructions for a comparison with zero;
16032 it'll probably be faster to use a branch here too. */
16033 if (code == UNEQ && HONOR_NANS (compare_mode))
16034 return false;
16035
16036 /* We're going to try to implement comparisons by performing
16037 a subtract, then comparing against zero. Unfortunately,
16038 Inf - Inf is NaN which is not zero, and so if we don't
16039 know that the operand is finite and the comparison
16040 would treat EQ differently from UNORDERED, we can't do it. */
16041 if (HONOR_INFINITIES (compare_mode)
16042 && code != GT && code != UNGE
16043 && (!CONST_DOUBLE_P (op1)
16044 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16045 /* Constructs of the form (a OP b ? a : b) are safe. */
16046 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16047 || (! rtx_equal_p (op0, true_cond)
16048 && ! rtx_equal_p (op1, true_cond))))
16049 return false;
16050
16051 /* At this point we know we can use fsel. */
16052
16053 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16054 is no fsel instruction. */
16055 if (compare_mode != SFmode && compare_mode != DFmode)
16056 return false;
16057
16058 /* Reduce the comparison to a comparison against zero. */
16059 if (! is_against_zero)
16060 {
16061 temp = gen_reg_rtx (compare_mode);
16062 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16063 op0 = temp;
16064 op1 = CONST0_RTX (compare_mode);
16065 }
16066
16067 /* If we don't care about NaNs we can reduce some of the comparisons
16068 down to faster ones. */
16069 if (! HONOR_NANS (compare_mode))
16070 switch (code)
16071 {
16072 case GT:
16073 code = LE;
16074 temp = true_cond;
16075 true_cond = false_cond;
16076 false_cond = temp;
16077 break;
16078 case UNGE:
16079 code = GE;
16080 break;
16081 case UNEQ:
16082 code = EQ;
16083 break;
16084 default:
16085 break;
16086 }
16087
16088 /* Now, reduce everything down to a GE. */
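  /* These reductions rely on identities that hold for comparisons against
     zero (a sketch of the reasoning; OP1 is zero at this point):
       a <= 0     <->  -a >= 0
       ORDERED a  <->  abs(a) >= 0     (abs of a NaN is still a NaN)
       a == 0     <->  -abs(a) >= 0    (-abs(a) is >= 0 only at zero)  */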
16089 switch (code)
16090 {
16091 case GE:
16092 break;
16093
16094 case LE:
16095 temp = gen_reg_rtx (compare_mode);
16096 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16097 op0 = temp;
16098 break;
16099
16100 case ORDERED:
16101 temp = gen_reg_rtx (compare_mode);
16102 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16103 op0 = temp;
16104 break;
16105
16106 case EQ:
16107 temp = gen_reg_rtx (compare_mode);
16108 emit_insn (gen_rtx_SET (temp,
16109 gen_rtx_NEG (compare_mode,
16110 gen_rtx_ABS (compare_mode, op0))));
16111 op0 = temp;
16112 break;
16113
16114 case UNGE:
16115 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16116 temp = gen_reg_rtx (result_mode);
16117 emit_insn (gen_rtx_SET (temp,
16118 gen_rtx_IF_THEN_ELSE (result_mode,
16119 gen_rtx_GE (VOIDmode,
16120 op0, op1),
16121 true_cond, false_cond)));
16122 false_cond = true_cond;
16123 true_cond = temp;
16124
16125 temp = gen_reg_rtx (compare_mode);
16126 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16127 op0 = temp;
16128 break;
16129
16130 case GT:
16131 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16132 temp = gen_reg_rtx (result_mode);
16133 emit_insn (gen_rtx_SET (temp,
16134 gen_rtx_IF_THEN_ELSE (result_mode,
16135 gen_rtx_GE (VOIDmode,
16136 op0, op1),
16137 true_cond, false_cond)));
16138 true_cond = false_cond;
16139 false_cond = temp;
16140
16141 temp = gen_reg_rtx (compare_mode);
16142 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16143 op0 = temp;
16144 break;
16145
16146 default:
16147 gcc_unreachable ();
16148 }
16149
16150 emit_insn (gen_rtx_SET (dest,
16151 gen_rtx_IF_THEN_ELSE (result_mode,
16152 gen_rtx_GE (VOIDmode,
16153 op0, op1),
16154 true_cond, false_cond)));
16155 return true;
16156 }
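
/* For example (an illustrative sketch): `r = (a >= b) ? c : d` in DFmode
   reduces to a subtract feeding fsel,

	fsub  f0,fA,fB
	fsel  fR,f0,fC,fD	# fR = (f0 >= 0.0) ? fC : fD

   which is why the code above works to turn the comparison into a GE
   against zero.  */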
16157
16158 /* Same as above, but for ints (isel). */
16159
16160 bool
16161 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16162 {
16163 rtx condition_rtx, cr;
16164 machine_mode mode = GET_MODE (dest);
16165 enum rtx_code cond_code;
16166 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16167 bool signedp;
16168
16169 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16170 return false;
16171
16172 /* We still have to do the compare, because isel doesn't do a
16173 compare, it just looks at the CRx bits set by a previous compare
16174 instruction. */
16175 condition_rtx = rs6000_generate_compare (op, mode);
16176 cond_code = GET_CODE (condition_rtx);
16177 cr = XEXP (condition_rtx, 0);
16178 signedp = GET_MODE (cr) == CCmode;
16179
16180 isel_func = (mode == SImode
16181 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
16182 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
16183
16184 switch (cond_code)
16185 {
16186 case LT: case GT: case LTU: case GTU: case EQ:
16187 /* isel handles these directly. */
16188 break;
16189
16190 default:
16191 /* We need to swap the sense of the comparison. */
16192 {
16193 std::swap (false_cond, true_cond);
16194 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16195 }
16196 break;
16197 }
16198
16199 false_cond = force_reg (mode, false_cond);
16200 if (true_cond != const0_rtx)
16201 true_cond = force_reg (mode, true_cond);
16202
16203 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16204
16205 return true;
16206 }
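
/* An illustrative sketch: `r = (a == b) ? t : f` for SImode becomes a
   compare followed by an integer select on the resulting CR bit,

	cmpw  cr0,rA,rB
	isel  rR,rT,rF,2	# bit 2 of cr0 is EQ

   isel never branches, which is the point of using it here.  */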
16207
16208 void
16209 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16210 {
16211 machine_mode mode = GET_MODE (op0);
16212 enum rtx_code c;
16213 rtx target;
16214
16215 /* VSX/altivec have direct min/max insns. */
16216 if ((code == SMAX || code == SMIN)
16217 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16218 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16219 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16220 {
16221 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16222 return;
16223 }
16224
16225 if (code == SMAX || code == SMIN)
16226 c = GE;
16227 else
16228 c = GEU;
16229
16230 if (code == SMAX || code == UMAX)
16231 target = emit_conditional_move (dest, { c, op0, op1, mode },
16232 op0, op1, mode, 0);
16233 else
16234 target = emit_conditional_move (dest, { c, op0, op1, mode },
16235 op1, op0, mode, 0);
16236 gcc_assert (target);
16237 if (target != dest)
16238 emit_move_insn (dest, target);
16239 }
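
/* E.g. (illustrative) SMAX without a direct machine instruction for MODE
   falls back on a conditional move, dest = (op0 >= op1) ? op0 : op1, and
   UMAX does the same with an unsigned (GEU) comparison.  */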
16240
16241 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16242 COND is true. Mark the jump as unlikely to be taken. */
16243
16244 static void
16245 emit_unlikely_jump (rtx cond, rtx label)
16246 {
16247 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16248 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16249 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16250 }
16251
16252 /* A subroutine of the atomic operation splitters. Emit a load-locked
16253 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16254 the zero_extend operation. */
16255
16256 static void
16257 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16258 {
16259 rtx (*fn) (rtx, rtx) = NULL;
16260
16261 switch (mode)
16262 {
16263 case E_QImode:
16264 fn = gen_load_lockedqi;
16265 break;
16266 case E_HImode:
16267 fn = gen_load_lockedhi;
16268 break;
16269 case E_SImode:
16270 if (GET_MODE (mem) == QImode)
16271 fn = gen_load_lockedqi_si;
16272 else if (GET_MODE (mem) == HImode)
16273 fn = gen_load_lockedhi_si;
16274 else
16275 fn = gen_load_lockedsi;
16276 break;
16277 case E_DImode:
16278 fn = gen_load_lockeddi;
16279 break;
16280 case E_TImode:
16281 fn = gen_load_lockedti;
16282 break;
16283 default:
16284 gcc_unreachable ();
16285 }
16286 emit_insn (fn (reg, mem));
16287 }
16288
16289 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16290 instruction in MODE. */
16291
16292 static void
16293 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16294 {
16295 rtx (*fn) (rtx, rtx, rtx) = NULL;
16296
16297 switch (mode)
16298 {
16299 case E_QImode:
16300 fn = gen_store_conditionalqi;
16301 break;
16302 case E_HImode:
16303 fn = gen_store_conditionalhi;
16304 break;
16305 case E_SImode:
16306 fn = gen_store_conditionalsi;
16307 break;
16308 case E_DImode:
16309 fn = gen_store_conditionaldi;
16310 break;
16311 case E_TImode:
16312 fn = gen_store_conditionalti;
16313 break;
16314 default:
16315 gcc_unreachable ();
16316 }
16317
16318 /* Emit sync before stwcx. to address PPC405 Erratum. */
16319 if (PPC405_ERRATUM77)
16320 emit_insn (gen_hwsync ());
16321
16322 emit_insn (fn (res, mem, val));
16323 }
16324
16325 /* Expand barriers before and after a load_locked/store_cond sequence. */
16326
16327 static rtx
16328 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16329 {
16330 rtx addr = XEXP (mem, 0);
16331
16332 if (!legitimate_indirect_address_p (addr, reload_completed)
16333 && !legitimate_indexed_address_p (addr, reload_completed))
16334 {
16335 addr = force_reg (Pmode, addr);
16336 mem = replace_equiv_address_nv (mem, addr);
16337 }
16338
16339 switch (model)
16340 {
16341 case MEMMODEL_RELAXED:
16342 case MEMMODEL_CONSUME:
16343 case MEMMODEL_ACQUIRE:
16344 break;
16345 case MEMMODEL_RELEASE:
16346 case MEMMODEL_ACQ_REL:
16347 emit_insn (gen_lwsync ());
16348 break;
16349 case MEMMODEL_SEQ_CST:
16350 emit_insn (gen_hwsync ());
16351 break;
16352 default:
16353 gcc_unreachable ();
16354 }
16355 return mem;
16356 }
16357
16358 static void
16359 rs6000_post_atomic_barrier (enum memmodel model)
16360 {
16361 switch (model)
16362 {
16363 case MEMMODEL_RELAXED:
16364 case MEMMODEL_CONSUME:
16365 case MEMMODEL_RELEASE:
16366 break;
16367 case MEMMODEL_ACQUIRE:
16368 case MEMMODEL_ACQ_REL:
16369 case MEMMODEL_SEQ_CST:
16370 emit_insn (gen_isync ());
16371 break;
16372 default:
16373 gcc_unreachable ();
16374 }
16375 }
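
/* Taken together, the pre- and post-barriers bracket the usual PowerPC
   load-locked/store-conditional loop. For a seq-cst read-modify-write the
   overall shape is (an illustrative sketch; exact opcodes depend on the
   mode and target):

	hwsync
     1:	lwarx  rT,0,rA		# load-locked from [rA]
	...			# compute the new value rS
	stwcx. rS,0,rA		# store-conditional to [rA]
	bne-   1b		# retry if the reservation was lost
	isync  */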
16376
16377 /* A subroutine of the various atomic expanders. For sub-word operations,
16378 we must adjust things to operate on SImode. Given the original MEM,
16379 return a new aligned memory. Also build and return the quantities by
16380 which to shift and mask. */
16381
16382 static rtx
16383 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16384 {
16385 rtx addr, align, shift, mask, mem;
16386 HOST_WIDE_INT shift_mask;
16387 machine_mode mode = GET_MODE (orig_mem);
16388
16389 /* For smaller modes, we have to implement this via SImode. */
16390 shift_mask = (mode == QImode ? 0x18 : 0x10);
16391
16392 addr = XEXP (orig_mem, 0);
16393 addr = force_reg (GET_MODE (addr), addr);
16394
16395 /* Aligned memory containing subword. Generate a new memory. We
16396 do not want any of the existing MEM_ATTR data, as we're now
16397 accessing memory outside the original object. */
16398 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16399 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16400 mem = gen_rtx_MEM (SImode, align);
16401 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16402 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16403 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16404
16405 /* Shift amount for subword relative to aligned word. */
16406 shift = gen_reg_rtx (SImode);
16407 addr = gen_lowpart (SImode, addr);
16408 rtx tmp = gen_reg_rtx (SImode);
16409 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16410 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16411 if (BYTES_BIG_ENDIAN)
16412 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16413 shift, 1, OPTAB_LIB_WIDEN);
16414 *pshift = shift;
16415
16416 /* Mask for insertion. */
16417 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16418 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16419 *pmask = mask;
16420
16421 return mem;
16422 }
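
/* Worked example (added for illustration): a QImode access at address
   0x1003 on a little-endian target computes

     align = 0x1003 & -4          = 0x1000      (the containing SImode word)
     shift = (0x1003 << 3) & 0x18 = 0x18        (byte lives in bits 24..31)
     mask  = 0xff << 24           = 0xff000000

   On big-endian targets the shift is XORed with the shift mask because the
   lowest-addressed byte is the most significant byte of the word.  */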
16423
16424 /* A subroutine of the various atomic expanders. For sub-word operands,
16425 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo holding (OLDVAL & ~MASK) | NEWVAL. */
16426
16427 static rtx
16428 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16429 {
16430 rtx x;
16431
16432 x = gen_reg_rtx (SImode);
16433 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16434 gen_rtx_NOT (SImode, mask),
16435 oldval)));
16436
16437 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16438
16439 return x;
16440 }
16441
16442 /* A subroutine of the various atomic expanders. For sub-word operands,
16443 extract WIDE to NARROW via SHIFT. */
16444
16445 static void
16446 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16447 {
16448 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16449 wide, 1, OPTAB_LIB_WIDEN);
16450 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16451 }
16452
16453 /* Expand an atomic compare and swap operation. */
16454
16455 void
16456 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16457 {
16458 rtx boolval, retval, mem, oldval, newval, cond;
16459 rtx label1, label2, x, mask, shift;
16460 machine_mode mode, orig_mode;
16461 enum memmodel mod_s, mod_f;
16462 bool is_weak;
16463
16464 boolval = operands[0];
16465 retval = operands[1];
16466 mem = operands[2];
16467 oldval = operands[3];
16468 newval = operands[4];
16469 is_weak = (INTVAL (operands[5]) != 0);
16470 mod_s = memmodel_base (INTVAL (operands[6]));
16471 mod_f = memmodel_base (INTVAL (operands[7]));
16472 orig_mode = mode = GET_MODE (mem);
16473
16474 mask = shift = NULL_RTX;
16475 if (mode == QImode || mode == HImode)
16476 {
16477 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16478 lwarx plus shift/mask operations. With power8, we need to do the
16479 comparison in SImode, but the store is still done in QI/HImode. */
16480 oldval = convert_modes (SImode, mode, oldval, 1);
16481
16482 if (!TARGET_SYNC_HI_QI)
16483 {
16484 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16485
16486 /* Shift and mask OLDVAL into position within the word. */
16487 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16488 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16489
16490 /* Shift and mask NEWVAL into position within the word. */
16491 newval = convert_modes (SImode, mode, newval, 1);
16492 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16493 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16494 }
16495
16496 /* Prepare to adjust the return value. */
16497 retval = gen_reg_rtx (SImode);
16498 mode = SImode;
16499 }
16500 else if (reg_overlap_mentioned_p (retval, oldval))
16501 oldval = copy_to_reg (oldval);
16502
16503 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16504 oldval = copy_to_mode_reg (mode, oldval);
16505
16506 if (reg_overlap_mentioned_p (retval, newval))
16507 newval = copy_to_reg (newval);
16508
16509 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16510
16511 label1 = NULL_RTX;
16512 if (!is_weak)
16513 {
16514 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16515 emit_label (XEXP (label1, 0));
16516 }
16517 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16518
16519 emit_load_locked (mode, retval, mem);
16520
16521 x = retval;
16522 if (mask)
16523 x = expand_simple_binop (SImode, AND, retval, mask,
16524 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16525
16526 cond = gen_reg_rtx (CCmode);
16527 /* If we have TImode, synthesize a comparison. */
16528 if (mode != TImode)
16529 x = gen_rtx_COMPARE (CCmode, x, oldval);
16530 else
16531 {
16532 rtx xor1_result = gen_reg_rtx (DImode);
16533 rtx xor2_result = gen_reg_rtx (DImode);
16534 rtx or_result = gen_reg_rtx (DImode);
16535 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16536 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16537 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16538 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16539
16540 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16541 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16542 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16543 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16544 }
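  /* I.e. the 128-bit equality test is synthesized as
     ((new.word0 ^ old.word0) | (new.word1 ^ old.word1)) == 0,
     which is zero exactly when both doublewords match.  */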
16545
16546 emit_insn (gen_rtx_SET (cond, x));
16547
16548 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16549 emit_unlikely_jump (x, label2);
16550
16551 x = newval;
16552 if (mask)
16553 x = rs6000_mask_atomic_subword (retval, newval, mask);
16554
16555 emit_store_conditional (orig_mode, cond, mem, x);
16556
16557 if (!is_weak)
16558 {
16559 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16560 emit_unlikely_jump (x, label1);
16561 }
16562
16563 if (!is_mm_relaxed (mod_f))
16564 emit_label (XEXP (label2, 0));
16565
16566 rs6000_post_atomic_barrier (mod_s);
16567
16568 if (is_mm_relaxed (mod_f))
16569 emit_label (XEXP (label2, 0));
16570
16571 if (shift)
16572 rs6000_finish_atomic_subword (operands[1], retval, shift);
16573 else if (mode != GET_MODE (operands[1]))
16574 convert_move (operands[1], retval, 1);
16575
16576 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16577 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16578 emit_insn (gen_rtx_SET (boolval, x));
16579 }
16580
16581 /* Expand an atomic exchange operation. */
16582
16583 void
16584 rs6000_expand_atomic_exchange (rtx operands[])
16585 {
16586 rtx retval, mem, val, cond;
16587 machine_mode mode;
16588 enum memmodel model;
16589 rtx label, x, mask, shift;
16590
16591 retval = operands[0];
16592 mem = operands[1];
16593 val = operands[2];
16594 model = memmodel_base (INTVAL (operands[3]));
16595 mode = GET_MODE (mem);
16596
16597 mask = shift = NULL_RTX;
16598 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16599 {
16600 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16601
16602 /* Shift and mask VAL into position within the word. */
16603 val = convert_modes (SImode, mode, val, 1);
16604 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16605 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16606
16607 /* Prepare to adjust the return value. */
16608 retval = gen_reg_rtx (SImode);
16609 mode = SImode;
16610 }
16611
16612 mem = rs6000_pre_atomic_barrier (mem, model);
16613
16614 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16615 emit_label (XEXP (label, 0));
16616
16617 emit_load_locked (mode, retval, mem);
16618
16619 x = val;
16620 if (mask)
16621 x = rs6000_mask_atomic_subword (retval, val, mask);
16622
16623 cond = gen_reg_rtx (CCmode);
16624 emit_store_conditional (mode, cond, mem, x);
16625
16626 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16627 emit_unlikely_jump (x, label);
16628
16629 rs6000_post_atomic_barrier (model);
16630
16631 if (shift)
16632 rs6000_finish_atomic_subword (operands[0], retval, shift);
16633 }
16634
16635 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16636 to perform. MEM is the memory on which to operate. VAL is the second
16637 operand of the binary operator. BEFORE and AFTER are optional locations to
16638 return the value of MEM either before or after the operation. MODEL_RTX
16639 is a CONST_INT containing the memory model to use. */
16640
16641 void
16642 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16643 rtx orig_before, rtx orig_after, rtx model_rtx)
16644 {
16645 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16646 machine_mode mode = GET_MODE (mem);
16647 machine_mode store_mode = mode;
16648 rtx label, x, cond, mask, shift;
16649 rtx before = orig_before, after = orig_after;
16650
16651 mask = shift = NULL_RTX;
16652 /* On power8, we want to use SImode for the operation directly. On previous
16653 systems, do the operation on a full SImode word and use shift/mask to get
16654 at the proper byte or halfword. */
16655 if (mode == QImode || mode == HImode)
16656 {
16657 if (TARGET_SYNC_HI_QI)
16658 {
16659 val = convert_modes (SImode, mode, val, 1);
16660
16661 /* Prepare to adjust the return value. */
16662 before = gen_reg_rtx (SImode);
16663 if (after)
16664 after = gen_reg_rtx (SImode);
16665 mode = SImode;
16666 }
16667 else
16668 {
16669 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16670
16671 /* Shift and mask VAL into position within the word. */
16672 val = convert_modes (SImode, mode, val, 1);
16673 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16674 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16675
16676 switch (code)
16677 {
16678 case IOR:
16679 case XOR:
16680 /* We've already zero-extended VAL. That is sufficient to
16681 make certain that it does not affect other bits. */
16682 mask = NULL;
16683 break;
16684
16685 case AND:
16686 /* If we make certain that all of the other bits in VAL are
16687 set, that will be sufficient to not affect other bits. */
16688 x = gen_rtx_NOT (SImode, mask);
16689 x = gen_rtx_IOR (SImode, x, val);
16690 emit_insn (gen_rtx_SET (val, x));
16691 mask = NULL;
16692 break;
16693
16694 case NOT:
16695 case PLUS:
16696 case MINUS:
16697 /* These will all affect bits outside the field and need
16698 adjustment via MASK within the loop. */
16699 break;
16700
16701 default:
16702 gcc_unreachable ();
16703 }
16704
16705 /* Prepare to adjust the return value. */
16706 before = gen_reg_rtx (SImode);
16707 if (after)
16708 after = gen_reg_rtx (SImode);
16709 store_mode = mode = SImode;
16710 }
16711 }
16712
16713 mem = rs6000_pre_atomic_barrier (mem, model);
16714
16715 label = gen_label_rtx ();
16716 emit_label (label);
16717 label = gen_rtx_LABEL_REF (VOIDmode, label);
16718
16719 if (before == NULL_RTX)
16720 before = gen_reg_rtx (mode);
16721
16722 emit_load_locked (mode, before, mem);
16723
16724 if (code == NOT)
16725 {
16726 x = expand_simple_binop (mode, AND, before, val,
16727 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16728 after = expand_simple_unop (mode, NOT, x, after, 1);
16729 }
16730 else
16731 {
16732 after = expand_simple_binop (mode, code, before, val,
16733 after, 1, OPTAB_LIB_WIDEN);
16734 }
16735
16736 x = after;
16737 if (mask)
16738 {
16739 x = expand_simple_binop (SImode, AND, after, mask,
16740 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16741 x = rs6000_mask_atomic_subword (before, x, mask);
16742 }
16743 else if (store_mode != mode)
16744 x = convert_modes (store_mode, mode, x, 1);
16745
16746 cond = gen_reg_rtx (CCmode);
16747 emit_store_conditional (store_mode, cond, mem, x);
16748
16749 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16750 emit_unlikely_jump (x, label);
16751
16752 rs6000_post_atomic_barrier (model);
16753
16754 if (shift)
16755 {
16756 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16757 then do the calculations in an SImode register. */
16758 if (orig_before)
16759 rs6000_finish_atomic_subword (orig_before, before, shift);
16760 if (orig_after)
16761 rs6000_finish_atomic_subword (orig_after, after, shift);
16762 }
16763 else if (store_mode != mode)
16764 {
16765 /* QImode/HImode on machines with lbarx/lharx where we do the native
16766 operation and then do the calculations in an SImode register. */
16767 if (orig_before)
16768 convert_move (orig_before, before, 1);
16769 if (orig_after)
16770 convert_move (orig_after, after, 1);
16771 }
16772 else if (orig_after && after != orig_after)
16773 emit_move_insn (orig_after, after);
16774 }
16775
16776 static GTY(()) alias_set_type TOC_alias_set = -1;
16777
16778 alias_set_type
16779 get_TOC_alias_set (void)
16780 {
16781 if (TOC_alias_set == -1)
16782 TOC_alias_set = new_alias_set ();
16783 return TOC_alias_set;
16784 }
16785
16786 /* The mode the ABI uses for a word. This is not the same as word_mode
16787 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16788
16789 static scalar_int_mode
16790 rs6000_abi_word_mode (void)
16791 {
16792 return TARGET_32BIT ? SImode : DImode;
16793 }
16794
16795 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16796 static char *
16797 rs6000_offload_options (void)
16798 {
16799 if (TARGET_64BIT)
16800 return xstrdup ("-foffload-abi=lp64");
16801 else
16802 return xstrdup ("-foffload-abi=ilp32");
16803 }
16804
16805 \f
16806 /* A quick summary of the various types of 'constant-pool tables'
16807 under PowerPC:
16808
16809 Target       Flags            Name             One table per
16810 AIX          (none)           AIX TOC          object file
16811 AIX          -mfull-toc       AIX TOC          object file
16812 AIX          -mminimal-toc    AIX minimal TOC  translation unit
16813 SVR4/EABI    (none)           SVR4 SDATA       object file
16814 SVR4/EABI    -fpic            SVR4 pic         object file
16815 SVR4/EABI    -fPIC            SVR4 PIC         translation unit
16816 SVR4/EABI    -mrelocatable    EABI TOC         function
16817 SVR4/EABI    -maix            AIX TOC          object file
16818 SVR4/EABI    -maix -mminimal-toc
16819                               AIX minimal TOC  translation unit
16820
16821 Name             Reg.  Set by  entries  contains:
16822                                made by  addrs?   fp?      sum?
16823
16824 AIX TOC          2     crt0    as       Y        option   option
16825 AIX minimal TOC  30    prolog  gcc      Y        Y        option
16826 SVR4 SDATA       13    crt0    gcc      N        Y        N
16827 SVR4 pic         30    prolog  ld       Y        not yet  N
16828 SVR4 PIC         30    prolog  gcc      Y        option   option
16829 EABI TOC         30    prolog  gcc      Y        option   option
16830
16831 */
16832
16833 /* Hash functions for the hash table. */
16834
16835 static unsigned
16836 rs6000_hash_constant (rtx k)
16837 {
16838 enum rtx_code code = GET_CODE (k);
16839 machine_mode mode = GET_MODE (k);
16840 unsigned result = (code << 3) ^ mode;
16841 const char *format;
16842 int flen, fidx;
16843
16844 format = GET_RTX_FORMAT (code);
16845 flen = strlen (format);
16846 fidx = 0;
16847
16848 switch (code)
16849 {
16850 case LABEL_REF:
16851 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16852
16853 case CONST_WIDE_INT:
16854 {
16855 int i;
16856 flen = CONST_WIDE_INT_NUNITS (k);
16857 for (i = 0; i < flen; i++)
16858 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16859 return result;
16860 }
16861
16862 case CONST_DOUBLE:
16863 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16864
16865 case CODE_LABEL:
16866 fidx = 3;
16867 break;
16868
16869 default:
16870 break;
16871 }
16872
16873 for (; fidx < flen; fidx++)
16874 switch (format[fidx])
16875 {
16876 case 's':
16877 {
16878 unsigned i, len;
16879 const char *str = XSTR (k, fidx);
16880 len = strlen (str);
16881 result = result * 613 + len;
16882 for (i = 0; i < len; i++)
16883 result = result * 613 + (unsigned) str[i];
16884 break;
16885 }
16886 case 'u':
16887 case 'e':
16888 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16889 break;
16890 case 'i':
16891 case 'n':
16892 result = result * 613 + (unsigned) XINT (k, fidx);
16893 break;
16894 case 'w':
16895 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16896 result = result * 613 + (unsigned) XWINT (k, fidx);
16897 else
16898 {
16899 size_t i;
16900 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16901 result = result * 613 + (unsigned) (XWINT (k, fidx)
16902 >> CHAR_BIT * i);
16903 }
16904 break;
16905 case '0':
16906 break;
16907 default:
16908 gcc_unreachable ();
16909 }
16910
16911 return result;
16912 }
16913
16914 hashval_t
16915 toc_hasher::hash (toc_hash_struct *thc)
16916 {
16917 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16918 }
16919
16920 /* Compare H1 and H2 for equivalence. */
16921
16922 bool
16923 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16924 {
16925 rtx r1 = h1->key;
16926 rtx r2 = h2->key;
16927
16928 if (h1->key_mode != h2->key_mode)
16929 return 0;
16930
16931 return rtx_equal_p (r1, r2);
16932 }
16933
16934 /* These are the names given by the C++ front-end to vtables, and
16935 vtable-like objects. Ideally, this logic should not be here;
16936 instead, there should be some programmatic way of inquiring as
16937 to whether or not an object is a vtable. */
16938
16939 #define VTABLE_NAME_P(NAME) \
16940 (startswith (NAME, "_vt.") \
16941 || startswith (NAME, "_ZTV") \
16942 || startswith (NAME, "_ZTT") \
16943 || startswith (NAME, "_ZTI") \
16944 || startswith (NAME, "_ZTC"))
16945
16946 #ifdef NO_DOLLAR_IN_LABEL
16947 /* Return a GGC-allocated character string translating dollar signs in
16948 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16949
16950 const char *
16951 rs6000_xcoff_strip_dollar (const char *name)
16952 {
16953 char *strip, *p;
16954 const char *q;
16955 size_t len;
16956
16957 q = (const char *) strchr (name, '$');
16958
16959 if (q == 0 || q == name)
16960 return name;
16961
16962 len = strlen (name);
16963 strip = XALLOCAVEC (char, len + 1);
16964 strcpy (strip, name);
16965 p = strip + (q - name);
16966 while (p)
16967 {
16968 *p = '_';
16969 p = strchr (p + 1, '$');
16970 }
16971
16972 return ggc_alloc_string (strip, len);
16973 }
16974 #endif
16975
16976 void
16977 rs6000_output_symbol_ref (FILE *file, rtx x)
16978 {
16979 const char *name = XSTR (x, 0);
16980
16981 /* Currently C++ toc references to vtables can be emitted before it
16982 is decided whether the vtable is public or private. If this is
16983 the case, then the linker will eventually complain that there is
16984 a reference to an unknown section. Thus, for vtables only,
16985 we emit the TOC reference to reference the identifier and not the
16986 symbol. */
16987 if (VTABLE_NAME_P (name))
16988 {
16989 RS6000_OUTPUT_BASENAME (file, name);
16990 }
16991 else
16992 assemble_name (file, name);
16993 }
16994
16995 /* Output a TOC entry. We derive the entry name from what is being
16996 written. */
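/* For instance (an illustrative sketch, assuming 64-bit AIX and the DFmode
   constant 1.0, i.e. 0x3ff0000000000000), the entry comes out roughly as

     LC..42:
	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   whereas TARGET_ELF or -mminimal-toc emits only the internal label and
   the raw doubleword after DOUBLE_INT_ASM_OP.  */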
16997
16998 void
16999 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17000 {
17001 char buf[256];
17002 const char *name = buf;
17003 rtx base = x;
17004 HOST_WIDE_INT offset = 0;
17005
17006 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17007
17008 /* When the linker won't eliminate them, don't output duplicate
17009 TOC entries (this happens on AIX if there is any kind of TOC,
17010 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17011 CODE_LABELs. */
17012 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17013 {
17014 struct toc_hash_struct *h;
17015
17016 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17017 time because GGC is not initialized at that point. */
17018 if (toc_hash_table == NULL)
17019 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17020
17021 h = ggc_alloc<toc_hash_struct> ();
17022 h->key = x;
17023 h->key_mode = mode;
17024 h->labelno = labelno;
17025
17026 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17027 if (*found == NULL)
17028 *found = h;
17029 else /* This is indeed a duplicate.
17030 Set this label equal to that label. */
17031 {
17032 fputs ("\t.set ", file);
17033 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17034 fprintf (file, "%d,", labelno);
17035 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17036 fprintf (file, "%d\n", ((*found)->labelno));
17037
17038 #ifdef HAVE_AS_TLS
17039 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17040 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17041 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17042 {
17043 fputs ("\t.set ", file);
17044 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17045 fprintf (file, "%d,", labelno);
17046 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17047 fprintf (file, "%d\n", ((*found)->labelno));
17048 }
17049 #endif
17050 return;
17051 }
17052 }
17053
17054 /* If we're going to put a double constant in the TOC, make sure it's
17055 aligned properly when strict alignment is on. */
17056 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17057 && STRICT_ALIGNMENT
17058 && GET_MODE_BITSIZE (mode) >= 64
17059 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
17060 ASM_OUTPUT_ALIGN (file, 3);
17062
17063 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17064
17065 /* Handle FP constants specially. Note that if we have a minimal
17066 TOC, things we put here aren't actually in the TOC, so we can allow
17067 FP constants. */
17068 if (CONST_DOUBLE_P (x)
17069 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17070 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17071 {
17072 long k[4];
17073
17074 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17075 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17076 else
17077 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17078
17079 if (TARGET_64BIT)
17080 {
17081 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17082 fputs (DOUBLE_INT_ASM_OP, file);
17083 else
17084 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17085 k[0] & 0xffffffff, k[1] & 0xffffffff,
17086 k[2] & 0xffffffff, k[3] & 0xffffffff);
17087 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17088 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17089 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17090 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17091 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17092 return;
17093 }
17094 else
17095 {
17096 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17097 fputs ("\t.long ", file);
17098 else
17099 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17100 k[0] & 0xffffffff, k[1] & 0xffffffff,
17101 k[2] & 0xffffffff, k[3] & 0xffffffff);
17102 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17103 k[0] & 0xffffffff, k[1] & 0xffffffff,
17104 k[2] & 0xffffffff, k[3] & 0xffffffff);
17105 return;
17106 }
17107 }
17108 else if (CONST_DOUBLE_P (x)
17109 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17110 {
17111 long k[2];
17112
17113 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17114 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17115 else
17116 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17117
17118 if (TARGET_64BIT)
17119 {
17120 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17121 fputs (DOUBLE_INT_ASM_OP, file);
17122 else
17123 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17124 k[0] & 0xffffffff, k[1] & 0xffffffff);
17125 fprintf (file, "0x%lx%08lx\n",
17126 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17127 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17128 return;
17129 }
17130 else
17131 {
17132 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17133 fputs ("\t.long ", file);
17134 else
17135 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17136 k[0] & 0xffffffff, k[1] & 0xffffffff);
17137 fprintf (file, "0x%lx,0x%lx\n",
17138 k[0] & 0xffffffff, k[1] & 0xffffffff);
17139 return;
17140 }
17141 }
17142 else if (CONST_DOUBLE_P (x)
17143 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17144 {
17145 long l;
17146
17147 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17148 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17149 else
17150 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17151
17152 if (TARGET_64BIT)
17153 {
17154 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17155 fputs (DOUBLE_INT_ASM_OP, file);
17156 else
17157 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17158 if (WORDS_BIG_ENDIAN)
17159 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17160 else
17161 fprintf (file, "0x%lx\n", l & 0xffffffff);
17162 return;
17163 }
17164 else
17165 {
17166 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17167 fputs ("\t.long ", file);
17168 else
17169 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17170 fprintf (file, "0x%lx\n", l & 0xffffffff);
17171 return;
17172 }
17173 }
17174 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17175 {
17176 unsigned HOST_WIDE_INT low;
17177 HOST_WIDE_INT high;
17178
17179 low = INTVAL (x) & 0xffffffff;
17180 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17181
17182 /* TOC entries are always Pmode-sized, so when big-endian
17183 smaller integer constants in the TOC need to be padded.
17184 (This is still a win over putting the constants in
17185 a separate constant pool, because then we'd have
17186 to have both a TOC entry _and_ the actual constant.)
17187
17188 For a 32-bit target, CONST_INT values are loaded and shifted
17189 entirely within `low' and can be stored in one TOC entry. */
17190
17191 /* It would be easy to make this work, but it doesn't now. */
17192 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17193
17194 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17195 {
17196 low |= high << 32;
17197 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17198 high = (HOST_WIDE_INT) low >> 32;
17199 low &= 0xffffffff;
17200 }
17201
17202 if (TARGET_64BIT)
17203 {
17204 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17205 fputs (DOUBLE_INT_ASM_OP, file);
17206 else
17207 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17208 (long) high & 0xffffffff, (long) low & 0xffffffff);
17209 fprintf (file, "0x%lx%08lx\n",
17210 (long) high & 0xffffffff, (long) low & 0xffffffff);
17211 return;
17212 }
17213 else
17214 {
17215 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17216 {
17217 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17218 fputs ("\t.long ", file);
17219 else
17220 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17221 (long) high & 0xffffffff, (long) low & 0xffffffff);
17222 fprintf (file, "0x%lx,0x%lx\n",
17223 (long) high & 0xffffffff, (long) low & 0xffffffff);
17224 }
17225 else
17226 {
17227 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17228 fputs ("\t.long ", file);
17229 else
17230 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17231 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17232 }
17233 return;
17234 }
17235 }
17236
17237 if (GET_CODE (x) == CONST)
17238 {
17239 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17240 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17241
17242 base = XEXP (XEXP (x, 0), 0);
17243 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17244 }
17245
17246 switch (GET_CODE (base))
17247 {
17248 case SYMBOL_REF:
17249 name = XSTR (base, 0);
17250 break;
17251
17252 case LABEL_REF:
17253 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17254 CODE_LABEL_NUMBER (XEXP (base, 0)));
17255 break;
17256
17257 case CODE_LABEL:
17258 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17259 break;
17260
17261 default:
17262 gcc_unreachable ();
17263 }
17264
17265 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17266 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17267 else
17268 {
17269 fputs ("\t.tc ", file);
17270 RS6000_OUTPUT_BASENAME (file, name);
17271
17272 if (offset < 0)
17273 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17274 else if (offset)
17275 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17276
17277 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17278 after other TOC symbols, reducing overflow of small TOC access
17279 to [TC] symbols. */
17280 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17281 ? "[TE]," : "[TC],", file);
17282 }
17283
17284 /* Currently C++ toc references to vtables can be emitted before it
17285 is decided whether the vtable is public or private. If this is
17286 the case, then the linker will eventually complain that there is
17287 a TOC reference to an unknown section. Thus, for vtables only,
17288 we emit the TOC reference to reference the symbol and not the
17289 section. */
17290 if (VTABLE_NAME_P (name))
17291 {
17292 RS6000_OUTPUT_BASENAME (file, name);
17293 if (offset < 0)
17294 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17295 else if (offset > 0)
17296 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17297 }
17298 else
17299 output_addr_const (file, x);
17300
17301 #if HAVE_AS_TLS
17302 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17303 {
17304 switch (SYMBOL_REF_TLS_MODEL (base))
17305 {
17306 case 0:
17307 break;
17308 case TLS_MODEL_LOCAL_EXEC:
17309 fputs ("@le", file);
17310 break;
17311 case TLS_MODEL_INITIAL_EXEC:
17312 fputs ("@ie", file);
17313 break;
17314 /* Use global-dynamic for local-dynamic. */
17315 case TLS_MODEL_GLOBAL_DYNAMIC:
17316 case TLS_MODEL_LOCAL_DYNAMIC:
17317 putc ('\n', file);
17318 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17319 fputs ("\t.tc .", file);
17320 RS6000_OUTPUT_BASENAME (file, name);
17321 fputs ("[TC],", file);
17322 output_addr_const (file, x);
17323 fputs ("@m", file);
17324 break;
17325 default:
17326 gcc_unreachable ();
17327 }
17328 }
17329 #endif
17330
17331 putc ('\n', file);
17332 }
17333 \f
17334 /* Output an assembler pseudo-op to write an ASCII string of N characters
17335 starting at P to FILE.
17336
17337 On the RS/6000, we have to do this using the .byte operation and
17338 write out special characters outside the quoted string.
17339 Also, the assembler is broken; very long strings are truncated,
17340 so we must artificially break them up early. */
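
/* E.g. (illustrative) the three bytes 'H', 'i', '\n' come out as

	.byte "Hi"
	.byte 10

   Printable runs share one quoted .byte directive, non-printable bytes are
   written in decimal, and a quoted run is closed after 512 characters to
   stay within the assembler's limits.  */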
17341
17342 void
17343 output_ascii (FILE *file, const char *p, int n)
17344 {
17345 char c;
17346 int i, count_string;
17347 const char *for_string = "\t.byte \"";
17348 const char *for_decimal = "\t.byte ";
17349 const char *to_close = NULL;
17350
17351 count_string = 0;
17352 for (i = 0; i < n; i++)
17353 {
17354 c = *p++;
17355 if (c >= ' ' && c < 0177)
17356 {
17357 if (for_string)
17358 fputs (for_string, file);
17359 putc (c, file);
17360
17361 /* Write two quotes to get one. */
17362 if (c == '"')
17363 {
17364 putc (c, file);
17365 ++count_string;
17366 }
17367
17368 for_string = NULL;
17369 for_decimal = "\"\n\t.byte ";
17370 to_close = "\"\n";
17371 ++count_string;
17372
17373 if (count_string >= 512)
17374 {
17375 fputs (to_close, file);
17376
17377 for_string = "\t.byte \"";
17378 for_decimal = "\t.byte ";
17379 to_close = NULL;
17380 count_string = 0;
17381 }
17382 }
17383 else
17384 {
17385 if (for_decimal)
17386 fputs (for_decimal, file);
17387 fprintf (file, "%d", c);
17388
17389 for_string = "\n\t.byte \"";
17390 for_decimal = ", ";
17391 to_close = "\n";
17392 count_string = 0;
17393 }
17394 }
17395
17396 /* Now close the string if we have written one. Then end the line. */
17397 if (to_close)
17398 fputs (to_close, file);
17399 }
17400 \f
17401 /* Generate a unique section name for FILENAME for a section type
17402 represented by SECTION_DESC. Output goes into BUF.
17403
17404 SECTION_DESC can be any string, as long as it is different for each
17405 possible section type.
17406
17407 We name the section in the same manner as xlc. The name begins with an
17408 underscore followed by the filename (after stripping any leading directory
17409 names) with the last period replaced by the string SECTION_DESC. If
17410 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17411 the name. */
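
/* For example (illustrative): FILENAME "subdir/foo.c" with SECTION_DESC
   "bss_" produces "_foobss_"; characters that are neither alphanumeric nor
   the last period are simply dropped.  */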
17412
17413 void
17414 rs6000_gen_section_name (char **buf, const char *filename,
17415 const char *section_desc)
17416 {
17417 const char *q, *after_last_slash, *last_period = 0;
17418 char *p;
17419 int len;
17420
17421 after_last_slash = filename;
17422 for (q = filename; *q; q++)
17423 {
17424 if (*q == '/')
17425 after_last_slash = q + 1;
17426 else if (*q == '.')
17427 last_period = q;
17428 }
17429
17430 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17431 *buf = (char *) xmalloc (len);
17432
17433 p = *buf;
17434 *p++ = '_';
17435
17436 for (q = after_last_slash; *q; q++)
17437 {
17438 if (q == last_period)
17439 {
17440 strcpy (p, section_desc);
17441 p += strlen (section_desc);
17442 break;
17443 }
17444
17445 else if (ISALNUM (*q))
17446 *p++ = *q;
17447 }
17448
17449 if (last_period == 0)
17450 strcpy (p, section_desc);
17451 else
17452 *p = '\0';
17453 }
17454 \f
17455 /* Emit profile function. */
17456
17457 void
17458 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17459 {
17460 /* Non-standard profiling for kernels, which just saves LR then calls
17461 _mcount without worrying about arg saves. The idea is to change
17462 the function prologue as little as possible as it isn't easy to
17463 account for arg save/restore code added just for _mcount. */
17464 if (TARGET_PROFILE_KERNEL)
17465 return;
17466
17467 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17468 {
17469 #ifndef NO_PROFILE_COUNTERS
17470 # define NO_PROFILE_COUNTERS 0
17471 #endif
17472 if (NO_PROFILE_COUNTERS)
17473 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17474 LCT_NORMAL, VOIDmode);
17475 else
17476 {
17477 char buf[30];
17478 const char *label_name;
17479 rtx fun;
17480
17481 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17482 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17483 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17484
17485 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17486 LCT_NORMAL, VOIDmode, fun, Pmode);
17487 }
17488 }
17489 else if (DEFAULT_ABI == ABI_DARWIN)
17490 {
17491 const char *mcount_name = RS6000_MCOUNT;
17492 int caller_addr_regno = LR_REGNO;
17493
17494 /* Be conservative and always set this, at least for now. */
17495 crtl->uses_pic_offset_table = 1;
17496
17497 #if TARGET_MACHO
17498 /* For PIC code, set up a stub and collect the caller's address
17499 from r0, which is where the prologue puts it. */
17500 if (MACHOPIC_INDIRECT
17501 && crtl->uses_pic_offset_table)
17502 caller_addr_regno = 0;
17503 #endif
17504 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17505 LCT_NORMAL, VOIDmode,
17506 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17507 }
17508 }
17509
17510 /* Write function profiler code. */
17511
17512 void
17513 output_function_profiler (FILE *file, int labelno)
17514 {
17515 char buf[100];
17516
17517 switch (DEFAULT_ABI)
17518 {
17519 default:
17520 gcc_unreachable ();
17521
17522 case ABI_V4:
17523 if (!TARGET_32BIT)
17524 {
17525 warning (0, "no profiling of 64-bit code for this ABI");
17526 return;
17527 }
17528 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17529 fprintf (file, "\tmflr %s\n", reg_names[0]);
17530 if (NO_PROFILE_COUNTERS)
17531 {
17532 asm_fprintf (file, "\tstw %s,4(%s)\n",
17533 reg_names[0], reg_names[1]);
17534 }
17535 else if (TARGET_SECURE_PLT && flag_pic)
17536 {
17537 if (TARGET_LINK_STACK)
17538 {
17539 char name[32];
17540 get_ppc476_thunk_name (name);
17541 asm_fprintf (file, "\tbl %s\n", name);
17542 }
17543 else
17544 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17545 asm_fprintf (file, "\tstw %s,4(%s)\n",
17546 reg_names[0], reg_names[1]);
17547 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17548 asm_fprintf (file, "\taddis %s,%s,",
17549 reg_names[12], reg_names[12]);
17550 assemble_name (file, buf);
17551 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17552 assemble_name (file, buf);
17553 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17554 }
17555 else if (flag_pic == 1)
17556 {
17557 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17558 asm_fprintf (file, "\tstw %s,4(%s)\n",
17559 reg_names[0], reg_names[1]);
17560 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17561 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17562 assemble_name (file, buf);
17563 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17564 }
17565 else if (flag_pic > 1)
17566 {
17567 asm_fprintf (file, "\tstw %s,4(%s)\n",
17568 reg_names[0], reg_names[1]);
17569 /* Now, we need to get the address of the label. */
17570 if (TARGET_LINK_STACK)
17571 {
17572 char name[32];
17573 get_ppc476_thunk_name (name);
17574 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17575 assemble_name (file, buf);
17576 fputs ("-.\n1:", file);
17577 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17578 asm_fprintf (file, "\taddi %s,%s,4\n",
17579 reg_names[11], reg_names[11]);
17580 }
17581 else
17582 {
17583 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17584 assemble_name (file, buf);
17585 fputs ("-.\n1:", file);
17586 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17587 }
17588 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17589 reg_names[0], reg_names[11]);
17590 asm_fprintf (file, "\tadd %s,%s,%s\n",
17591 reg_names[0], reg_names[0], reg_names[11]);
17592 }
17593 else
17594 {
17595 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17596 assemble_name (file, buf);
17597 fputs ("@ha\n", file);
17598 asm_fprintf (file, "\tstw %s,4(%s)\n",
17599 reg_names[0], reg_names[1]);
17600 asm_fprintf (file, "\tla %s,", reg_names[0]);
17601 assemble_name (file, buf);
17602 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17603 }
17604
17605 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17606 fprintf (file, "\tbl %s%s\n",
17607 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17608 break;
17609
17610 case ABI_AIX:
17611 case ABI_ELFv2:
17612 case ABI_DARWIN:
17613 /* Don't do anything, done in output_profile_hook (). */
17614 break;
17615 }
17616 }
17617
17618 \f
17619
17620 /* The following variable value is the last issued insn. */
17621
17622 static rtx_insn *last_scheduled_insn;
17623
17624 /* The following variable helps to balance issuing of load and
17625 store instructions. */
17626
17627 static int load_store_pendulum;
17628
17629 /* The following variable helps pair divide insns during scheduling. */
17630 static int divide_cnt;
17631 /* The following variable helps pair and alternate vector and vector load
17632 insns during scheduling. */
17633 static int vec_pairing;
17634
17635
17636 /* Power4 load update and store update instructions are cracked into a
17637 load or store and an integer insn which are executed in the same cycle.
17638 Branches have their own dispatch slot which does not count against the
17639 GCC issue rate, but it changes the program flow so there are no other
17640 instructions to issue in this cycle. */
17641
17642 static int
17643 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17644 {
17645 last_scheduled_insn = insn;
17646 if (GET_CODE (PATTERN (insn)) == USE
17647 || GET_CODE (PATTERN (insn)) == CLOBBER)
17648 {
17649 cached_can_issue_more = more;
17650 return cached_can_issue_more;
17651 }
17652
17653 if (insn_terminates_group_p (insn, current_group))
17654 {
17655 cached_can_issue_more = 0;
17656 return cached_can_issue_more;
17657 }
17658
17659 /* If the insn has no reservation (it was not recognized), don't count it against the issue rate. */
17660 if (recog_memoized (insn) < 0)
17661 return more;
17662
17663 if (rs6000_sched_groups)
17664 {
17665 if (is_microcoded_insn (insn))
17666 cached_can_issue_more = 0;
17667 else if (is_cracked_insn (insn))
17668 cached_can_issue_more = more > 2 ? more - 2 : 0;
17669 else
17670 cached_can_issue_more = more - 1;
17671
17672 return cached_can_issue_more;
17673 }
17674
17675 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17676 return 0;
17677
17678 cached_can_issue_more = more - 1;
17679 return cached_can_issue_more;
17680 }
17681
17682 static int
17683 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17684 {
17685 int r = rs6000_variable_issue_1 (insn, more);
17686 if (verbose)
17687 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17688 return r;
17689 }
17690
17691 /* Adjust the cost of a scheduling dependency. Return the new cost of
17692 the dependency of INSN on DEP_INSN. COST is the current cost. */
17693
17694 static int
17695 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17696 unsigned int)
17697 {
17698 enum attr_type attr_type;
17699
17700 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17701 return cost;
17702
17703 switch (dep_type)
17704 {
17705 case REG_DEP_TRUE:
17706 {
17707 /* Data dependency; DEP_INSN writes a register that INSN reads
17708 some cycles later. */
17709
17710 /* Separate a load from a narrower, dependent store. */
17711 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17712 || rs6000_tune == PROCESSOR_POWER10)
17713 && GET_CODE (PATTERN (insn)) == SET
17714 && GET_CODE (PATTERN (dep_insn)) == SET
17715 && MEM_P (XEXP (PATTERN (insn), 1))
17716 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17717 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17718 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17719 return cost + 14;
17720
17721 attr_type = get_attr_type (insn);
17722
17723 switch (attr_type)
17724 {
17725 case TYPE_JMPREG:
17726 /* Tell the first scheduling pass about the latency between
17727 a mtctr and bctr (and mtlr and br/blr). The first
17728 scheduling pass will not know about this latency since
17729 the mtctr instruction, which has the latency associated
17730 to it, will be generated by reload. */
17731 return 4;
17732 case TYPE_BRANCH:
17733 /* Leave some extra cycles between a compare and its
17734 dependent branch, to inhibit expensive mispredicts. */
17735 if ((rs6000_tune == PROCESSOR_PPC603
17736 || rs6000_tune == PROCESSOR_PPC604
17737 || rs6000_tune == PROCESSOR_PPC604e
17738 || rs6000_tune == PROCESSOR_PPC620
17739 || rs6000_tune == PROCESSOR_PPC630
17740 || rs6000_tune == PROCESSOR_PPC750
17741 || rs6000_tune == PROCESSOR_PPC7400
17742 || rs6000_tune == PROCESSOR_PPC7450
17743 || rs6000_tune == PROCESSOR_PPCE5500
17744 || rs6000_tune == PROCESSOR_PPCE6500
17745 || rs6000_tune == PROCESSOR_POWER4
17746 || rs6000_tune == PROCESSOR_POWER5
17747 || rs6000_tune == PROCESSOR_POWER7
17748 || rs6000_tune == PROCESSOR_POWER8
17749 || rs6000_tune == PROCESSOR_POWER9
17750 || rs6000_tune == PROCESSOR_POWER10
17751 || rs6000_tune == PROCESSOR_CELL)
17752 && recog_memoized (dep_insn)
17753 && (INSN_CODE (dep_insn) >= 0))
17754
17755 switch (get_attr_type (dep_insn))
17756 {
17757 case TYPE_CMP:
17758 case TYPE_FPCOMPARE:
17759 case TYPE_CR_LOGICAL:
17760 return cost + 2;
17761 case TYPE_EXTS:
17762 case TYPE_MUL:
17763 if (get_attr_dot (dep_insn) == DOT_YES)
17764 return cost + 2;
17765 else
17766 break;
17767 case TYPE_SHIFT:
17768 if (get_attr_dot (dep_insn) == DOT_YES
17769 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17770 return cost + 2;
17771 else
17772 break;
17773 default:
17774 break;
17775 }
17776 break;
17777
17778 case TYPE_STORE:
17779 case TYPE_FPSTORE:
17780 if ((rs6000_tune == PROCESSOR_POWER6)
17781 && recog_memoized (dep_insn)
17782 && (INSN_CODE (dep_insn) >= 0))
17783 {
17784
17785 if (GET_CODE (PATTERN (insn)) != SET)
17786 /* If this happens, we have to extend this to schedule
17787 optimally. Return default for now. */
17788 return cost;
17789
17790 /* Adjust the cost for the case where the value written
17791 by a fixed point operation is used as the address
17792 gen value on a store. */
17793 switch (get_attr_type (dep_insn))
17794 {
17795 case TYPE_LOAD:
17796 case TYPE_CNTLZ:
17797 {
17798 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17799 return get_attr_sign_extend (dep_insn)
17800 == SIGN_EXTEND_YES ? 6 : 4;
17801 break;
17802 }
17803 case TYPE_SHIFT:
17804 {
17805 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17806 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17807 6 : 3;
17808 break;
17809 }
17810 case TYPE_INTEGER:
17811 case TYPE_ADD:
17812 case TYPE_LOGICAL:
17813 case TYPE_EXTS:
17814 case TYPE_INSERT:
17815 {
17816 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17817 return 3;
17818 break;
17819 }
17820 case TYPE_STORE:
17821 case TYPE_FPLOAD:
17822 case TYPE_FPSTORE:
17823 {
17824 if (get_attr_update (dep_insn) == UPDATE_YES
17825 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17826 return 3;
17827 break;
17828 }
17829 case TYPE_MUL:
17830 {
17831 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17832 return 17;
17833 break;
17834 }
17835 case TYPE_DIV:
17836 {
17837 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17838 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17839 break;
17840 }
17841 default:
17842 break;
17843 }
17844 }
17845 break;
17846
17847 case TYPE_LOAD:
17848 if ((rs6000_tune == PROCESSOR_POWER6)
17849 && recog_memoized (dep_insn)
17850 && (INSN_CODE (dep_insn) >= 0))
17851 {
17852
17853 /* Adjust the cost for the case where the value written
17854 by a fixed point instruction is used within the address
17855 gen portion of a subsequent load(u)(x) */
17856 switch (get_attr_type (dep_insn))
17857 {
17858 case TYPE_LOAD:
17859 case TYPE_CNTLZ:
17860 {
17861 if (set_to_load_agen (dep_insn, insn))
17862 return get_attr_sign_extend (dep_insn)
17863 == SIGN_EXTEND_YES ? 6 : 4;
17864 break;
17865 }
17866 case TYPE_SHIFT:
17867 {
17868 if (set_to_load_agen (dep_insn, insn))
17869 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17870 6 : 3;
17871 break;
17872 }
17873 case TYPE_INTEGER:
17874 case TYPE_ADD:
17875 case TYPE_LOGICAL:
17876 case TYPE_EXTS:
17877 case TYPE_INSERT:
17878 {
17879 if (set_to_load_agen (dep_insn, insn))
17880 return 3;
17881 break;
17882 }
17883 case TYPE_STORE:
17884 case TYPE_FPLOAD:
17885 case TYPE_FPSTORE:
17886 {
17887 if (get_attr_update (dep_insn) == UPDATE_YES
17888 && set_to_load_agen (dep_insn, insn))
17889 return 3;
17890 break;
17891 }
17892 case TYPE_MUL:
17893 {
17894 if (set_to_load_agen (dep_insn, insn))
17895 return 17;
17896 break;
17897 }
17898 case TYPE_DIV:
17899 {
17900 if (set_to_load_agen (dep_insn, insn))
17901 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17902 break;
17903 }
17904 default:
17905 break;
17906 }
17907 }
17908 break;
17909
17910 default:
17911 break;
17912 }
17913
17914 /* Fall out to return default cost. */
17915 }
17916 break;
17917
17918 case REG_DEP_OUTPUT:
17919 /* Output dependency; DEP_INSN writes a register that INSN writes some
17920 cycles later. */
17921 if ((rs6000_tune == PROCESSOR_POWER6)
17922 && recog_memoized (dep_insn)
17923 && (INSN_CODE (dep_insn) >= 0))
17924 {
17925 attr_type = get_attr_type (insn);
17926
17927 switch (attr_type)
17928 {
17929 case TYPE_FP:
17930 case TYPE_FPSIMPLE:
17931 if (get_attr_type (dep_insn) == TYPE_FP
17932 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17933 return 1;
17934 break;
17935 default:
17936 break;
17937 }
17938 }
17939 /* Fall through, no cost for output dependency. */
17940 /* FALLTHRU */
17941
17942 case REG_DEP_ANTI:
17943 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17944 cycles later. */
17945 return 0;
17946
17947 default:
17948 gcc_unreachable ();
17949 }
17950
17951 return cost;
17952 }
17953
17954 /* Debug version of rs6000_adjust_cost. */
17955
17956 static int
17957 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17958 int cost, unsigned int dw)
17959 {
17960 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17961
17962 if (ret != cost)
17963 {
17964 const char *dep;
17965
17966 switch (dep_type)
17967 {
17968 default: dep = "unknown dependency"; break;
17969 case REG_DEP_TRUE: dep = "data dependency"; break;
17970 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17971 case REG_DEP_ANTI: dep = "anti dependency"; break;
17972 }
17973
17974 fprintf (stderr,
17975 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17976 "%s, insn:\n", ret, cost, dep);
17977
17978 debug_rtx (insn);
17979 }
17980
17981 return ret;
17982 }
17983
17984 /* Return true if INSN is microcoded, false otherwise. */
17986
17987 static bool
17988 is_microcoded_insn (rtx_insn *insn)
17989 {
17990 if (!insn || !NONDEBUG_INSN_P (insn)
17991 || GET_CODE (PATTERN (insn)) == USE
17992 || GET_CODE (PATTERN (insn)) == CLOBBER)
17993 return false;
17994
17995 if (rs6000_tune == PROCESSOR_CELL)
17996 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17997
17998 if (rs6000_sched_groups
17999 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18000 {
18001 enum attr_type type = get_attr_type (insn);
18002 if ((type == TYPE_LOAD
18003 && get_attr_update (insn) == UPDATE_YES
18004 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18005 || ((type == TYPE_LOAD || type == TYPE_STORE)
18006 && get_attr_update (insn) == UPDATE_YES
18007 && get_attr_indexed (insn) == INDEXED_YES)
18008 || type == TYPE_MFCR)
18009 return true;
18010 }
18011
18012 return false;
18013 }
18014
18015 /* The function returns true if INSN is cracked into 2 instructions
18016 by the processor (and therefore occupies 2 issue slots). */
18017
18018 static bool
18019 is_cracked_insn (rtx_insn *insn)
18020 {
18021 if (!insn || !NONDEBUG_INSN_P (insn)
18022 || GET_CODE (PATTERN (insn)) == USE
18023 || GET_CODE (PATTERN (insn)) == CLOBBER)
18024 return false;
18025
18026 if (rs6000_sched_groups
18027 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18028 {
18029 enum attr_type type = get_attr_type (insn);
18030 if ((type == TYPE_LOAD
18031 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18032 && get_attr_update (insn) == UPDATE_NO)
18033 || (type == TYPE_LOAD
18034 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18035 && get_attr_update (insn) == UPDATE_YES
18036 && get_attr_indexed (insn) == INDEXED_NO)
18037 || (type == TYPE_STORE
18038 && get_attr_update (insn) == UPDATE_YES
18039 && get_attr_indexed (insn) == INDEXED_NO)
18040 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18041 && get_attr_update (insn) == UPDATE_YES)
18042 || (type == TYPE_CR_LOGICAL
18043 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18044 || (type == TYPE_EXTS
18045 && get_attr_dot (insn) == DOT_YES)
18046 || (type == TYPE_SHIFT
18047 && get_attr_dot (insn) == DOT_YES
18048 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18049 || (type == TYPE_MUL
18050 && get_attr_dot (insn) == DOT_YES)
18051 || type == TYPE_DIV
18052 || (type == TYPE_INSERT
18053 && get_attr_size (insn) == SIZE_32))
18054 return true;
18055 }
18056
18057 return false;
18058 }
18059
18060 /* The function returns true if INSN can be issued only from
18061 the branch slot. */
18062
18063 static bool
18064 is_branch_slot_insn (rtx_insn *insn)
18065 {
18066 if (!insn || !NONDEBUG_INSN_P (insn)
18067 || GET_CODE (PATTERN (insn)) == USE
18068 || GET_CODE (PATTERN (insn)) == CLOBBER)
18069 return false;
18070
18071 if (rs6000_sched_groups)
18072 {
18073 enum attr_type type = get_attr_type (insn);
18074 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18075 return true;
18076 return false;
18077 }
18078
18079 return false;
18080 }
18081
18082 /* Return true if OUT_INSN sets a value that is used in the address
18083 generation computation of IN_INSN. */
18084 static bool
18085 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18086 {
18087 rtx out_set, in_set;
18088
18089 /* For performance reasons, only handle the simple case where
18090 both loads are a single_set. */
18091 out_set = single_set (out_insn);
18092 if (out_set)
18093 {
18094 in_set = single_set (in_insn);
18095 if (in_set)
18096 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18097 }
18098
18099 return false;
18100 }
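/* For example (an illustrative sketch; register numbers and modes are
   hypothetical), given the dependent pair

     (set (reg:DI 9) (plus:DI (reg:DI 3) (const_int 8)))   <- OUT_INSN
     (set (reg:DI 10) (mem:DI (reg:DI 9)))                 <- IN_INSN

   SET_DEST of OUT_INSN is (reg:DI 9), which is mentioned in the SET_SRC
   of IN_INSN (the address of the MEM), so the function returns true and
   rs6000_adjust_cost treats OUT_INSN as feeding the load's address
   generation.  */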
18101
18102 /* Try to determine base/offset/size parts of the given MEM.
18103 Return true if successful, false if any of the values couldn't
18104 be determined.
18105
18106 This function only looks for REG or REG+CONST address forms.
18107 REG+REG address form will return false. */
18108
18109 static bool
18110 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18111 HOST_WIDE_INT *size)
18112 {
18113 rtx addr_rtx;
18114 if (MEM_SIZE_KNOWN_P (mem))
18115 *size = MEM_SIZE (mem);
18116 else
18117 return false;
18118
18119 addr_rtx = (XEXP (mem, 0));
18120 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18121 addr_rtx = XEXP (addr_rtx, 1);
18122
18123 *offset = 0;
18124 while (GET_CODE (addr_rtx) == PLUS
18125 && CONST_INT_P (XEXP (addr_rtx, 1)))
18126 {
18127 *offset += INTVAL (XEXP (addr_rtx, 1));
18128 addr_rtx = XEXP (addr_rtx, 0);
18129 }
18130 if (!REG_P (addr_rtx))
18131 return false;
18132
18133 *base = addr_rtx;
18134 return true;
18135 }
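#if 0
/* Usage sketch, illustrative only and hence not built: decompose a MEM
   whose address is (plus (plus (reg 9) (const_int 16)) (const_int 8)).
   The loop above sums both constants, so this would report base r9,
   offset 24, and whatever size MEM_SIZE recorded.  */
static void
dump_memref_parts (rtx mem)
{
  rtx base;
  HOST_WIDE_INT offset, size;
  if (get_memref_parts (mem, &base, &offset, &size))
    fprintf (stderr,
	     "base r%u, offset " HOST_WIDE_INT_PRINT_DEC
	     ", size " HOST_WIDE_INT_PRINT_DEC "\n",
	     REGNO (base), offset, size);
}
#endif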
18136
18137 /* If the target storage locations of arguments MEM1 and MEM2 are
18138 adjacent, then return the argument that has the lower address.
18139 Otherwise, return NULL_RTX. */
18140
18141 static rtx
18142 adjacent_mem_locations (rtx mem1, rtx mem2)
18143 {
18144 rtx reg1, reg2;
18145 HOST_WIDE_INT off1, size1, off2, size2;
18146
18147 if (MEM_P (mem1)
18148 && MEM_P (mem2)
18149 && get_memref_parts (mem1, &reg1, &off1, &size1)
18150 && get_memref_parts (mem2, &reg2, &off2, &size2)
18151 && REGNO (reg1) == REGNO (reg2))
18152 {
18153 if (off1 + size1 == off2)
18154 return mem1;
18155 else if (off2 + size2 == off1)
18156 return mem2;
18157 }
18158
18159 return NULL_RTX;
18160 }
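/* For example (hypothetical offsets): if MEM1 is an 8-byte access at
   r9+0 and MEM2 is an 8-byte access at r9+8, then off1 + size1 == off2
   and MEM1, the lower-addressed access, is returned.  Accesses at r9+0
   and r9+16, or accesses based on different registers, yield NULL_RTX.  */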
18161
18162 /* This function returns true if it can be determined that the two MEM
18163 locations overlap by at least 1 byte based on base reg/offset/size. */
18164
18165 static bool
18166 mem_locations_overlap (rtx mem1, rtx mem2)
18167 {
18168 rtx reg1, reg2;
18169 HOST_WIDE_INT off1, size1, off2, size2;
18170
18171 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18172 && get_memref_parts (mem2, &reg2, &off2, &size2))
18173 return ((REGNO (reg1) == REGNO (reg2))
18174 && (((off1 <= off2) && (off1 + size1 > off2))
18175 || ((off2 <= off1) && (off2 + size2 > off1))));
18176
18177 return false;
18178 }
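/* Worked example (hypothetical offsets): an 8-byte access at r9+0
   covers bytes [0, 8) past r9 and a 4-byte access at r9+4 covers
   [4, 8); the intervals intersect, so the function returns true.
   Accesses at r9+0 and r9+8 share no byte and return false.  */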
18179
18180 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
18181 Increase the priority to execute INSN earlier, reduce the priority
18182 to execute INSN later. */
18185
18186 static int
18187 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18188 {
18189 rtx load_mem, str_mem;
18190 /* On machines (like the 750) which have asymmetric integer units,
18191 where one integer unit can do multiply and divides and the other
18192 can't, reduce the priority of multiply/divide so it is scheduled
18193 before other integer operations. */
18194
18195 #if 0
18196 if (! INSN_P (insn))
18197 return priority;
18198
18199 if (GET_CODE (PATTERN (insn)) == USE)
18200 return priority;
18201
18202 switch (rs6000_tune) {
18203 case PROCESSOR_PPC750:
18204 switch (get_attr_type (insn))
18205 {
18206 default:
18207 break;
18208
18209 case TYPE_MUL:
18210 case TYPE_DIV:
18211 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18212 priority, priority);
18213 if (priority >= 0 && priority < 0x01000000)
18214 priority >>= 3;
18215 break;
18216 }
18217 }
18218 #endif
18219
18220 if (insn_must_be_first_in_group (insn)
18221 && reload_completed
18222 && current_sched_info->sched_max_insns_priority
18223 && rs6000_sched_restricted_insns_priority)
18224 {
18225
18226 /* Prioritize insns that can be dispatched only in the first
18227 dispatch slot. */
18228 if (rs6000_sched_restricted_insns_priority == 1)
18229 /* Attach highest priority to insn. This means that in
18230 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18231 precede 'priority' (critical path) considerations. */
18232 return current_sched_info->sched_max_insns_priority;
18233 else if (rs6000_sched_restricted_insns_priority == 2)
18234 /* Increase priority of insn by a minimal amount. This means that in
18235 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18236 considerations precede dispatch-slot restriction considerations. */
18237 return (priority + 1);
18238 }
18239
18240 if (rs6000_tune == PROCESSOR_POWER6
18241 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18242 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18243 /* Attach highest priority to insn if the scheduler has just issued two
18244 stores and this instruction is a load, or two loads and this instruction
18245 is a store. Power6 wants loads and stores scheduled alternately
18246 when possible */
18247 return current_sched_info->sched_max_insns_priority;
18248
18249 return priority;
18250 }
18251
18252 /* Return true if the instruction is nonpipelined on the Cell. */
18253 static bool
18254 is_nonpipeline_insn (rtx_insn *insn)
18255 {
18256 enum attr_type type;
18257 if (!insn || !NONDEBUG_INSN_P (insn)
18258 || GET_CODE (PATTERN (insn)) == USE
18259 || GET_CODE (PATTERN (insn)) == CLOBBER)
18260 return false;
18261
18262 type = get_attr_type (insn);
18263 if (type == TYPE_MUL
18264 || type == TYPE_DIV
18265 || type == TYPE_SDIV
18266 || type == TYPE_DDIV
18267 || type == TYPE_SSQRT
18268 || type == TYPE_DSQRT
18269 || type == TYPE_MFCR
18270 || type == TYPE_MFCRF
18271 || type == TYPE_MFJMPR)
18272 {
18273 return true;
18274 }
18275 return false;
18276 }
18277
18278
18279 /* Return how many instructions the machine can issue per cycle. */
18280
18281 static int
18282 rs6000_issue_rate (void)
18283 {
18284 /* Unless scheduling for register pressure, use an issue rate of 1 for
18285 the first scheduling pass to decrease degradation. */
18286 if (!reload_completed && !flag_sched_pressure)
18287 return 1;
18288
18289 switch (rs6000_tune) {
18290 case PROCESSOR_RS64A:
18291 case PROCESSOR_PPC601: /* ? */
18292 case PROCESSOR_PPC7450:
18293 return 3;
18294 case PROCESSOR_PPC440:
18295 case PROCESSOR_PPC603:
18296 case PROCESSOR_PPC750:
18297 case PROCESSOR_PPC7400:
18298 case PROCESSOR_PPC8540:
18299 case PROCESSOR_PPC8548:
18300 case PROCESSOR_CELL:
18301 case PROCESSOR_PPCE300C2:
18302 case PROCESSOR_PPCE300C3:
18303 case PROCESSOR_PPCE500MC:
18304 case PROCESSOR_PPCE500MC64:
18305 case PROCESSOR_PPCE5500:
18306 case PROCESSOR_PPCE6500:
18307 case PROCESSOR_TITAN:
18308 return 2;
18309 case PROCESSOR_PPC476:
18310 case PROCESSOR_PPC604:
18311 case PROCESSOR_PPC604e:
18312 case PROCESSOR_PPC620:
18313 case PROCESSOR_PPC630:
18314 return 4;
18315 case PROCESSOR_POWER4:
18316 case PROCESSOR_POWER5:
18317 case PROCESSOR_POWER6:
18318 case PROCESSOR_POWER7:
18319 return 5;
18320 case PROCESSOR_POWER8:
18321 return 7;
18322 case PROCESSOR_POWER9:
18323 return 6;
18324 case PROCESSOR_POWER10:
18325 return 8;
18326 default:
18327 return 1;
18328 }
18329 }
18330
18331 /* Return how many instructions to look ahead for better insn
18332 scheduling. */
18333
18334 static int
18335 rs6000_use_sched_lookahead (void)
18336 {
18337 switch (rs6000_tune)
18338 {
18339 case PROCESSOR_PPC8540:
18340 case PROCESSOR_PPC8548:
18341 return 4;
18342
18343 case PROCESSOR_CELL:
18344 return (reload_completed ? 8 : 0);
18345
18346 default:
18347 return 0;
18348 }
18349 }
18350
18351 /* We are choosing an insn from the ready queue. Return zero if INSN can be
18352 chosen. */
18353 static int
18354 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18355 {
18356 if (ready_index == 0)
18357 return 0;
18358
18359 if (rs6000_tune != PROCESSOR_CELL)
18360 return 0;
18361
18362 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18363
18364 if (!reload_completed
18365 || is_nonpipeline_insn (insn)
18366 || is_microcoded_insn (insn))
18367 return 1;
18368
18369 return 0;
18370 }
18371
18372 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18373 and return true. */
18374
18375 static bool
18376 find_mem_ref (rtx pat, rtx *mem_ref)
18377 {
18378 const char * fmt;
18379 int i, j;
18380
18381 /* stack_tie does not produce any real memory traffic. */
18382 if (tie_operand (pat, VOIDmode))
18383 return false;
18384
18385 if (MEM_P (pat))
18386 {
18387 *mem_ref = pat;
18388 return true;
18389 }
18390
18391 /* Recursively process the pattern. */
18392 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18393
18394 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18395 {
18396 if (fmt[i] == 'e')
18397 {
18398 if (find_mem_ref (XEXP (pat, i), mem_ref))
18399 return true;
18400 }
18401 else if (fmt[i] == 'E')
18402 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18403 {
18404 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18405 return true;
18406 }
18407 }
18408
18409 return false;
18410 }
18411
18412 /* Determine if PAT is a PATTERN of a load insn. */
18413
18414 static bool
18415 is_load_insn1 (rtx pat, rtx *load_mem)
18416 {
18417 if (!pat)
18418 return false;
18419
18420 if (GET_CODE (pat) == SET)
18421 {
18422 if (REG_P (SET_DEST (pat)))
18423 return find_mem_ref (SET_SRC (pat), load_mem);
18424 else
18425 return false;
18426 }
18427
18428 if (GET_CODE (pat) == PARALLEL)
18429 {
18430 int i;
18431
18432 for (i = 0; i < XVECLEN (pat, 0); i++)
18433 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18434 return true;
18435 }
18436
18437 return false;
18438 }
18439
18440 /* Determine if INSN loads from memory. */
18441
18442 static bool
18443 is_load_insn (rtx insn, rtx *load_mem)
18444 {
18445 if (!insn || !INSN_P (insn))
18446 return false;
18447
18448 if (CALL_P (insn))
18449 return false;
18450
18451 return is_load_insn1 (PATTERN (insn), load_mem);
18452 }
18453
18454 /* Determine if PAT is a PATTERN of a store insn. */
18455
18456 static bool
18457 is_store_insn1 (rtx pat, rtx *str_mem)
18458 {
18459 if (!pat)
18460 return false;
18461
18462 if (GET_CODE (pat) == SET)
18463 {
18464 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18465 return find_mem_ref (SET_DEST (pat), str_mem);
18466 else
18467 return false;
18468 }
18469
18470 if (GET_CODE (pat) == PARALLEL)
18471 {
18472 int i;
18473
18474 for (i = 0; i < XVECLEN (pat, 0); i++)
18475 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18476 return true;
18477 }
18478
18479 return false;
18480 }
18481
18482 /* Determine if INSN stores to memory. */
18483
18484 static bool
18485 is_store_insn (rtx insn, rtx *str_mem)
18486 {
18487 if (!insn || !INSN_P (insn))
18488 return false;
18489
18490 return is_store_insn1 (PATTERN (insn), str_mem);
18491 }
18492
18493 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18494
18495 static bool
18496 is_power9_pairable_vec_type (enum attr_type type)
18497 {
18498 switch (type)
18499 {
18500 case TYPE_VECSIMPLE:
18501 case TYPE_VECCOMPLEX:
18502 case TYPE_VECDIV:
18503 case TYPE_VECCMP:
18504 case TYPE_VECPERM:
18505 case TYPE_VECFLOAT:
18506 case TYPE_VECFDIV:
18507 case TYPE_VECDOUBLE:
18508 return true;
18509 default:
18510 break;
18511 }
18512 return false;
18513 }
18514
18515 /* Returns whether the dependence between INSN and NEXT is considered
18516 costly by the given target. */
18517
18518 static bool
18519 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18520 {
18521 rtx insn;
18522 rtx next;
18523 rtx load_mem, str_mem;
18524
18525 /* If the flag is not enabled, no dependence is considered costly;
18526 allow all dependent insns in the same group.
18527 This is the most aggressive option. */
18528 if (rs6000_sched_costly_dep == no_dep_costly)
18529 return false;
18530
18531 /* If the flag is set to 1, a dependence is always considered costly;
18532 do not allow dependent instructions in the same group.
18533 This is the most conservative option. */
18534 if (rs6000_sched_costly_dep == all_deps_costly)
18535 return true;
18536
18537 insn = DEP_PRO (dep);
18538 next = DEP_CON (dep);
18539
18540 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18541 && is_load_insn (next, &load_mem)
18542 && is_store_insn (insn, &str_mem))
18543 /* Prevent load after store in the same group. */
18544 return true;
18545
18546 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18547 && is_load_insn (next, &load_mem)
18548 && is_store_insn (insn, &str_mem)
18549 && DEP_TYPE (dep) == REG_DEP_TRUE
18550 && mem_locations_overlap (str_mem, load_mem))
18551 /* Prevent load after store in the same group if it is a true
18552 dependence. */
18553 return true;
18554
18555 /* The flag is set to X; dependences with latency >= X are considered costly,
18556 and will not be scheduled in the same group. */
18557 if (rs6000_sched_costly_dep <= max_dep_latency
18558 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18559 return true;
18560
18561 return false;
18562 }
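/* For instance (hypothetical numbers): with -msched-costly-dep=3, a
   dependence of cost 4 between insns scheduled 0 cycles apart satisfies
   (4 - 0) >= 3, so the consumer is kept out of the producer's dispatch
   group.  */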
18563
18564 /* Return the next insn after INSN that is found before TAIL is reached,
18565 skipping any "non-active" insns, i.e. insns that will not actually occupy
18566 an issue slot. Return NULL_RTX if such an insn is not found. */
18567
18568 static rtx_insn *
18569 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18570 {
18571 if (insn == NULL_RTX || insn == tail)
18572 return NULL;
18573
18574 while (1)
18575 {
18576 insn = NEXT_INSN (insn);
18577 if (insn == NULL_RTX || insn == tail)
18578 return NULL;
18579
18580 if (CALL_P (insn)
18581 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18582 || (NONJUMP_INSN_P (insn)
18583 && GET_CODE (PATTERN (insn)) != USE
18584 && GET_CODE (PATTERN (insn)) != CLOBBER
18585 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18586 break;
18587 }
18588 return insn;
18589 }
18590
18591 /* Move instruction at POS to the end of the READY list. */
18592
18593 static void
18594 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18595 {
18596 rtx_insn *tmp;
18597 int i;
18598
18599 tmp = ready[pos];
18600 for (i = pos; i < lastpos; i++)
18601 ready[i] = ready[i + 1];
18602 ready[lastpos] = tmp;
18603 }
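/* Note that the ready list keeps the next insn to issue at the end, so
   with READY == {i0, i1, i2, i3} (i3 issues next), a call with POS == 1
   and LASTPOS == 3 yields {i0, i2, i3, i1}, making i1 the next insn to
   issue.  (The insn names are illustrative.)  */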
18604
18605 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18606
18607 static int
18608 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18609 {
18610 /* For Power6, we need to handle some special cases to try to keep the
18611 store queue from overflowing and triggering expensive flushes.
18612
18613 This code monitors how load and store instructions are being issued
18614 and skews the ready list one way or the other to increase the likelihood
18615 that a desired instruction is issued at the proper time.
18616
18617 A couple of things are done. First, we maintain a "load_store_pendulum"
18618 to track the current state of load/store issue.
18619
18620 - If the pendulum is at zero, then no loads or stores have been
18621 issued in the current cycle so we do nothing.
18622
18623 - If the pendulum is 1, then a single load has been issued in this
18624 cycle and we attempt to locate another load in the ready list to
18625 issue with it.
18626
18627 - If the pendulum is -2, then two stores have already been
18628 issued in this cycle, so we increase the priority of the first load
18629 in the ready list to increase its likelihood of being chosen first
18630 in the next cycle.
18631
18632 - If the pendulum is -1, then a single store has been issued in this
18633 cycle and we attempt to locate another store in the ready list to
18634 issue with it, preferring a store to an adjacent memory location to
18635 facilitate store pairing in the store queue.
18636
18637 - If the pendulum is 2, then two loads have already been
18638 issued in this cycle, so we increase the priority of the first store
18639 in the ready list to increase its likelihood of being chosen first
18640 in the next cycle.
18641
18642 - If the pendulum < -2 or > 2, then do nothing.
18643
18644 Note: This code covers the most common scenarios. There exist
18645 non-load/store instructions which make use of the LSU and which
18646 would need to be accounted for to model the machine's behavior
18647 strictly. Those instructions are currently left unaccounted for,
18648 to help minimize the compile-time overhead of this code.
18649 */
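  /* A worked trace of one cycle (hypothetical): the pendulum starts at 0;
     issuing a store moves it to -1 and the ready list is scanned for a
     second store, preferring one adjacent to the first; issuing that
     second store moves the pendulum to -2, at which point the first load
     on the ready list gets a priority bump so that the next cycle leads
     with a load.  */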
18650 int pos;
18651 rtx load_mem, str_mem;
18652
18653 if (is_store_insn (last_scheduled_insn, &str_mem))
18654 /* Issuing a store, swing the load_store_pendulum to the left */
18655 load_store_pendulum--;
18656 else if (is_load_insn (last_scheduled_insn, &load_mem))
18657 /* Issuing a load, swing the load_store_pendulum to the right */
18658 load_store_pendulum++;
18659 else
18660 return cached_can_issue_more;
18661
18662 /* If the pendulum is balanced, or there is only one instruction on
18663 the ready list, then all is well, so return. */
18664 if ((load_store_pendulum == 0) || (lastpos <= 0))
18665 return cached_can_issue_more;
18666
18667 if (load_store_pendulum == 1)
18668 {
18669 /* A load has been issued in this cycle. Scan the ready list
18670 for another load to issue with it */
18671 pos = lastpos;
18672
18673 while (pos >= 0)
18674 {
18675 if (is_load_insn (ready[pos], &load_mem))
18676 {
18677 /* Found a load. Move it to the head of the ready list,
18678 and adjust its priority so that it is more likely to
18679 stay there. */
18680 move_to_end_of_ready (ready, pos, lastpos);
18681
18682 if (!sel_sched_p ()
18683 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18684 INSN_PRIORITY (ready[lastpos])++;
18685 break;
18686 }
18687 pos--;
18688 }
18689 }
18690 else if (load_store_pendulum == -2)
18691 {
18692 /* Two stores have been issued in this cycle. Increase the
18693 priority of the first load in the ready list to favor it for
18694 issuing in the next cycle. */
18695 pos = lastpos;
18696
18697 while (pos >= 0)
18698 {
18699 if (is_load_insn (ready[pos], &load_mem)
18700 && !sel_sched_p ()
18701 && INSN_PRIORITY_KNOWN (ready[pos]))
18702 {
18703 INSN_PRIORITY (ready[pos])++;
18704
18705 /* Adjust the pendulum to account for the fact that a load
18706 was found and increased in priority. This is to prevent
18707 increasing the priority of multiple loads */
18708 load_store_pendulum--;
18709
18710 break;
18711 }
18712 pos--;
18713 }
18714 }
18715 else if (load_store_pendulum == -1)
18716 {
18717 /* A store has been issued in this cycle. Scan the ready list for
18718 another store to issue with it, preferring a store to an adjacent
18719 memory location */
18720 int first_store_pos = -1;
18721
18722 pos = lastpos;
18723
18724 while (pos >= 0)
18725 {
18726 if (is_store_insn (ready[pos], &str_mem))
18727 {
18728 rtx str_mem2;
18729 /* Maintain the index of the first store found on the
18730 list */
18731 if (first_store_pos == -1)
18732 first_store_pos = pos;
18733
18734 if (is_store_insn (last_scheduled_insn, &str_mem2)
18735 && adjacent_mem_locations (str_mem, str_mem2))
18736 {
18737 /* Found an adjacent store. Move it to the head of the
18738 ready list, and adjust its priority so that it is
18739 more likely to stay there. */
18740 move_to_end_of_ready (ready, pos, lastpos);
18741
18742 if (!sel_sched_p ()
18743 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18744 INSN_PRIORITY (ready[lastpos])++;
18745
18746 first_store_pos = -1;
18747
18748 break;
18749 }
18750 }
18751 pos--;
18752 }
18753
18754 if (first_store_pos >= 0)
18755 {
18756 /* An adjacent store wasn't found, but a non-adjacent store was,
18757 so move the non-adjacent store to the front of the ready
18758 list, and adjust its priority so that it is more likely to
18759 stay there. */
18760 move_to_end_of_ready (ready, first_store_pos, lastpos);
18761 if (!sel_sched_p ()
18762 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18763 INSN_PRIORITY (ready[lastpos])++;
18764 }
18765 }
18766 else if (load_store_pendulum == 2)
18767 {
18768 /* Two loads have been issued in this cycle. Increase the priority
18769 of the first store in the ready list to favor it for issuing in
18770 the next cycle. */
18771 pos = lastpos;
18772
18773 while (pos >= 0)
18774 {
18775 if (is_store_insn (ready[pos], &str_mem)
18776 && !sel_sched_p ()
18777 && INSN_PRIORITY_KNOWN (ready[pos]))
18778 {
18779 INSN_PRIORITY (ready[pos])++;
18780
18781 /* Adjust the pendulum to account for the fact that a store
18782 was found and increased in priority. This is to prevent
18783 increasing the priority of multiple stores */
18784 load_store_pendulum++;
18785
18786 break;
18787 }
18788 pos--;
18789 }
18790 }
18791
18792 return cached_can_issue_more;
18793 }
18794
18795 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18796
18797 static int
18798 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18799 {
18800 int pos;
18801 enum attr_type type, type2;
18802
18803 type = get_attr_type (last_scheduled_insn);
18804
18805 /* Try to issue fixed point divides back-to-back in pairs so they will be
18806 routed to separate execution units and execute in parallel. */
18807 if (type == TYPE_DIV && divide_cnt == 0)
18808 {
18809 /* First divide has been scheduled. */
18810 divide_cnt = 1;
18811
18812 /* Scan the ready list looking for another divide, if found move it
18813 to the end of the list so it is chosen next. */
18814 pos = lastpos;
18815 while (pos >= 0)
18816 {
18817 if (recog_memoized (ready[pos]) >= 0
18818 && get_attr_type (ready[pos]) == TYPE_DIV)
18819 {
18820 move_to_end_of_ready (ready, pos, lastpos);
18821 break;
18822 }
18823 pos--;
18824 }
18825 }
18826 else
18827 {
18828 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18829 divide_cnt = 0;
18830
18831 /* The best dispatch throughput for vector and vector load insns can be
18832 achieved by interleaving a vector and vector load such that they'll
18833 dispatch to the same superslice. If this pairing cannot be achieved
18834 then it is best to pair vector insns together and vector load insns
18835 together.
18836
18837 To aid in this pairing, vec_pairing maintains the current state with
18838 the following values:
18839
18840 0 : Initial state, no vecload/vector pairing has been started.
18841
18842 1 : A vecload or vector insn has been issued and a candidate for
18843 pairing has been found and moved to the end of the ready
18844 list. */
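      /* Example flow (hypothetical): a vecload issues while vec_pairing
	 == 0; the scan below finds a pairable vector insn, moves it to
	 the end of the ready list and sets vec_pairing = 1; after the
	 next insn issues, control falls through to the reset at the
	 bottom and vec_pairing returns to 0.  */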
18845 if (type == TYPE_VECLOAD)
18846 {
18847 /* Issued a vecload. */
18848 if (vec_pairing == 0)
18849 {
18850 int vecload_pos = -1;
18851 /* We issued a single vecload, look for a vector insn to pair it
18852 with. If one isn't found, try to pair another vecload. */
18853 pos = lastpos;
18854 while (pos >= 0)
18855 {
18856 if (recog_memoized (ready[pos]) >= 0)
18857 {
18858 type2 = get_attr_type (ready[pos]);
18859 if (is_power9_pairable_vec_type (type2))
18860 {
18861 /* Found a vector insn to pair with, move it to the
18862 end of the ready list so it is scheduled next. */
18863 move_to_end_of_ready (ready, pos, lastpos);
18864 vec_pairing = 1;
18865 return cached_can_issue_more;
18866 }
18867 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18868 /* Remember position of first vecload seen. */
18869 vecload_pos = pos;
18870 }
18871 pos--;
18872 }
18873 if (vecload_pos >= 0)
18874 {
18875 /* Didn't find a vector to pair with but did find a vecload,
18876 move it to the end of the ready list. */
18877 move_to_end_of_ready (ready, vecload_pos, lastpos);
18878 vec_pairing = 1;
18879 return cached_can_issue_more;
18880 }
18881 }
18882 }
18883 else if (is_power9_pairable_vec_type (type))
18884 {
18885 /* Issued a vector operation. */
18886 if (vec_pairing == 0)
18887 {
18888 int vec_pos = -1;
18889 /* We issued a single vector insn, look for a vecload to pair it
18890 with. If one isn't found, try to pair another vector. */
18891 pos = lastpos;
18892 while (pos >= 0)
18893 {
18894 if (recog_memoized (ready[pos]) >= 0)
18895 {
18896 type2 = get_attr_type (ready[pos]);
18897 if (type2 == TYPE_VECLOAD)
18898 {
18899 /* Found a vecload insn to pair with, move it to the
18900 end of the ready list so it is scheduled next. */
18901 move_to_end_of_ready (ready, pos, lastpos);
18902 vec_pairing = 1;
18903 return cached_can_issue_more;
18904 }
18905 else if (is_power9_pairable_vec_type (type2)
18906 && vec_pos == -1)
18907 /* Remember position of first vector insn seen. */
18908 vec_pos = pos;
18909 }
18910 pos--;
18911 }
18912 if (vec_pos >= 0)
18913 {
18914 /* Didn't find a vecload to pair with but did find a vector
18915 insn, move it to the end of the ready list. */
18916 move_to_end_of_ready (ready, vec_pos, lastpos);
18917 vec_pairing = 1;
18918 return cached_can_issue_more;
18919 }
18920 }
18921 }
18922
18923 /* We've either finished a vec/vecload pair, couldn't find an insn to
18924 continue the current pair, or the last insn had nothing to do
18925 with pairing. In any case, reset the state. */
18926 vec_pairing = 0;
18927 }
18928
18929 return cached_can_issue_more;
18930 }
18931
18932 /* Determine if INSN is a store to memory that can be fused with a similar
18933 adjacent store. */
18934
18935 static bool
18936 is_fusable_store (rtx_insn *insn, rtx *str_mem)
18937 {
18938 /* Insn must be a non-prefixed base+disp form store. */
18939 if (is_store_insn (insn, str_mem)
18940 && get_attr_prefixed (insn) == PREFIXED_NO
18941 && get_attr_update (insn) == UPDATE_NO
18942 && get_attr_indexed (insn) == INDEXED_NO)
18943 {
18944 /* Further restrictions by mode and size. */
18945 if (!MEM_SIZE_KNOWN_P (*str_mem))
18946 return false;
18947
18948 machine_mode mode = GET_MODE (*str_mem);
18949 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
18950
18951 if (INTEGRAL_MODE_P (mode))
18952 /* Must be word or dword size. */
18953 return (size == 4 || size == 8);
18954 else if (FLOAT_MODE_P (mode))
18955 /* Must be dword size. */
18956 return (size == 8);
18957 }
18958
18959 return false;
18960 }
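/* For instance (hypothetical insns): a plain "std r10,8(r9)" is a
   non-update, non-indexed, non-prefixed DImode store of size 8, so it
   qualifies; an HImode store (size 2), or any update, indexed, or
   prefixed form, does not.  */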
18961
18962 /* Do Power10 specific reordering of the ready list. */
18963
18964 static int
18965 power10_sched_reorder (rtx_insn **ready, int lastpos)
18966 {
18967 rtx mem1;
18968
18969 /* Do store fusion during sched2 only. */
18970 if (!reload_completed)
18971 return cached_can_issue_more;
18972
18973 /* If the prior insn finished off a store fusion pair then simply
18974 reset the counter and return, nothing more to do. */
18975 if (load_store_pendulum != 0)
18976 {
18977 load_store_pendulum = 0;
18978 return cached_can_issue_more;
18979 }
18980
18981 /* Try to pair certain store insns to adjacent memory locations
18982 so that the hardware will fuse them to a single operation. */
18983 if (TARGET_P10_FUSION && TARGET_P10_FUSION_2STORE
18984 && is_fusable_store (last_scheduled_insn, &mem1))
18985 {
18986
18987 /* A fusable store was just scheduled. Scan the ready list for another
18988 store that it can fuse with. */
18989 int pos = lastpos;
18990 while (pos >= 0)
18991 {
18992 rtx mem2;
18993 /* GPR stores can be at ascending or descending offsets; FPR/VSR
18994 stores must be ascending only. */
18995 if (is_fusable_store (ready[pos], &mem2)
18996 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
18997 && adjacent_mem_locations (mem1, mem2))
18998 || (FLOAT_MODE_P (GET_MODE (mem1))
18999 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19000 {
19001 /* Found a fusable store. Move it to the end of the ready list
19002 so it is scheduled next. */
19003 move_to_end_of_ready (ready, pos, lastpos);
19004
19005 load_store_pendulum = -1;
19006 break;
19007 }
19008 pos--;
19009 }
19010 }
19011
19012 return cached_can_issue_more;
19013 }
19014
19015 /* We are about to begin issuing insns for this clock cycle. */
19016
19017 static int
19018 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19019 rtx_insn **ready ATTRIBUTE_UNUSED,
19020 int *pn_ready ATTRIBUTE_UNUSED,
19021 int clock_var ATTRIBUTE_UNUSED)
19022 {
19023 int n_ready = *pn_ready;
19024
19025 if (sched_verbose)
19026 fprintf (dump, "// rs6000_sched_reorder :\n");
19027
19028 /* Reorder the ready list, if the second to last ready insn
19029 is a nonpipeline insn. */
19030 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19031 {
19032 if (is_nonpipeline_insn (ready[n_ready - 1])
19033 && (recog_memoized (ready[n_ready - 2]) > 0))
19034 /* Simply swap first two insns. */
19035 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19036 }
19037
19038 if (rs6000_tune == PROCESSOR_POWER6)
19039 load_store_pendulum = 0;
19040
19041 /* Do Power10 dependent reordering. */
19042 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19043 power10_sched_reorder (ready, n_ready - 1);
19044
19045 return rs6000_issue_rate ();
19046 }
19047
19048 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19049
19050 static int
19051 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19052 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19053 {
19054 if (sched_verbose)
19055 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19056
19057 /* Do Power6 dependent reordering if necessary. */
19058 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19059 return power6_sched_reorder2 (ready, *pn_ready - 1);
19060
19061 /* Do Power9 dependent reordering if necessary. */
19062 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19063 && recog_memoized (last_scheduled_insn) >= 0)
19064 return power9_sched_reorder2 (ready, *pn_ready - 1);
19065
19066 /* Do Power10 dependent reordering. */
19067 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19068 return power10_sched_reorder (ready, *pn_ready - 1);
19069
19070 return cached_can_issue_more;
19071 }
19072
19073 /* Return whether the presence of INSN causes a dispatch group termination
19074 of group WHICH_GROUP.
19075
19076 If WHICH_GROUP == current_group, this function will return true if INSN
19077 causes the termination of the current group (i.e., the dispatch group to
19078 which INSN belongs). This means that INSN will be the last insn in the
19079 group it belongs to.
19080
19081 If WHICH_GROUP == previous_group, this function will return true if INSN
19082 causes the termination of the previous group (i.e., the dispatch group that
19083 precedes the group to which INSN belongs). This means that INSN will be
19084 the first insn in the group it belongs to. */
19085
19086 static bool
19087 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19088 {
19089 bool first, last;
19090
19091 if (! insn)
19092 return false;
19093
19094 first = insn_must_be_first_in_group (insn);
19095 last = insn_must_be_last_in_group (insn);
19096
19097 if (first && last)
19098 return true;
19099
19100 if (which_group == current_group)
19101 return last;
19102 else if (which_group == previous_group)
19103 return first;
19104
19105 return false;
19106 }
19107
19108
19109 static bool
19110 insn_must_be_first_in_group (rtx_insn *insn)
19111 {
19112 enum attr_type type;
19113
19114 if (!insn
19115 || NOTE_P (insn)
19116 || DEBUG_INSN_P (insn)
19117 || GET_CODE (PATTERN (insn)) == USE
19118 || GET_CODE (PATTERN (insn)) == CLOBBER)
19119 return false;
19120
19121 switch (rs6000_tune)
19122 {
19123 case PROCESSOR_POWER5:
19124 if (is_cracked_insn (insn))
19125 return true;
19126 /* FALLTHRU */
19127 case PROCESSOR_POWER4:
19128 if (is_microcoded_insn (insn))
19129 return true;
19130
19131 if (!rs6000_sched_groups)
19132 return false;
19133
19134 type = get_attr_type (insn);
19135
19136 switch (type)
19137 {
19138 case TYPE_MFCR:
19139 case TYPE_MFCRF:
19140 case TYPE_MTCR:
19141 case TYPE_CR_LOGICAL:
19142 case TYPE_MTJMPR:
19143 case TYPE_MFJMPR:
19144 case TYPE_DIV:
19145 case TYPE_LOAD_L:
19146 case TYPE_STORE_C:
19147 case TYPE_ISYNC:
19148 case TYPE_SYNC:
19149 return true;
19150 default:
19151 break;
19152 }
19153 break;
19154 case PROCESSOR_POWER6:
19155 type = get_attr_type (insn);
19156
19157 switch (type)
19158 {
19159 case TYPE_EXTS:
19160 case TYPE_CNTLZ:
19161 case TYPE_TRAP:
19162 case TYPE_MUL:
19163 case TYPE_INSERT:
19164 case TYPE_FPCOMPARE:
19165 case TYPE_MFCR:
19166 case TYPE_MTCR:
19167 case TYPE_MFJMPR:
19168 case TYPE_MTJMPR:
19169 case TYPE_ISYNC:
19170 case TYPE_SYNC:
19171 case TYPE_LOAD_L:
19172 case TYPE_STORE_C:
19173 return true;
19174 case TYPE_SHIFT:
19175 if (get_attr_dot (insn) == DOT_NO
19176 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19177 return true;
19178 else
19179 break;
19180 case TYPE_DIV:
19181 if (get_attr_size (insn) == SIZE_32)
19182 return true;
19183 else
19184 break;
19185 case TYPE_LOAD:
19186 case TYPE_STORE:
19187 case TYPE_FPLOAD:
19188 case TYPE_FPSTORE:
19189 if (get_attr_update (insn) == UPDATE_YES)
19190 return true;
19191 else
19192 break;
19193 default:
19194 break;
19195 }
19196 break;
19197 case PROCESSOR_POWER7:
19198 type = get_attr_type (insn);
19199
19200 switch (type)
19201 {
19202 case TYPE_CR_LOGICAL:
19203 case TYPE_MFCR:
19204 case TYPE_MFCRF:
19205 case TYPE_MTCR:
19206 case TYPE_DIV:
19207 case TYPE_ISYNC:
19208 case TYPE_LOAD_L:
19209 case TYPE_STORE_C:
19210 case TYPE_MFJMPR:
19211 case TYPE_MTJMPR:
19212 return true;
19213 case TYPE_MUL:
19214 case TYPE_SHIFT:
19215 case TYPE_EXTS:
19216 if (get_attr_dot (insn) == DOT_YES)
19217 return true;
19218 else
19219 break;
19220 case TYPE_LOAD:
19221 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19222 || get_attr_update (insn) == UPDATE_YES)
19223 return true;
19224 else
19225 break;
19226 case TYPE_STORE:
19227 case TYPE_FPLOAD:
19228 case TYPE_FPSTORE:
19229 if (get_attr_update (insn) == UPDATE_YES)
19230 return true;
19231 else
19232 break;
19233 default:
19234 break;
19235 }
19236 break;
19237 case PROCESSOR_POWER8:
19238 type = get_attr_type (insn);
19239
19240 switch (type)
19241 {
19242 case TYPE_CR_LOGICAL:
19243 case TYPE_MFCR:
19244 case TYPE_MFCRF:
19245 case TYPE_MTCR:
19246 case TYPE_SYNC:
19247 case TYPE_ISYNC:
19248 case TYPE_LOAD_L:
19249 case TYPE_STORE_C:
19250 case TYPE_VECSTORE:
19251 case TYPE_MFJMPR:
19252 case TYPE_MTJMPR:
19253 return true;
19254 case TYPE_SHIFT:
19255 case TYPE_EXTS:
19256 case TYPE_MUL:
19257 if (get_attr_dot (insn) == DOT_YES)
19258 return true;
19259 else
19260 break;
19261 case TYPE_LOAD:
19262 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19263 || get_attr_update (insn) == UPDATE_YES)
19264 return true;
19265 else
19266 break;
19267 case TYPE_STORE:
19268 if (get_attr_update (insn) == UPDATE_YES
19269 && get_attr_indexed (insn) == INDEXED_YES)
19270 return true;
19271 else
19272 break;
19273 default:
19274 break;
19275 }
19276 break;
19277 default:
19278 break;
19279 }
19280
19281 return false;
19282 }
19283
19284 static bool
19285 insn_must_be_last_in_group (rtx_insn *insn)
19286 {
19287 enum attr_type type;
19288
19289 if (!insn
19290 || NOTE_P (insn)
19291 || DEBUG_INSN_P (insn)
19292 || GET_CODE (PATTERN (insn)) == USE
19293 || GET_CODE (PATTERN (insn)) == CLOBBER)
19294 return false;
19295
19296 switch (rs6000_tune) {
19297 case PROCESSOR_POWER4:
19298 case PROCESSOR_POWER5:
19299 if (is_microcoded_insn (insn))
19300 return true;
19301
19302 if (is_branch_slot_insn (insn))
19303 return true;
19304
19305 break;
19306 case PROCESSOR_POWER6:
19307 type = get_attr_type (insn);
19308
19309 switch (type)
19310 {
19311 case TYPE_EXTS:
19312 case TYPE_CNTLZ:
19313 case TYPE_TRAP:
19314 case TYPE_MUL:
19315 case TYPE_FPCOMPARE:
19316 case TYPE_MFCR:
19317 case TYPE_MTCR:
19318 case TYPE_MFJMPR:
19319 case TYPE_MTJMPR:
19320 case TYPE_ISYNC:
19321 case TYPE_SYNC:
19322 case TYPE_LOAD_L:
19323 case TYPE_STORE_C:
19324 return true;
19325 case TYPE_SHIFT:
19326 if (get_attr_dot (insn) == DOT_NO
19327 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19328 return true;
19329 else
19330 break;
19331 case TYPE_DIV:
19332 if (get_attr_size (insn) == SIZE_32)
19333 return true;
19334 else
19335 break;
19336 default:
19337 break;
19338 }
19339 break;
19340 case PROCESSOR_POWER7:
19341 type = get_attr_type (insn);
19342
19343 switch (type)
19344 {
19345 case TYPE_ISYNC:
19346 case TYPE_SYNC:
19347 case TYPE_LOAD_L:
19348 case TYPE_STORE_C:
19349 return true;
19350 case TYPE_LOAD:
19351 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19352 && get_attr_update (insn) == UPDATE_YES)
19353 return true;
19354 else
19355 break;
19356 case TYPE_STORE:
19357 if (get_attr_update (insn) == UPDATE_YES
19358 && get_attr_indexed (insn) == INDEXED_YES)
19359 return true;
19360 else
19361 break;
19362 default:
19363 break;
19364 }
19365 break;
19366 case PROCESSOR_POWER8:
19367 type = get_attr_type (insn);
19368
19369 switch (type)
19370 {
19371 case TYPE_MFCR:
19372 case TYPE_MTCR:
19373 case TYPE_ISYNC:
19374 case TYPE_SYNC:
19375 case TYPE_LOAD_L:
19376 case TYPE_STORE_C:
19377 return true;
19378 case TYPE_LOAD:
19379 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19380 && get_attr_update (insn) == UPDATE_YES)
19381 return true;
19382 else
19383 break;
19384 case TYPE_STORE:
19385 if (get_attr_update (insn) == UPDATE_YES
19386 && get_attr_indexed (insn) == INDEXED_YES)
19387 return true;
19388 else
19389 break;
19390 default:
19391 break;
19392 }
19393 break;
19394 default:
19395 break;
19396 }
19397
19398 return false;
19399 }
19400
19401 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19402 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19403
19404 static bool
19405 is_costly_group (rtx *group_insns, rtx next_insn)
19406 {
19407 int i;
19408 int issue_rate = rs6000_issue_rate ();
19409
19410 for (i = 0; i < issue_rate; i++)
19411 {
19412 sd_iterator_def sd_it;
19413 dep_t dep;
19414 rtx insn = group_insns[i];
19415
19416 if (!insn)
19417 continue;
19418
19419 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19420 {
19421 rtx next = DEP_CON (dep);
19422
19423 if (next == next_insn
19424 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19425 return true;
19426 }
19427 }
19428
19429 return false;
19430 }
19431
19432 /* Helper for the function redefine_groups.
19433 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19434 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19435 to keep it "far" (in a separate group) from GROUP_INSNS, according
19436 to one of the following schemes, depending on the value of the flag
19437 -minsert-sched-nops = X:
19438 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19439 in order to force NEXT_INSN into a separate group.
19440 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19441 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19442 insertion (has a group just ended, how many vacant issue slots remain in the
19443 last group, and how many dispatch groups were encountered so far). */
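/* For example (hypothetical flag values): with -minsert-sched-nops=2
   and a costly dependence on NEXT_INSN, exactly two nops are emitted
   before it, whether or not they suffice to push it into a new group;
   with -minsert-sched-nops=regroup_exact the nop count is instead
   computed so that NEXT_INSN is guaranteed to start a new group.  */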
19444
19445 static int
19446 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19447 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19448 int *group_count)
19449 {
19450 rtx nop;
19451 bool force;
19452 int issue_rate = rs6000_issue_rate ();
19453 bool end = *group_end;
19454 int i;
19455
19456 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19457 return can_issue_more;
19458
19459 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19460 return can_issue_more;
19461
19462 force = is_costly_group (group_insns, next_insn);
19463 if (!force)
19464 return can_issue_more;
19465
19466 if (sched_verbose > 6)
19467 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
19468 *group_count ,can_issue_more);
19469
19470 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19471 {
19472 if (*group_end)
19473 can_issue_more = 0;
19474
19475 /* Since only a branch can be issued in the last issue_slot, it is
19476 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19477 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19478 in this case the last nop will start a new group and the branch
19479 will be forced to the new group. */
19480 if (can_issue_more && !is_branch_slot_insn (next_insn))
19481 can_issue_more--;
19482
19483 /* Do we have a special group ending nop? */
19484 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19485 || rs6000_tune == PROCESSOR_POWER8)
19486 {
19487 nop = gen_group_ending_nop ();
19488 emit_insn_before (nop, next_insn);
19489 can_issue_more = 0;
19490 }
19491 else
19492 while (can_issue_more > 0)
19493 {
19494 nop = gen_nop ();
19495 emit_insn_before (nop, next_insn);
19496 can_issue_more--;
19497 }
19498
19499 *group_end = true;
19500 return 0;
19501 }
19502
19503 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19504 {
19505 int n_nops = rs6000_sched_insert_nops;
19506
19507 /* Nops can't be issued from the branch slot, so the effective
19508 issue_rate for nops is 'issue_rate - 1'. */
19509 if (can_issue_more == 0)
19510 can_issue_more = issue_rate;
19511 can_issue_more--;
19512 if (can_issue_more == 0)
19513 {
19514 can_issue_more = issue_rate - 1;
19515 (*group_count)++;
19516 end = true;
19517 for (i = 0; i < issue_rate; i++)
19518 {
19519 group_insns[i] = 0;
19520 }
19521 }
19522
19523 while (n_nops > 0)
19524 {
19525 nop = gen_nop ();
19526 emit_insn_before (nop, next_insn);
19527 if (can_issue_more == issue_rate - 1) /* new group begins */
19528 end = false;
19529 can_issue_more--;
19530 if (can_issue_more == 0)
19531 {
19532 can_issue_more = issue_rate - 1;
19533 (*group_count)++;
19534 end = true;
19535 for (i = 0; i < issue_rate; i++)
19536 {
19537 group_insns[i] = 0;
19538 }
19539 }
19540 n_nops--;
19541 }
19542
19543 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19544 can_issue_more++;
19545
19546 /* Is next_insn going to start a new group? */
19547 *group_end
19548 = (end
19549 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19550 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19551 || (can_issue_more < issue_rate &&
19552 insn_terminates_group_p (next_insn, previous_group)));
19553 if (*group_end && end)
19554 (*group_count)--;
19555
19556 if (sched_verbose > 6)
19557 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19558 *group_count, can_issue_more);
19559 return can_issue_more;
19560 }
19561
19562 return can_issue_more;
19563 }
19564
19565 /* This function tries to synch the dispatch groups that the compiler "sees"
19566 with the dispatch groups that the processor dispatcher is expected to
19567 form in practice. It tries to achieve this synchronization by forcing the
19568 estimated processor grouping on the compiler (as opposed to the function
19569 'pad_groups' which tries to force the scheduler's grouping on the processor).
19570
19571 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19572 examines the (estimated) dispatch groups that will be formed by the processor
19573 dispatcher. It marks these group boundaries to reflect the estimated
19574 processor grouping, overriding the grouping that the scheduler had marked.
19575 Depending on the value of the flag '-minsert-sched-nops' this function can
19576 force certain insns into separate groups or force a certain distance between
19577 them by inserting nops, for example, if there exists a "costly dependence"
19578 between the insns.
19579
19580 The function estimates the group boundaries that the processor will form as
19581 follows: It keeps track of how many vacant issue slots are available after
19582 each insn. A subsequent insn will start a new group if one of the following
19583 4 cases applies:
19584 - no more vacant issue slots remain in the current dispatch group.
19585 - only the last issue slot, which is the branch slot, is vacant, but the next
19586 insn is not a branch.
19587 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19588 which means that a cracked insn (which occupies two issue slots) can't be
19589 issued in this group.
19590 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19591 start a new group. */
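/* Worked example (hypothetical, issue_rate == 5): after three simple
   insns, two slots remain in the current group, one of them the branch
   slot.  A cracked insn needs two ordinary issue slots, so with
   can_issue_more <= 2 it is estimated to start a new group, matching
   the third case above.  */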
19592
19593 static int
19594 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19595 rtx_insn *tail)
19596 {
19597 rtx_insn *insn, *next_insn;
19598 int issue_rate;
19599 int can_issue_more;
19600 int slot, i;
19601 bool group_end;
19602 int group_count = 0;
19603 rtx *group_insns;
19604
19605 /* Initialize. */
19606 issue_rate = rs6000_issue_rate ();
19607 group_insns = XALLOCAVEC (rtx, issue_rate);
19608 for (i = 0; i < issue_rate; i++)
19609 {
19610 group_insns[i] = 0;
19611 }
19612 can_issue_more = issue_rate;
19613 slot = 0;
19614 insn = get_next_active_insn (prev_head_insn, tail);
19615 group_end = false;
19616
19617 while (insn != NULL_RTX)
19618 {
19619 slot = (issue_rate - can_issue_more);
19620 group_insns[slot] = insn;
19621 can_issue_more =
19622 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19623 if (insn_terminates_group_p (insn, current_group))
19624 can_issue_more = 0;
19625
19626 next_insn = get_next_active_insn (insn, tail);
19627 if (next_insn == NULL_RTX)
19628 return group_count + 1;
19629
19630 /* Is next_insn going to start a new group? */
19631 group_end
19632 = (can_issue_more == 0
19633 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19634 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19635 || (can_issue_more < issue_rate &&
19636 insn_terminates_group_p (next_insn, previous_group)));
19637
19638 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19639 next_insn, &group_end, can_issue_more,
19640 &group_count);
19641
19642 if (group_end)
19643 {
19644 group_count++;
19645 can_issue_more = 0;
19646 for (i = 0; i < issue_rate; i++)
19647 {
19648 group_insns[i] = 0;
19649 }
19650 }
19651
19652 if (GET_MODE (next_insn) == TImode && can_issue_more)
19653 PUT_MODE (next_insn, VOIDmode);
19654 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19655 PUT_MODE (next_insn, TImode);
19656
19657 insn = next_insn;
19658 if (can_issue_more == 0)
19659 can_issue_more = issue_rate;
19660 } /* while */
19661
19662 return group_count;
19663 }
19664
19665 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19666 dispatch group boundaries that the scheduler had marked. Pad with nops
19667 any dispatch groups which have vacant issue slots, in order to force the
19668 scheduler's grouping on the processor dispatcher. The function
19669 returns the number of dispatch groups found. */
19670
19671 static int
19672 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19673 rtx_insn *tail)
19674 {
19675 rtx_insn *insn, *next_insn;
19676 rtx nop;
19677 int issue_rate;
19678 int can_issue_more;
19679 int group_end;
19680 int group_count = 0;
19681
19682 /* Initialize issue_rate. */
19683 issue_rate = rs6000_issue_rate ();
19684 can_issue_more = issue_rate;
19685
19686 insn = get_next_active_insn (prev_head_insn, tail);
19687 next_insn = get_next_active_insn (insn, tail);
19688
19689 while (insn != NULL_RTX)
19690 {
19691 can_issue_more =
19692 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19693
19694 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19695
19696 if (next_insn == NULL_RTX)
19697 break;
19698
19699 if (group_end)
19700 {
19701 /* If the scheduler had marked group termination at this location
19702 (between insn and next_insn), and neither insn nor next_insn will
19703 force group termination, pad the group with nops to force group
19704 termination. */
19705 if (can_issue_more
19706 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19707 && !insn_terminates_group_p (insn, current_group)
19708 && !insn_terminates_group_p (next_insn, previous_group))
19709 {
19710 if (!is_branch_slot_insn (next_insn))
19711 can_issue_more--;
19712
19713 while (can_issue_more)
19714 {
19715 nop = gen_nop ();
19716 emit_insn_before (nop, next_insn);
19717 can_issue_more--;
19718 }
19719 }
19720
19721 can_issue_more = issue_rate;
19722 group_count++;
19723 }
19724
19725 insn = next_insn;
19726 next_insn = get_next_active_insn (insn, tail);
19727 }
19728
19729 return group_count;
19730 }
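/* Worked example (a sketch, not compiled): with issue_rate == 5, suppose
   three slots were consumed when a scheduler-marked boundary is reached,
   so can_issue_more == 2.  If the next insn is not a branch, one of the
   two vacant slots is the branch slot, which the decrement above leaves
   unfilled, and the loop emits a single nop:

     nop          <- fills the one vacant non-branch slot
     (branch slot left vacant; the dispatcher still ends the group)

   If the next insn IS a branch, both vacant slots are padded with nops
   so the branch cannot slip into the current group's branch slot.  */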
19731
19732 /* We're beginning a new block. Initialize data structures as necessary. */
19733
19734 static void
19735 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19736 int sched_verbose ATTRIBUTE_UNUSED,
19737 int max_ready ATTRIBUTE_UNUSED)
19738 {
19739 last_scheduled_insn = NULL;
19740 load_store_pendulum = 0;
19741 divide_cnt = 0;
19742 vec_pairing = 0;
19743 }
19744
19745 /* The following function is called at the end of scheduling BB.
19746 After reload, it inserts nops to enforce insn group bundling. */
19747
19748 static void
19749 rs6000_sched_finish (FILE *dump, int sched_verbose)
19750 {
19751 int n_groups;
19752
19753 if (sched_verbose)
19754 fprintf (dump, "=== Finishing schedule.\n");
19755
19756 if (reload_completed && rs6000_sched_groups)
19757 {
19758 /* Do not run sched_finish hook when selective scheduling enabled. */
19759 if (sel_sched_p ())
19760 return;
19761
19762 if (rs6000_sched_insert_nops == sched_finish_none)
19763 return;
19764
19765 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19766 n_groups = pad_groups (dump, sched_verbose,
19767 current_sched_info->prev_head,
19768 current_sched_info->next_tail);
19769 else
19770 n_groups = redefine_groups (dump, sched_verbose,
19771 current_sched_info->prev_head,
19772 current_sched_info->next_tail);
19773
19774 if (sched_verbose >= 6)
19775 {
19776 fprintf (dump, "ngroups = %d\n", n_groups);
19777 print_rtl (dump, current_sched_info->prev_head);
19778 fprintf (dump, "Done finish_sched\n");
19779 }
19780 }
19781 }
19782
19783 struct rs6000_sched_context
19784 {
19785 short cached_can_issue_more;
19786 rtx_insn *last_scheduled_insn;
19787 int load_store_pendulum;
19788 int divide_cnt;
19789 int vec_pairing;
19790 };
19791
19792 typedef struct rs6000_sched_context rs6000_sched_context_def;
19793 typedef rs6000_sched_context_def *rs6000_sched_context_t;
19794
19795 /* Allocate store for new scheduling context. */
19796 static void *
19797 rs6000_alloc_sched_context (void)
19798 {
19799 return xmalloc (sizeof (rs6000_sched_context_def));
19800 }
19801
19802 /* If CLEAN_P is true, initialize _SC with clean data;
19803 otherwise, initialize it from the global context. */
19804 static void
19805 rs6000_init_sched_context (void *_sc, bool clean_p)
19806 {
19807 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19808
19809 if (clean_p)
19810 {
19811 sc->cached_can_issue_more = 0;
19812 sc->last_scheduled_insn = NULL;
19813 sc->load_store_pendulum = 0;
19814 sc->divide_cnt = 0;
19815 sc->vec_pairing = 0;
19816 }
19817 else
19818 {
19819 sc->cached_can_issue_more = cached_can_issue_more;
19820 sc->last_scheduled_insn = last_scheduled_insn;
19821 sc->load_store_pendulum = load_store_pendulum;
19822 sc->divide_cnt = divide_cnt;
19823 sc->vec_pairing = vec_pairing;
19824 }
19825 }
19826
19827 /* Sets the global scheduling context to the one pointed to by _SC. */
19828 static void
19829 rs6000_set_sched_context (void *_sc)
19830 {
19831 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19832
19833 gcc_assert (sc != NULL);
19834
19835 cached_can_issue_more = sc->cached_can_issue_more;
19836 last_scheduled_insn = sc->last_scheduled_insn;
19837 load_store_pendulum = sc->load_store_pendulum;
19838 divide_cnt = sc->divide_cnt;
19839 vec_pairing = sc->vec_pairing;
19840 }
19841
19842 /* Free _SC. */
19843 static void
19844 rs6000_free_sched_context (void *_sc)
19845 {
19846 gcc_assert (_sc != NULL);
19847
19848 free (_sc);
19849 }
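/* A sketch of how the selective scheduler drives the four hooks above
   (assumed call order, for exposition only):

     void *ctx = rs6000_alloc_sched_context ();  // raw storage only
     rs6000_init_sched_context (ctx, true);      // or false: snapshot globals
     ...
     rs6000_set_sched_context (ctx);             // restore into the globals
     rs6000_free_sched_context (ctx);
*/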
19850
19851 static bool
19852 rs6000_sched_can_speculate_insn (rtx_insn *insn)
19853 {
19854 switch (get_attr_type (insn))
19855 {
19856 case TYPE_DIV:
19857 case TYPE_SDIV:
19858 case TYPE_DDIV:
19859 case TYPE_VECDIV:
19860 case TYPE_SSQRT:
19861 case TYPE_DSQRT:
19862 return false;
19863
19864 default:
19865 return true;
19866 }
19867 }
19868 \f
19869 /* Length in units of the trampoline for entering a nested function. */
19870
19871 int
19872 rs6000_trampoline_size (void)
19873 {
19874 int ret = 0;
19875
19876 switch (DEFAULT_ABI)
19877 {
19878 default:
19879 gcc_unreachable ();
19880
19881 case ABI_AIX:
19882 ret = (TARGET_32BIT) ? 12 : 24;
19883 break;
19884
19885 case ABI_ELFv2:
19886 gcc_assert (!TARGET_32BIT);
19887 ret = 32;
19888 break;
19889
19890 case ABI_DARWIN:
19891 case ABI_V4:
19892 ret = (TARGET_32BIT) ? 40 : 48;
19893 break;
19894 }
19895
19896 return ret;
19897 }
19898
19899 /* Emit RTL insns to initialize the variable parts of a trampoline.
19900 FNADDR is an RTX for the address of the function's pure code.
19901 CXT is an RTX for the static chain value for the function. */
19902
19903 static void
19904 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19905 {
19906 int regsize = (TARGET_32BIT) ? 4 : 8;
19907 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19908 rtx ctx_reg = force_reg (Pmode, cxt);
19909 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19910
19911 switch (DEFAULT_ABI)
19912 {
19913 default:
19914 gcc_unreachable ();
19915
19916 /* Under AIX, just build the 3-word function descriptor. */
19917 case ABI_AIX:
19918 {
19919 rtx fnmem, fn_reg, toc_reg;
19920
19921 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19922 error ("you cannot take the address of a nested function if you use "
19923 "the %qs option", "-mno-pointers-to-nested-functions");
19924
19925 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19926 fn_reg = gen_reg_rtx (Pmode);
19927 toc_reg = gen_reg_rtx (Pmode);
19928
19929 /* Macro to shorten the code expansions below. */
19930 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19931
19932 m_tramp = replace_equiv_address (m_tramp, addr);
19933
19934 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19935 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19936 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19937 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19938 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19939
19940 # undef MEM_PLUS
19941 }
19942 break;
19943
19944 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19945 case ABI_ELFv2:
19946 case ABI_DARWIN:
19947 case ABI_V4:
19948 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19949 LCT_NORMAL, VOIDmode,
19950 addr, Pmode,
19951 GEN_INT (rs6000_trampoline_size ()), SImode,
19952 fnaddr, Pmode,
19953 ctx_reg, Pmode);
19954 break;
19955 }
19956 }
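/* For reference, the AIX-style layout written above for 64-bit
   (regsize == 8) is a copy of the callee's 3-word function descriptor
   with the static chain placed in the third word:

     tramp + 0:  code address   (word 0 of FNADDR's descriptor)
     tramp + 8:  TOC pointer    (word 1 of FNADDR's descriptor)
     tramp + 16: static chain   (CXT)
*/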
19957
19958 \f
19959 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19960 identifier as an argument, so the front end shouldn't look it up. */
19961
19962 static bool
19963 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19964 {
19965 return is_attribute_p ("altivec", attr_id);
19966 }
19967
19968 /* Handle the "altivec" attribute. The attribute may have
19969 arguments as follows:
19970
19971 __attribute__((altivec(vector__)))
19972 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19973 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19974
19975 and may appear more than once (e.g., 'vector bool char') in a
19976 given declaration. */
19977
19978 static tree
19979 rs6000_handle_altivec_attribute (tree *node,
19980 tree name ATTRIBUTE_UNUSED,
19981 tree args,
19982 int flags ATTRIBUTE_UNUSED,
19983 bool *no_add_attrs)
19984 {
19985 tree type = *node, result = NULL_TREE;
19986 machine_mode mode;
19987 int unsigned_p;
19988 char altivec_type
19989 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19990 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19991 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19992 : '?');
19993
19994 while (POINTER_TYPE_P (type)
19995 || TREE_CODE (type) == FUNCTION_TYPE
19996 || TREE_CODE (type) == METHOD_TYPE
19997 || TREE_CODE (type) == ARRAY_TYPE)
19998 type = TREE_TYPE (type);
19999
20000 mode = TYPE_MODE (type);
20001
20002 /* Check for invalid AltiVec type qualifiers. */
20003 if (type == long_double_type_node)
20004 error ("use of %<long double%> in AltiVec types is invalid");
20005 else if (type == boolean_type_node)
20006 error ("use of boolean types in AltiVec types is invalid");
20007 else if (TREE_CODE (type) == COMPLEX_TYPE)
20008 error ("use of %<complex%> in AltiVec types is invalid");
20009 else if (DECIMAL_FLOAT_MODE_P (mode))
20010 error ("use of decimal floating-point types in AltiVec types is invalid");
20011 else if (!TARGET_VSX)
20012 {
20013 if (type == long_unsigned_type_node || type == long_integer_type_node)
20014 {
20015 if (TARGET_64BIT)
20016 error ("use of %<long%> in AltiVec types is invalid for "
20017 "64-bit code without %qs", "-mvsx");
20018 else if (rs6000_warn_altivec_long)
20019 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20020 "use %<int%>");
20021 }
20022 else if (type == long_long_unsigned_type_node
20023 || type == long_long_integer_type_node)
20024 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20025 "-mvsx");
20026 else if (type == double_type_node)
20027 error ("use of %<double%> in AltiVec types is invalid without %qs",
20028 "-mvsx");
20029 }
20030
20031 switch (altivec_type)
20032 {
20033 case 'v':
20034 unsigned_p = TYPE_UNSIGNED (type);
20035 switch (mode)
20036 {
20037 case E_TImode:
20038 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20039 break;
20040 case E_DImode:
20041 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20042 break;
20043 case E_SImode:
20044 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20045 break;
20046 case E_HImode:
20047 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20048 break;
20049 case E_QImode:
20050 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20051 break;
20052 case E_SFmode: result = V4SF_type_node; break;
20053 case E_DFmode: result = V2DF_type_node; break;
20054 /* If the user says 'vector int bool', we may be handed the 'bool'
20055 attribute _before_ the 'vector' attribute, and so select the
20056 proper type in the 'b' case below. */
20057 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20058 case E_V2DImode: case E_V2DFmode:
20059 result = type;
20060 default: break;
20061 }
20062 break;
20063 case 'b':
20064 switch (mode)
20065 {
20066 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20067 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20068 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20069 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20070 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
20071 default: break;
20072 }
20073 break;
20074 case 'p':
20075 switch (mode)
20076 {
20077 case E_V8HImode: result = pixel_V8HI_type_node;
20078 default: break;
20079 }
20080 default: break;
20081 }
20082
20083 /* Propagate qualifiers attached to the element type
20084 onto the vector type. */
20085 if (result && result != type && TYPE_QUALS (type))
20086 result = build_qualified_type (result, TYPE_QUALS (type));
20087
20088 *no_add_attrs = true; /* No need to hang on to the attribute. */
20089
20090 if (result)
20091 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20092
20093 return NULL_TREE;
20094 }
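/* User-level sketch of the 'v' case handled above:

     typedef int v4si __attribute__ ((altivec (vector__)));
     // element mode SImode, TYPE_UNSIGNED == 0  ->  V4SI_type_node

     typedef unsigned char v16qu __attribute__ ((altivec (vector__)));
     // element mode QImode, unsigned            ->  unsigned_V16QI_type_node
*/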
20095
20096 /* AltiVec defines five built-in scalar types that serve as vector
20097 elements; we must teach the compiler how to mangle them. The 128-bit
20098 floating point mangling is target-specific as well. MMA defines
20099 two built-in types to be used as opaque vector types. */
20100
20101 static const char *
20102 rs6000_mangle_type (const_tree type)
20103 {
20104 type = TYPE_MAIN_VARIANT (type);
20105
20106 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20107 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20108 && TREE_CODE (type) != OPAQUE_TYPE)
20109 return NULL;
20110
20111 if (type == bool_char_type_node) return "U6__boolc";
20112 if (type == bool_short_type_node) return "U6__bools";
20113 if (type == pixel_type_node) return "u7__pixel";
20114 if (type == bool_int_type_node) return "U6__booli";
20115 if (type == bool_long_long_type_node) return "U6__boolx";
20116
20117 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20118 return "g";
20119 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20120 return "u9__ieee128";
20121
20122 if (type == vector_pair_type_node)
20123 return "u13__vector_pair";
20124 if (type == vector_quad_type_node)
20125 return "u13__vector_quad";
20126
20127 /* For all other types, use the default mangling. */
20128 return NULL;
20129 }
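/* Example of the effect of the table above on Itanium C++ ABI mangling
   (a sketch; the exact surrounding mangling depends on the signature):
   a parameter involving __pixel incorporates "u7__pixel", a
   __vector_pair parameter incorporates "u13__vector_pair", and the
   IBM double-double 'long double' mangles as plain "g". */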
20130
20131 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20132 struct attribute_spec.handler. */
20133
20134 static tree
20135 rs6000_handle_longcall_attribute (tree *node, tree name,
20136 tree args ATTRIBUTE_UNUSED,
20137 int flags ATTRIBUTE_UNUSED,
20138 bool *no_add_attrs)
20139 {
20140 if (TREE_CODE (*node) != FUNCTION_TYPE
20141 && TREE_CODE (*node) != FIELD_DECL
20142 && TREE_CODE (*node) != TYPE_DECL)
20143 {
20144 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20145 name);
20146 *no_add_attrs = true;
20147 }
20148
20149 return NULL_TREE;
20150 }
20151
20152 /* Set longcall attributes on all functions declared when
20153 rs6000_default_long_calls is true. */
20154 static void
20155 rs6000_set_default_type_attributes (tree type)
20156 {
20157 if (rs6000_default_long_calls
20158 && (TREE_CODE (type) == FUNCTION_TYPE
20159 || TREE_CODE (type) == METHOD_TYPE))
20160 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20161 NULL_TREE,
20162 TYPE_ATTRIBUTES (type));
20163
20164 #if TARGET_MACHO
20165 darwin_set_default_type_attributes (type);
20166 #endif
20167 }
20168
20169 /* Return a reference suitable for calling a function with the
20170 longcall attribute. */
20171
20172 static rtx
20173 rs6000_longcall_ref (rtx call_ref, rtx arg)
20174 {
20175 /* System V adds '.' to the internal name, so skip any leading periods. */
20176 const char *call_name = XSTR (call_ref, 0);
20177 if (*call_name == '.')
20178 {
20179 while (*call_name == '.')
20180 call_name++;
20181
20182 tree node = get_identifier (call_name);
20183 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20184 }
20185
20186 if (TARGET_PLTSEQ)
20187 {
20188 rtx base = const0_rtx;
20189 int regno = 12;
20190 if (rs6000_pcrel_p ())
20191 {
20192 rtx reg = gen_rtx_REG (Pmode, regno);
20193 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20194 gen_rtvec (3, base, call_ref, arg),
20195 UNSPECV_PLT_PCREL);
20196 emit_insn (gen_rtx_SET (reg, u));
20197 return reg;
20198 }
20199
20200 if (DEFAULT_ABI == ABI_ELFv2)
20201 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20202 else
20203 {
20204 if (flag_pic)
20205 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20206 regno = 11;
20207 }
20208 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20209 may be used by a function global entry point. For SysV4, r11
20210 is used by __glink_PLTresolve lazy resolver entry. */
20211 rtx reg = gen_rtx_REG (Pmode, regno);
20212 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20213 UNSPEC_PLT16_HA);
20214 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20215 gen_rtvec (3, reg, call_ref, arg),
20216 UNSPECV_PLT16_LO);
20217 emit_insn (gen_rtx_SET (reg, hi));
20218 emit_insn (gen_rtx_SET (reg, lo));
20219 return reg;
20220 }
20221
20222 return force_reg (Pmode, call_ref);
20223 }
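/* User-level trigger for the path above (sketch):

     extern int far_away (int) __attribute__ ((longcall));

   Calls to far_away are forced through an indirect sequence; when PLT
   sequences are enabled (TARGET_PLTSEQ), the callee address is
   materialized in r12 (ELFv2) or r11 (SysV4), matching the registers
   the linker's PLT stubs expect.  */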
20224 \f
20225 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20226 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20227 #endif
20228
20229 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20230 struct attribute_spec.handler. */
20231 static tree
20232 rs6000_handle_struct_attribute (tree *node, tree name,
20233 tree args ATTRIBUTE_UNUSED,
20234 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20235 {
20236 tree *type = NULL;
20237 if (DECL_P (*node))
20238 {
20239 if (TREE_CODE (*node) == TYPE_DECL)
20240 type = &TREE_TYPE (*node);
20241 }
20242 else
20243 type = node;
20244
20245 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20246 || TREE_CODE (*type) == UNION_TYPE)))
20247 {
20248 warning (OPT_Wattributes, "%qE attribute ignored", name);
20249 *no_add_attrs = true;
20250 }
20251
20252 else if ((is_attribute_p ("ms_struct", name)
20253 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20254 || ((is_attribute_p ("gcc_struct", name)
20255 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20256 {
20257 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20258 name);
20259 *no_add_attrs = true;
20260 }
20261
20262 return NULL_TREE;
20263 }
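/* Examples of the diagnostics above (sketch):

     struct __attribute__ ((ms_struct)) A { char c; int i; };  // accepted
     int x __attribute__ ((ms_struct));     // "attribute ignored" warning
     struct __attribute__ ((ms_struct, gcc_struct)) B { int i; };
     // second attribute draws the "incompatible attribute ignored" warning
*/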
20264
20265 static bool
20266 rs6000_ms_bitfield_layout_p (const_tree record_type)
20267 {
20268 return (TARGET_USE_MS_BITFIELD_LAYOUT
20269 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20270 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20271 }
20272 \f
20273 #ifdef USING_ELFOS_H
20274
20275 /* A get_unnamed_section callback, used for switching to toc_section. */
20276
20277 static void
20278 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20279 {
20280 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20281 && TARGET_MINIMAL_TOC)
20282 {
20283 if (!toc_initialized)
20284 {
20285 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20286 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20287 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20288 fprintf (asm_out_file, "\t.tc ");
20289 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20290 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20291 fprintf (asm_out_file, "\n");
20292
20293 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20294 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20295 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20296 fprintf (asm_out_file, " = .+32768\n");
20297 toc_initialized = 1;
20298 }
20299 else
20300 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20301 }
20302 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20303 {
20304 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20305 if (!toc_initialized)
20306 {
20307 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20308 toc_initialized = 1;
20309 }
20310 }
20311 else
20312 {
20313 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20314 if (!toc_initialized)
20315 {
20316 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20317 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20318 fprintf (asm_out_file, " = .+32768\n");
20319 toc_initialized = 1;
20320 }
20321 }
20322 }
20323
20324 /* Implement TARGET_ASM_INIT_SECTIONS. */
20325
20326 static void
20327 rs6000_elf_asm_init_sections (void)
20328 {
20329 toc_section
20330 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20331
20332 sdata2_section
20333 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20334 SDATA2_SECTION_ASM_OP);
20335 }
20336
20337 /* Implement TARGET_SELECT_RTX_SECTION. */
20338
20339 static section *
20340 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20341 unsigned HOST_WIDE_INT align)
20342 {
20343 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20344 return toc_section;
20345 else
20346 return default_elf_select_rtx_section (mode, x, align);
20347 }
20348 \f
20349 /* For a SYMBOL_REF, set generic flags and then perform some
20350 target-specific processing.
20351
20352 When the AIX ABI is requested on a non-AIX system, replace the
20353 function name with the real name (with a leading .) rather than the
20354 function descriptor name. This saves a lot of overriding code to
20355 read the prefixes. */
20356
20357 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20358 static void
20359 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20360 {
20361 default_encode_section_info (decl, rtl, first);
20362
20363 if (first
20364 && TREE_CODE (decl) == FUNCTION_DECL
20365 && !TARGET_AIX
20366 && DEFAULT_ABI == ABI_AIX)
20367 {
20368 rtx sym_ref = XEXP (rtl, 0);
20369 size_t len = strlen (XSTR (sym_ref, 0));
20370 char *str = XALLOCAVEC (char, len + 2);
20371 str[0] = '.';
20372 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20373 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20374 }
20375 }
20376
20377 static inline bool
20378 compare_section_name (const char *section, const char *templ)
20379 {
20380 int len;
20381
20382 len = strlen (templ);
20383 return (strncmp (section, templ, len) == 0
20384 && (section[len] == 0 || section[len] == '.'));
20385 }
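/* Worked examples for compare_section_name (sketch):

     compare_section_name (".sdata", ".sdata")      -> true  (exact match)
     compare_section_name (".sdata.foo", ".sdata")  -> true  ('.' follows)
     compare_section_name (".sdata2", ".sdata")     -> false ('2' follows)
*/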
20386
20387 bool
20388 rs6000_elf_in_small_data_p (const_tree decl)
20389 {
20390 if (rs6000_sdata == SDATA_NONE)
20391 return false;
20392
20393 /* We want to merge strings, so we never consider them small data. */
20394 if (TREE_CODE (decl) == STRING_CST)
20395 return false;
20396
20397 /* Functions are never in the small data area. */
20398 if (TREE_CODE (decl) == FUNCTION_DECL)
20399 return false;
20400
20401 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20402 {
20403 const char *section = DECL_SECTION_NAME (decl);
20404 if (compare_section_name (section, ".sdata")
20405 || compare_section_name (section, ".sdata2")
20406 || compare_section_name (section, ".gnu.linkonce.s")
20407 || compare_section_name (section, ".sbss")
20408 || compare_section_name (section, ".sbss2")
20409 || compare_section_name (section, ".gnu.linkonce.sb")
20410 || strcmp (section, ".PPC.EMB.sdata0") == 0
20411 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20412 return true;
20413 }
20414 else
20415 {
20416 /* If we are told not to put readonly data in sdata, then don't. */
20417 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20418 && !rs6000_readonly_in_sdata)
20419 return false;
20420
20421 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20422
20423 if (size > 0
20424 && size <= g_switch_value
20425 /* If it's not public, and we're not going to reference it there,
20426 there's no need to put it in the small data section. */
20427 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20428 return true;
20429 }
20430
20431 return false;
20432 }
20433
20434 #endif /* USING_ELFOS_H */
20435 \f
20436 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20437
20438 static bool
20439 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20440 {
20441 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20442 }
20443
20444 /* Do not place thread-local symbols refs in the object blocks. */
20445
20446 static bool
20447 rs6000_use_blocks_for_decl_p (const_tree decl)
20448 {
20449 return !DECL_THREAD_LOCAL_P (decl);
20450 }
20451 \f
20452 /* Return a REG that occurs in ADDR with coefficient 1.
20453 ADDR can be effectively incremented by incrementing REG.
20454
20455 r0 is special and we must not select it as an address
20456 register by this routine since our caller will try to
20457 increment the returned register via an "la" instruction. */
20458
20459 rtx
20460 find_addr_reg (rtx addr)
20461 {
20462 while (GET_CODE (addr) == PLUS)
20463 {
20464 if (REG_P (XEXP (addr, 0))
20465 && REGNO (XEXP (addr, 0)) != 0)
20466 addr = XEXP (addr, 0);
20467 else if (REG_P (XEXP (addr, 1))
20468 && REGNO (XEXP (addr, 1)) != 0)
20469 addr = XEXP (addr, 1);
20470 else if (CONSTANT_P (XEXP (addr, 0)))
20471 addr = XEXP (addr, 1);
20472 else if (CONSTANT_P (XEXP (addr, 1)))
20473 addr = XEXP (addr, 0);
20474 else
20475 gcc_unreachable ();
20476 }
20477 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20478 return addr;
20479 }
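/* Worked example (sketch): for ADDR = (plus (reg 9) (const_int 16)),
   the CONSTANT_P arm discards the offset and the function returns
   (reg 9).  r0 is never chosen because the caller increments the
   result with "la", where r0 reads as the constant zero.  */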
20480
20481 void
20482 rs6000_fatal_bad_address (rtx op)
20483 {
20484 fatal_insn ("bad address", op);
20485 }
20486
20487 #if TARGET_MACHO
20488
20489 vec<branch_island, va_gc> *branch_islands;
20490
20491 /* Remember to generate a branch island for far calls to the given
20492 function. */
20493
20494 static void
20495 add_compiler_branch_island (tree label_name, tree function_name,
20496 int line_number)
20497 {
20498 branch_island bi = {function_name, label_name, line_number};
20499 vec_safe_push (branch_islands, bi);
20500 }
20501
20502 /* NO_PREVIOUS_DEF checks in the linked list whether the function name is
20503 already there. */
20504
20505 static int
20506 no_previous_def (tree function_name)
20507 {
20508 branch_island *bi;
20509 unsigned ix;
20510
20511 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20512 if (function_name == bi->function_name)
20513 return 0;
20514 return 1;
20515 }
20516
20517 /* GET_PREV_LABEL gets the label name from the previous definition of
20518 the function. */
20519
20520 static tree
20521 get_prev_label (tree function_name)
20522 {
20523 branch_island *bi;
20524 unsigned ix;
20525
20526 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20527 if (function_name == bi->function_name)
20528 return bi->label_name;
20529 return NULL_TREE;
20530 }
20531
20532 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20533
20534 void
20535 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20536 {
20537 unsigned int length;
20538 char *symbol_name, *lazy_ptr_name;
20539 char *local_label_0;
20540 static unsigned label = 0;
20541
20542 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20543 symb = (*targetm.strip_name_encoding) (symb);
20544
20545 length = strlen (symb);
20546 symbol_name = XALLOCAVEC (char, length + 32);
20547 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20548
20549 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20550 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20551
20552 if (MACHOPIC_PURE)
20553 {
20554 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20555 fprintf (file, "\t.align 5\n");
20556
20557 fprintf (file, "%s:\n", stub);
20558 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20559
20560 label++;
20561 local_label_0 = XALLOCAVEC (char, 16);
20562 sprintf (local_label_0, "L%u$spb", label);
20563
20564 fprintf (file, "\tmflr r0\n");
20565 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20566 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20567 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20568 lazy_ptr_name, local_label_0);
20569 fprintf (file, "\tmtlr r0\n");
20570 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20571 (TARGET_64BIT ? "ldu" : "lwzu"),
20572 lazy_ptr_name, local_label_0);
20573 fprintf (file, "\tmtctr r12\n");
20574 fprintf (file, "\tbctr\n");
20575 }
20576 else /* mdynamic-no-pic or mkernel. */
20577 {
20578 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20579 fprintf (file, "\t.align 4\n");
20580
20581 fprintf (file, "%s:\n", stub);
20582 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20583
20584 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20585 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20586 (TARGET_64BIT ? "ldu" : "lwzu"),
20587 lazy_ptr_name);
20588 fprintf (file, "\tmtctr r12\n");
20589 fprintf (file, "\tbctr\n");
20590 }
20591
20592 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20593 fprintf (file, "%s:\n", lazy_ptr_name);
20594 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20595 fprintf (file, "%sdyld_stub_binding_helper\n",
20596 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20597 }
20598
20599 /* Legitimize PIC addresses. If the address is already
20600 position-independent, we return ORIG. Newly generated
20601 position-independent addresses go into a reg. This is REG if
20602 nonzero; otherwise we allocate register(s) as necessary. */
20603
20604 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
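/* SMALL_INT tests for a signed 16-bit immediate by biasing into the
   unsigned range.  Worked examples (a sketch):
     X = -32768: UINTVAL (X) + 0x8000 wraps to 0       -> true
     X =  32767: UINTVAL (X) + 0x8000 == 0xffff        -> true
     X =  32768: UINTVAL (X) + 0x8000 == 0x10000       -> false  */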
20605
20606 rtx
20607 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20608 rtx reg)
20609 {
20610 rtx base, offset;
20611
20612 if (reg == NULL && !reload_completed)
20613 reg = gen_reg_rtx (Pmode);
20614
20615 if (GET_CODE (orig) == CONST)
20616 {
20617 rtx reg_temp;
20618
20619 if (GET_CODE (XEXP (orig, 0)) == PLUS
20620 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20621 return orig;
20622
20623 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20624
20625 /* Use a different reg for the intermediate value, as
20626 it will be marked UNCHANGING. */
20627 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20628 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20629 Pmode, reg_temp);
20630 offset =
20631 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20632 Pmode, reg);
20633
20634 if (CONST_INT_P (offset))
20635 {
20636 if (SMALL_INT (offset))
20637 return plus_constant (Pmode, base, INTVAL (offset));
20638 else if (!reload_completed)
20639 offset = force_reg (Pmode, offset);
20640 else
20641 {
20642 rtx mem = force_const_mem (Pmode, orig);
20643 return machopic_legitimize_pic_address (mem, Pmode, reg);
20644 }
20645 }
20646 return gen_rtx_PLUS (Pmode, base, offset);
20647 }
20648
20649 /* Fall back on generic machopic code. */
20650 return machopic_legitimize_pic_address (orig, mode, reg);
20651 }
20652
20653 /* Output a .machine directive for the Darwin assembler, and call
20654 the generic start_file routine. */
20655
20656 static void
20657 rs6000_darwin_file_start (void)
20658 {
20659 static const struct
20660 {
20661 const char *arg;
20662 const char *name;
20663 HOST_WIDE_INT if_set;
20664 } mapping[] = {
20665 { "ppc64", "ppc64", MASK_64BIT },
20666 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
20667 { "power4", "ppc970", 0 },
20668 { "G5", "ppc970", 0 },
20669 { "7450", "ppc7450", 0 },
20670 { "7400", "ppc7400", MASK_ALTIVEC },
20671 { "G4", "ppc7400", 0 },
20672 { "750", "ppc750", 0 },
20673 { "740", "ppc750", 0 },
20674 { "G3", "ppc750", 0 },
20675 { "604e", "ppc604e", 0 },
20676 { "604", "ppc604", 0 },
20677 { "603e", "ppc603", 0 },
20678 { "603", "ppc603", 0 },
20679 { "601", "ppc601", 0 },
20680 { NULL, "ppc", 0 } };
20681 const char *cpu_id = "";
20682 size_t i;
20683
20684 rs6000_file_start ();
20685 darwin_file_start ();
20686
20687 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20688
20689 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20690 cpu_id = rs6000_default_cpu;
20691
20692 if (OPTION_SET_P (rs6000_cpu_index))
20693 cpu_id = processor_target_table[rs6000_cpu_index].name;
20694
20695 /* Look through the mapping array. Pick the first name that either
20696 matches the argument, has a bit set in IF_SET that is also set
20697 in the target flags, or has a NULL name. */
20698
20699 i = 0;
20700 while (mapping[i].arg != NULL
20701 && strcmp (mapping[i].arg, cpu_id) != 0
20702 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20703 i++;
20704
20705 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20706 }
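/* Example (sketch): a -mcpu=G4 compile reaches the ppc7400 rows (either
   via the MASK_ALTIVEC bit on the "7400" row or the literal "G4" match)
   and emits "\t.machine ppc7400"; with no -mcpu and none of the listed
   ISA bits set, the NULL sentinel row yields "\t.machine ppc".  */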
20707
20708 #endif /* TARGET_MACHO */
20709
20710 #if TARGET_ELF
20711 static int
20712 rs6000_elf_reloc_rw_mask (void)
20713 {
20714 if (flag_pic)
20715 return 3;
20716 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20717 return 2;
20718 else
20719 return 0;
20720 }
20721
20722 /* Record an element in the table of global constructors. SYMBOL is
20723 a SYMBOL_REF of the function to be called; PRIORITY is a number
20724 between 0 and MAX_INIT_PRIORITY.
20725
20726 This differs from default_named_section_asm_out_constructor in
20727 that we have special handling for -mrelocatable. */
20728
20729 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20730 static void
20731 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20732 {
20733 const char *section = ".ctors";
20734 char buf[18];
20735
20736 if (priority != DEFAULT_INIT_PRIORITY)
20737 {
20738 sprintf (buf, ".ctors.%.5u",
20739 /* Invert the numbering so the linker puts us in the proper
20740 order; constructors are run from right to left, and the
20741 linker sorts in increasing order. */
20742 MAX_INIT_PRIORITY - priority);
20743 section = buf;
20744 }
20745
20746 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20747 assemble_align (POINTER_SIZE);
20748
20749 if (DEFAULT_ABI == ABI_V4
20750 && (TARGET_RELOCATABLE || flag_pic > 1))
20751 {
20752 fputs ("\t.long (", asm_out_file);
20753 output_addr_const (asm_out_file, symbol);
20754 fputs (")@fixup\n", asm_out_file);
20755 }
20756 else
20757 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20758 }
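/* Worked example of the section naming above (sketch): with
   MAX_INIT_PRIORITY == 65535, a constructor declared

     __attribute__ ((constructor (65500))) static void init (void);

   lands in ".ctors.00035" (65535 - 65500, zero-padded to five digits);
   the inversion compensates for the linker's increasing sort combined
   with the right-to-left execution of .ctors entries.  */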
20759
20760 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20761 static void
20762 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20763 {
20764 const char *section = ".dtors";
20765 char buf[18];
20766
20767 if (priority != DEFAULT_INIT_PRIORITY)
20768 {
20769 sprintf (buf, ".dtors.%.5u",
20770 /* Invert the numbering so the linker puts us in the proper
20771 order; constructors are run from right to left, and the
20772 linker sorts in increasing order. */
20773 MAX_INIT_PRIORITY - priority);
20774 section = buf;
20775 }
20776
20777 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20778 assemble_align (POINTER_SIZE);
20779
20780 if (DEFAULT_ABI == ABI_V4
20781 && (TARGET_RELOCATABLE || flag_pic > 1))
20782 {
20783 fputs ("\t.long (", asm_out_file);
20784 output_addr_const (asm_out_file, symbol);
20785 fputs (")@fixup\n", asm_out_file);
20786 }
20787 else
20788 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20789 }
20790
20791 void
20792 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20793 {
20794 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20795 {
20796 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20797 ASM_OUTPUT_LABEL (file, name);
20798 fputs (DOUBLE_INT_ASM_OP, file);
20799 rs6000_output_function_entry (file, name);
20800 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20801 if (DOT_SYMBOLS)
20802 {
20803 fputs ("\t.size\t", file);
20804 assemble_name (file, name);
20805 fputs (",24\n\t.type\t.", file);
20806 assemble_name (file, name);
20807 fputs (",@function\n", file);
20808 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20809 {
20810 fputs ("\t.globl\t.", file);
20811 assemble_name (file, name);
20812 putc ('\n', file);
20813 }
20814 }
20815 else
20816 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20817 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20818 rs6000_output_function_entry (file, name);
20819 fputs (":\n", file);
20820 return;
20821 }
20822
20823 int uses_toc;
20824 if (DEFAULT_ABI == ABI_V4
20825 && (TARGET_RELOCATABLE || flag_pic > 1)
20826 && !TARGET_SECURE_PLT
20827 && (!constant_pool_empty_p () || crtl->profile)
20828 && (uses_toc = uses_TOC ()))
20829 {
20830 char buf[256];
20831
20832 if (uses_toc == 2)
20833 switch_to_other_text_partition ();
20834 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20835
20836 fprintf (file, "\t.long ");
20837 assemble_name (file, toc_label_name);
20838 need_toc_init = 1;
20839 putc ('-', file);
20840 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20841 assemble_name (file, buf);
20842 putc ('\n', file);
20843 if (uses_toc == 2)
20844 switch_to_other_text_partition ();
20845 }
20846
20847 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20848 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20849
20850 if (TARGET_CMODEL == CMODEL_LARGE
20851 && rs6000_global_entry_point_prologue_needed_p ())
20852 {
20853 char buf[256];
20854
20855 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20856
20857 fprintf (file, "\t.quad .TOC.-");
20858 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20859 assemble_name (file, buf);
20860 putc ('\n', file);
20861 }
20862
20863 if (DEFAULT_ABI == ABI_AIX)
20864 {
20865 const char *desc_name, *orig_name;
20866
20867 orig_name = (*targetm.strip_name_encoding) (name);
20868 desc_name = orig_name;
20869 while (*desc_name == '.')
20870 desc_name++;
20871
20872 if (TREE_PUBLIC (decl))
20873 fprintf (file, "\t.globl %s\n", desc_name);
20874
20875 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20876 fprintf (file, "%s:\n", desc_name);
20877 fprintf (file, "\t.long %s\n", orig_name);
20878 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20879 fputs ("\t.long 0\n", file);
20880 fprintf (file, "\t.previous\n");
20881 }
20882 ASM_OUTPUT_LABEL (file, name);
20883 }
20884
20885 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20886 static void
20887 rs6000_elf_file_end (void)
20888 {
20889 #ifdef HAVE_AS_GNU_ATTRIBUTE
20890 /* ??? The value emitted depends on options active at file end.
20891 Assume anyone using #pragma or attributes that might change
20892 options knows what they are doing. */
20893 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20894 && rs6000_passes_float)
20895 {
20896 int fp;
20897
20898 if (TARGET_HARD_FLOAT)
20899 fp = 1;
20900 else
20901 fp = 2;
20902 if (rs6000_passes_long_double)
20903 {
20904 if (!TARGET_LONG_DOUBLE_128)
20905 fp |= 2 * 4;
20906 else if (TARGET_IEEEQUAD)
20907 fp |= 3 * 4;
20908 else
20909 fp |= 1 * 4;
20910 }
20911 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20912 }
20913 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20914 {
20915 if (rs6000_passes_vector)
20916 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20917 (TARGET_ALTIVEC_ABI ? 2 : 1));
20918 if (rs6000_returns_struct)
20919 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20920 aix_struct_return ? 2 : 1);
20921 }
20922 #endif
20923 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20924 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20925 file_end_indicate_exec_stack ();
20926 #endif
20927
20928 if (flag_split_stack)
20929 file_end_indicate_split_stack ();
20930
20931 if (cpu_builtin_p)
20932 {
20933 /* We have expanded a CPU builtin, so we need to emit a reference to
20934 the special symbol that LIBC uses to declare that it supports the
20935 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
20936 switch_to_section (data_section);
20937 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20938 fprintf (asm_out_file, "\t%s %s\n",
20939 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20940 }
20941 }
20942 #endif
20943
20944 #if TARGET_XCOFF
20945
20946 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20947 #define HAVE_XCOFF_DWARF_EXTRAS 0
20948 #endif
20949
20950 static enum unwind_info_type
20951 rs6000_xcoff_debug_unwind_info (void)
20952 {
20953 return UI_NONE;
20954 }
20955
20956 static void
20957 rs6000_xcoff_asm_output_anchor (rtx symbol)
20958 {
20959 char buffer[100];
20960
20961 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20962 SYMBOL_REF_BLOCK_OFFSET (symbol));
20963 fprintf (asm_out_file, "%s", SET_ASM_OP);
20964 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20965 fprintf (asm_out_file, ",");
20966 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20967 fprintf (asm_out_file, "\n");
20968 }
20969
20970 static void
20971 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20972 {
20973 fputs (GLOBAL_ASM_OP, stream);
20974 RS6000_OUTPUT_BASENAME (stream, name);
20975 putc ('\n', stream);
20976 }
20977
20978 /* A get_unnamed_section callback, used for read-only sections.
20979 DIRECTIVE is non-null when the private rodata section is wanted. */
20980
20981 static void
20982 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
20983 {
20984 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20985 directive
20986 ? xcoff_private_rodata_section_name
20987 : xcoff_read_only_section_name,
20988 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20989 }
20990
20991 /* Likewise for read-write sections. */
20992
20993 static void
20994 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
20995 {
20996 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20997 xcoff_private_data_section_name,
20998 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20999 }
21000
21001 static void
21002 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21003 {
21004 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21005 directive
21006 ? xcoff_private_data_section_name
21007 : xcoff_tls_data_section_name,
21008 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21009 }
21010
21011 /* A get_unnamed_section callback, used for switching to toc_section. */
21012
21013 static void
21014 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21015 {
21016 if (TARGET_MINIMAL_TOC)
21017 {
21018 /* toc_section is always selected at least once from
21019 rs6000_xcoff_file_start, so this is guaranteed to
21020 be defined exactly once in each file. */
21021 if (!toc_initialized)
21022 {
21023 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21024 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21025 toc_initialized = 1;
21026 }
21027 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21028 (TARGET_32BIT ? "" : ",3"));
21029 }
21030 else
21031 fputs ("\t.toc\n", asm_out_file);
21032 }
21033
21034 /* Implement TARGET_ASM_INIT_SECTIONS. */
21035
21036 static void
21037 rs6000_xcoff_asm_init_sections (void)
21038 {
21039 read_only_data_section
21040 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21041 NULL);
21042
21043 private_data_section
21044 = get_unnamed_section (SECTION_WRITE,
21045 rs6000_xcoff_output_readwrite_section_asm_op,
21046 NULL);
21047
21048 read_only_private_data_section
21049 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21050 "");
21051
21052 tls_data_section
21053 = get_unnamed_section (SECTION_TLS,
21054 rs6000_xcoff_output_tls_section_asm_op,
21055 NULL);
21056
21057 tls_private_data_section
21058 = get_unnamed_section (SECTION_TLS,
21059 rs6000_xcoff_output_tls_section_asm_op,
21060 "");
21061
21062 toc_section
21063 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21064
21065 readonly_data_section = read_only_data_section;
21066 }
21067
21068 static int
21069 rs6000_xcoff_reloc_rw_mask (void)
21070 {
21071 return 3;
21072 }
21073
21074 static void
21075 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21076 tree decl ATTRIBUTE_UNUSED)
21077 {
21078 int smclass;
21079 static const char * const suffix[7]
21080 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21081
21082 if (flags & SECTION_EXCLUDE)
21083 smclass = 6;
21084 else if (flags & SECTION_DEBUG)
21085 {
21086 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21087 return;
21088 }
21089 else if (flags & SECTION_CODE)
21090 smclass = 0;
21091 else if (flags & SECTION_TLS)
21092 {
21093 if (flags & SECTION_BSS)
21094 smclass = 5;
21095 else
21096 smclass = 4;
21097 }
21098 else if (flags & SECTION_WRITE)
21099 {
21100 if (flags & SECTION_BSS)
21101 smclass = 3;
21102 else
21103 smclass = 2;
21104 }
21105 else
21106 smclass = 1;
21107
21108 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21109 (flags & SECTION_CODE) ? "." : "",
21110 name, suffix[smclass], flags & SECTION_ENTSIZE);
21111 }
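/* Example of the emission above (sketch): a writable, non-BSS section
   "foo" whose alignment is encoded as 3 in the SECTION_ENTSIZE bits
   selects smclass 2 and produces

     .csect foo[RW],3

   while a code section "bar" gains the leading dot: ".csect .bar[PR],...".  */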
21112
21113 #define IN_NAMED_SECTION(DECL) \
21114 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21115 && DECL_SECTION_NAME (DECL) != NULL)
21116
21117 static section *
21118 rs6000_xcoff_select_section (tree decl, int reloc,
21119 unsigned HOST_WIDE_INT align)
21120 {
21121 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21122 a named section. */
21123 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21124 {
21125 resolve_unique_section (decl, reloc, true);
21126 if (IN_NAMED_SECTION (decl))
21127 return get_named_section (decl, NULL, reloc);
21128 }
21129
21130 if (decl_readonly_section (decl, reloc))
21131 {
21132 if (TREE_PUBLIC (decl))
21133 return read_only_data_section;
21134 else
21135 return read_only_private_data_section;
21136 }
21137 else
21138 {
21139 #if HAVE_AS_TLS
21140 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21141 {
21142 if (bss_initializer_p (decl))
21143 return tls_comm_section;
21144 else if (TREE_PUBLIC (decl))
21145 return tls_data_section;
21146 else
21147 return tls_private_data_section;
21148 }
21149 else
21150 #endif
21151 if (TREE_PUBLIC (decl))
21152 return data_section;
21153 else
21154 return private_data_section;
21155 }
21156 }
21157
21158 static void
21159 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21160 {
21161 const char *name;
21162
21163 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21164 name = (*targetm.strip_name_encoding) (name);
21165 set_decl_section_name (decl, name);
21166 }
21167
21168 /* Select section for constant in constant pool.
21169
21170 On RS/6000, all constants are in the private read-only data area.
21171 However, if this is being placed in the TOC it must be output as a
21172 toc entry. */
21173
21174 static section *
21175 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21176 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21177 {
21178 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21179 return toc_section;
21180 else
21181 return read_only_private_data_section;
21182 }
21183
21184 /* Remove any trailing [DS] or the like from the symbol name. */
21185
21186 static const char *
21187 rs6000_xcoff_strip_name_encoding (const char *name)
21188 {
21189 size_t len;
21190 if (*name == '*')
21191 name++;
21192 len = strlen (name);
21193 if (name[len - 1] == ']')
21194 return ggc_alloc_string (name, len - 4);
21195 else
21196 return name;
21197 }
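/* Worked examples (sketch):
     "*foo"     -> "foo"   (leading '*' dropped)
     "bar[DS]"  -> "bar"   (four-character "[DS]" suffix removed)
     "baz"      -> "baz"   (returned unchanged)  */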
21198
21199 /* Section attributes. AIX is always PIC. */
21200
21201 static unsigned int
21202 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21203 {
21204 unsigned int align;
21205 unsigned int flags = default_section_type_flags (decl, name, reloc);
21206
21207 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21208 flags |= SECTION_BSS;
21209
21210 /* Align to at least UNIT size. */
21211 if (!decl || !DECL_P (decl))
21212 align = MIN_UNITS_PER_WORD;
21213 /* Align code CSECT to at least 32 bytes. */
21214 else if ((flags & SECTION_CODE) != 0)
21215 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21216 else
21217 /* Increase alignment of large objects if not already stricter. */
21218 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21219 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21220 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21221
21222 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21223 }
21224
21225 /* Output at beginning of assembler file.
21226
21227 Initialize the section names for the RS/6000 at this point.
21228
21229 Specify filename, including full path, to assembler.
21230
21231 We want to go into the TOC section so at least one .toc will be emitted.
21232 Also, in order to output proper .bs/.es pairs, we need at least one static
21233 [RW] section emitted.
21234
21235 Finally, declare mcount when profiling to make the assembler happy. */
21236
21237 static void
21238 rs6000_xcoff_file_start (void)
21239 {
21240 rs6000_gen_section_name (&xcoff_bss_section_name,
21241 main_input_filename, ".bss_");
21242 rs6000_gen_section_name (&xcoff_private_data_section_name,
21243 main_input_filename, ".rw_");
21244 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21245 main_input_filename, ".rop_");
21246 rs6000_gen_section_name (&xcoff_read_only_section_name,
21247 main_input_filename, ".ro_");
21248 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21249 main_input_filename, ".tls_");
21250
21251 fputs ("\t.file\t", asm_out_file);
21252 output_quoted_string (asm_out_file, main_input_filename);
21253 fputc ('\n', asm_out_file);
21254 if (write_symbols != NO_DEBUG)
21255 switch_to_section (private_data_section);
21256 switch_to_section (toc_section);
21257 switch_to_section (text_section);
21258 if (profile_flag)
21259 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21260 rs6000_file_start ();
21261 }
21262
21263 /* Output at end of assembler file.
21264 On the RS/6000, referencing data should automatically pull in text. */
21265
21266 static void
21267 rs6000_xcoff_file_end (void)
21268 {
21269 switch_to_section (text_section);
21270 if (xcoff_tls_exec_model_detected)
21271 {
21272 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21273 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21274 }
21275 fputs ("_section_.text:\n", asm_out_file);
21276 switch_to_section (data_section);
21277 fputs (TARGET_32BIT
21278 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21279 asm_out_file);
21280
21281 }
21282
21283 struct declare_alias_data
21284 {
21285 FILE *file;
21286 bool function_descriptor;
21287 };
21288
21289 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
21290
21291 static bool
21292 rs6000_declare_alias (struct symtab_node *n, void *d)
21293 {
21294 struct declare_alias_data *data = (struct declare_alias_data *)d;
21295 /* The main symbol is output specially, because the varasm machinery does
21296 part of the job for us; we do not need to declare .globl/.lglobl and such. */
21297 if (!n->alias || n->weakref)
21298 return false;
21299
21300 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21301 return false;
21302
21303 /* Prevent assemble_alias from trying to use .set pseudo operation
21304 that does not behave as expected by the middle-end. */
21305 TREE_ASM_WRITTEN (n->decl) = true;
21306
21307 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21308 char *buffer = (char *) alloca (strlen (name) + 2);
21309 char *p;
21310 int dollar_inside = 0;
21311
21312 strcpy (buffer, name);
21313 p = strchr (buffer, '$');
21314 while (p) {
21315 *p = '_';
21316 dollar_inside++;
21317 p = strchr (p + 1, '$');
21318 }
21319 if (TREE_PUBLIC (n->decl))
21320 {
21321 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21322 {
21323 if (dollar_inside) {
21324 if (data->function_descriptor)
21325 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21326 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21327 }
21328 if (data->function_descriptor)
21329 {
21330 fputs ("\t.globl .", data->file);
21331 RS6000_OUTPUT_BASENAME (data->file, buffer);
21332 putc ('\n', data->file);
21333 }
21334 fputs ("\t.globl ", data->file);
21335 assemble_name (data->file, buffer);
21336 putc ('\n', data->file);
21337 }
21338 #ifdef ASM_WEAKEN_DECL
21339 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21340 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21341 #endif
21342 }
21343 else
21344 {
21345 if (dollar_inside)
21346 {
21347 if (data->function_descriptor)
21348 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21349 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21350 }
21351 if (data->function_descriptor)
21352 {
21353 fputs ("\t.lglobl .", data->file);
21354 RS6000_OUTPUT_BASENAME (data->file, buffer);
21355 putc ('\n', data->file);
21356 }
21357 fputs ("\t.lglobl ", data->file);
21358 assemble_name (data->file, buffer);
21359 putc ('\n', data->file);
21360 }
21361 if (data->function_descriptor)
21362 putc ('.', data->file);
21363 ASM_OUTPUT_LABEL (data->file, buffer);
21364 return false;
21365 }
21366
21367
21368 #ifdef HAVE_GAS_HIDDEN
21369 /* Helper function to calculate visibility of a DECL
21370 and return the value as a const string. */
21371
21372 static const char *
21373 rs6000_xcoff_visibility (tree decl)
21374 {
21375 static const char * const visibility_types[] = {
21376 "", ",protected", ",hidden", ",internal"
21377 };
21378
21379 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21380 return visibility_types[vis];
21381 }
21382 #endif
21383
21384
21385 /* This macro produces the initial definition of a function name.
21386 On the RS/6000, we need to place an extra '.' in the function name and
21387 output the function descriptor.
21388 Dollar signs are converted to underscores.
21389
21390 The csect for the function will have already been created when
21391 text_section was selected. We do have to go back to that csect, however.
21392
21393 The third and fourth parameters to the .function pseudo-op (16 and 044)
21394 are placeholders which no longer have any use.
21395
21396 Because AIX assembler's .set command has unexpected semantics, we output
21397 all aliases as alternative labels in front of the definition. */
21398
21399 void
21400 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21401 {
21402 char *buffer = (char *) alloca (strlen (name) + 1);
21403 char *p;
21404 int dollar_inside = 0;
21405 struct declare_alias_data data = {file, false};
21406
21407 strcpy (buffer, name);
21408 p = strchr (buffer, '$');
21409 while (p) {
21410 *p = '_';
21411 dollar_inside++;
21412 p = strchr (p + 1, '$');
21413 }
21414 if (TREE_PUBLIC (decl))
21415 {
21416 if (!RS6000_WEAK || !DECL_WEAK (decl))
21417 {
21418 if (dollar_inside) {
21419 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21420 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21421 }
21422 fputs ("\t.globl .", file);
21423 RS6000_OUTPUT_BASENAME (file, buffer);
21424 #ifdef HAVE_GAS_HIDDEN
21425 fputs (rs6000_xcoff_visibility (decl), file);
21426 #endif
21427 putc ('\n', file);
21428 }
21429 }
21430 else
21431 {
21432 if (dollar_inside) {
21433 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21434 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21435 }
21436 fputs ("\t.lglobl .", file);
21437 RS6000_OUTPUT_BASENAME (file, buffer);
21438 putc ('\n', file);
21439 }
21440
21441 fputs ("\t.csect ", file);
21442 assemble_name (file, buffer);
21443 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21444
21445 ASM_OUTPUT_LABEL (file, buffer);
21446
21447 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21448 &data, true);
21449 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21450 RS6000_OUTPUT_BASENAME (file, buffer);
21451 fputs (", TOC[tc0], 0\n", file);
21452
21453 in_section = NULL;
21454 switch_to_section (function_section (decl));
21455 putc ('.', file);
21456 ASM_OUTPUT_LABEL (file, buffer);
21457
21458 data.function_descriptor = true;
21459 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21460 &data, true);
21461 if (!DECL_IGNORED_P (decl))
21462 {
21463 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
21464 xcoffout_declare_function (file, decl, buffer);
21465 else if (dwarf_debuginfo_p ())
21466 {
21467 name = (*targetm.strip_name_encoding) (name);
21468 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21469 }
21470 }
21471 return;
21472 }
21473
21474
21475 /* Output assembly language to globalize a symbol from a DECL,
21476 possibly with visibility. */
21477
21478 void
21479 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21480 {
21481 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21482 fputs (GLOBAL_ASM_OP, stream);
21483 assemble_name (stream, name);
21484 #ifdef HAVE_GAS_HIDDEN
21485 fputs (rs6000_xcoff_visibility (decl), stream);
21486 #endif
21487 putc ('\n', stream);
21488 }
21489
21490 /* Output assembly language to define a symbol as COMMON from a DECL,
21491 possibly with visibility. */
21492
21493 void
21494 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21495 tree decl ATTRIBUTE_UNUSED,
21496 const char *name,
21497 unsigned HOST_WIDE_INT size,
21498 unsigned int align)
21499 {
21500 unsigned int align2 = 2;
21501
21502 if (align == 0)
21503 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21504
21505 if (align > 32)
21506 align2 = floor_log2 (align / BITS_PER_UNIT);
21507 else if (size > 4)
21508 align2 = 3;
21509
21510 if (! DECL_COMMON (decl))
21511 {
21512 /* Forget section. */
21513 in_section = NULL;
21514
21515 /* Globalize TLS BSS. */
21516 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21517 {
21518 fputs (GLOBAL_ASM_OP, stream);
21519 assemble_name (stream, name);
21520 fputc ('\n', stream);
21521 }
21522
21523 /* Switch to section and skip space. */
21524 fputs ("\t.csect ", stream);
21525 assemble_name (stream, name);
21526 fprintf (stream, ",%u\n", align2);
21527 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21528 ASM_OUTPUT_SKIP (stream, size ? size : 1);
21529 return;
21530 }
21531
21532 if (TREE_PUBLIC (decl))
21533 {
21534 fprintf (stream,
21535 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21536 name, size, align2);
21537
21538 #ifdef HAVE_GAS_HIDDEN
21539 if (decl != NULL)
21540 fputs (rs6000_xcoff_visibility (decl), stream);
21541 #endif
21542 putc ('\n', stream);
21543 }
21544 else
21545 fprintf (stream,
21546 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21547 (*targetm.strip_name_encoding) (name), size, name, align2);
21548 }
21549
21550 /* This macro produces the initial definition of an object (variable) name.
21551    Because the AIX assembler's .set command has unexpected semantics, we output
21552 all aliases as alternative labels in front of the definition. */
21553
21554 void
21555 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21556 {
21557 struct declare_alias_data data = {file, false};
21558 ASM_OUTPUT_LABEL (file, name);
21559 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21560 &data, true);
21561 }
21562
21563 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
21564
21565 void
21566 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21567 {
21568 fputs (integer_asm_op (size, FALSE), file);
21569 assemble_name (file, label);
21570 fputs ("-$", file);
21571 }
21572
21573 /* Output a symbol offset relative to the dbase for the current object.
21574 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21575 signed offsets.
21576
21577 __gcc_unwind_dbase is embedded in all executables/libraries through
21578 libgcc/config/rs6000/crtdbase.S. */
21579
21580 void
21581 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21582 {
21583 fputs (integer_asm_op (size, FALSE), file);
21584 assemble_name (file, label);
21585 fputs("-__gcc_unwind_dbase", file);
21586 }
21587
21588 #ifdef HAVE_AS_TLS
21589 static void
21590 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21591 {
21592 rtx symbol;
21593 int flags;
21594 const char *symname;
21595
21596 default_encode_section_info (decl, rtl, first);
21597
21598 /* Careful not to prod global register variables. */
21599 if (!MEM_P (rtl))
21600 return;
21601 symbol = XEXP (rtl, 0);
21602 if (!SYMBOL_REF_P (symbol))
21603 return;
21604
21605 flags = SYMBOL_REF_FLAGS (symbol);
21606
21607 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21608 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21609
21610 SYMBOL_REF_FLAGS (symbol) = flags;
21611
21612 symname = XSTR (symbol, 0);
21613
21614 /* Append CSECT mapping class, unless the symbol already is qualified.
21615 Aliases are implemented as labels, so the symbol name should not add
21616 a mapping class. */
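      /* For instance (an editorial example, not in the original source):
	 with the selection below a function "foo" gets the qualified
	 name "foo[DS]", a TLS variable with a BSS initializer becomes
	 "foo[UL]", and an external object becomes "foo[UA]".  */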
21617 if (decl
21618 && DECL_P (decl)
21619 && VAR_OR_FUNCTION_DECL_P (decl)
21620 && (symtab_node::get (decl) == NULL
21621 || symtab_node::get (decl)->alias == 0)
21622 && symname[strlen (symname) - 1] != ']')
21623 {
21624 const char *smclass = NULL;
21625
21626 if (TREE_CODE (decl) == FUNCTION_DECL)
21627 smclass = "[DS]";
21628 else if (DECL_THREAD_LOCAL_P (decl))
21629 {
21630 if (bss_initializer_p (decl))
21631 smclass = "[UL]";
21632 else if (flag_data_sections)
21633 smclass = "[TL]";
21634 }
21635 else if (DECL_EXTERNAL (decl))
21636 smclass = "[UA]";
21637 else if (bss_initializer_p (decl))
21638 smclass = "[BS]";
21639 else if (flag_data_sections)
21640 {
21641 /* This must exactly match the logic of select section. */
21642 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
21643 smclass = "[RO]";
21644 else
21645 smclass = "[RW]";
21646 }
21647
21648 if (smclass != NULL)
21649 {
21650 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
21651
21652 strcpy (newname, symname);
21653 strcat (newname, smclass);
21654 XSTR (symbol, 0) = ggc_strdup (newname);
21655 }
21656 }
21657 }
21658 #endif /* HAVE_AS_TLS */
21659 #endif /* TARGET_XCOFF */
21660
21661 void
21662 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21663 const char *name, const char *val)
21664 {
21665 fputs ("\t.weak\t", stream);
21666 assemble_name (stream, name);
21667 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21668 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21669 {
21670 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21671 if (TARGET_XCOFF)
21672 fputs (rs6000_xcoff_visibility (decl), stream);
21673 #endif
21674 fputs ("\n\t.weak\t.", stream);
21675 RS6000_OUTPUT_BASENAME (stream, name);
21676 }
21677 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21678 if (TARGET_XCOFF)
21679 fputs (rs6000_xcoff_visibility (decl), stream);
21680 #endif
21681 fputc ('\n', stream);
21682
21683 if (val)
21684 {
21685 #ifdef ASM_OUTPUT_DEF
21686 ASM_OUTPUT_DEF (stream, name, val);
21687 #endif
21688 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21689 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21690 {
21691 fputs ("\t.set\t.", stream);
21692 RS6000_OUTPUT_BASENAME (stream, name);
21693 fputs (",.", stream);
21694 RS6000_OUTPUT_BASENAME (stream, val);
21695 fputc ('\n', stream);
21696 }
21697 }
21698 }
21699
21700
21701 /* Return true if INSN should not be copied. */
21702
21703 static bool
21704 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21705 {
21706 return recog_memoized (insn) >= 0
21707 && get_attr_cannot_copy (insn);
21708 }
21709
21710 /* Compute a (partial) cost for rtx X. Return true if the complete
21711 cost has been computed, and false if subexpressions should be
21712 scanned. In either case, *TOTAL contains the cost result. */
21713
21714 static bool
21715 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21716 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21717 {
21718 int code = GET_CODE (x);
21719
21720 switch (code)
21721 {
21722 /* On the RS/6000, if it is valid in the insn, it is free. */
21723 case CONST_INT:
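      /* Editorial note: the constraint letters tested below are, to the
	 best of our reading of the rs6000 constraints, 'I' a signed
	 16-bit constant, 'K' an unsigned 16-bit constant, 'L' a signed
	 16-bit constant shifted left 16 bits, 'J' a constant with only
	 the high 16 bits set, and 'P' a constant whose negation is a
	 signed 16-bit constant.  */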
21724 if (((outer_code == SET
21725 || outer_code == PLUS
21726 || outer_code == MINUS)
21727 && (satisfies_constraint_I (x)
21728 || satisfies_constraint_L (x)))
21729 || (outer_code == AND
21730 && (satisfies_constraint_K (x)
21731 || (mode == SImode
21732 ? satisfies_constraint_L (x)
21733 : satisfies_constraint_J (x))))
21734 || ((outer_code == IOR || outer_code == XOR)
21735 && (satisfies_constraint_K (x)
21736 || (mode == SImode
21737 ? satisfies_constraint_L (x)
21738 : satisfies_constraint_J (x))))
21739 || outer_code == ASHIFT
21740 || outer_code == ASHIFTRT
21741 || outer_code == LSHIFTRT
21742 || outer_code == ROTATE
21743 || outer_code == ROTATERT
21744 || outer_code == ZERO_EXTRACT
21745 || (outer_code == MULT
21746 && satisfies_constraint_I (x))
21747 || ((outer_code == DIV || outer_code == UDIV
21748 || outer_code == MOD || outer_code == UMOD)
21749 && exact_log2 (INTVAL (x)) >= 0)
21750 || (outer_code == COMPARE
21751 && (satisfies_constraint_I (x)
21752 || satisfies_constraint_K (x)))
21753 || ((outer_code == EQ || outer_code == NE)
21754 && (satisfies_constraint_I (x)
21755 || satisfies_constraint_K (x)
21756 || (mode == SImode
21757 ? satisfies_constraint_L (x)
21758 : satisfies_constraint_J (x))))
21759 || (outer_code == GTU
21760 && satisfies_constraint_I (x))
21761 || (outer_code == LTU
21762 && satisfies_constraint_P (x)))
21763 {
21764 *total = 0;
21765 return true;
21766 }
21767 else if ((outer_code == PLUS
21768 && reg_or_add_cint_operand (x, mode))
21769 || (outer_code == MINUS
21770 && reg_or_sub_cint_operand (x, mode))
21771 || ((outer_code == SET
21772 || outer_code == IOR
21773 || outer_code == XOR)
21774 && (INTVAL (x)
21775 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21776 {
21777 *total = COSTS_N_INSNS (1);
21778 return true;
21779 }
21780 /* FALLTHRU */
21781
21782 case CONST_DOUBLE:
21783 case CONST_WIDE_INT:
21784 case CONST:
21785 case HIGH:
21786 case SYMBOL_REF:
21787 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21788 return true;
21789
21790 case MEM:
21791 /* When optimizing for size, MEM should be slightly more expensive
21792 than generating address, e.g., (plus (reg) (const)).
21793 L1 cache latency is about two instructions. */
21794 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21795 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21796 *total += COSTS_N_INSNS (100);
21797 return true;
21798
21799 case LABEL_REF:
21800 *total = 0;
21801 return true;
21802
21803 case PLUS:
21804 case MINUS:
21805 if (FLOAT_MODE_P (mode))
21806 *total = rs6000_cost->fp;
21807 else
21808 *total = COSTS_N_INSNS (1);
21809 return false;
21810
21811 case MULT:
21812 if (CONST_INT_P (XEXP (x, 1))
21813 && satisfies_constraint_I (XEXP (x, 1)))
21814 {
21815 if (INTVAL (XEXP (x, 1)) >= -256
21816 && INTVAL (XEXP (x, 1)) <= 255)
21817 *total = rs6000_cost->mulsi_const9;
21818 else
21819 *total = rs6000_cost->mulsi_const;
21820 }
21821 else if (mode == SFmode)
21822 *total = rs6000_cost->fp;
21823 else if (FLOAT_MODE_P (mode))
21824 *total = rs6000_cost->dmul;
21825 else if (mode == DImode)
21826 *total = rs6000_cost->muldi;
21827 else
21828 *total = rs6000_cost->mulsi;
21829 return false;
21830
21831 case FMA:
21832 if (mode == SFmode)
21833 *total = rs6000_cost->fp;
21834 else
21835 *total = rs6000_cost->dmul;
21836 break;
21837
21838 case DIV:
21839 case MOD:
21840 if (FLOAT_MODE_P (mode))
21841 {
21842 *total = mode == DFmode ? rs6000_cost->ddiv
21843 : rs6000_cost->sdiv;
21844 return false;
21845 }
21846 /* FALLTHRU */
21847
21848 case UDIV:
21849 case UMOD:
21850 if (CONST_INT_P (XEXP (x, 1))
21851 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21852 {
21853 if (code == DIV || code == MOD)
21854 /* Shift, addze */
21855 *total = COSTS_N_INSNS (2);
21856 else
21857 /* Shift */
21858 *total = COSTS_N_INSNS (1);
21859 }
21860 else
21861 {
21862 if (GET_MODE (XEXP (x, 1)) == DImode)
21863 *total = rs6000_cost->divdi;
21864 else
21865 *total = rs6000_cost->divsi;
21866 }
21867 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21868 if (!TARGET_MODULO && (code == MOD || code == UMOD))
21869 *total += COSTS_N_INSNS (2);
21870 return false;
21871
21872 case CTZ:
21873 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21874 return false;
21875
21876 case FFS:
21877 *total = COSTS_N_INSNS (4);
21878 return false;
21879
21880 case POPCOUNT:
21881 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21882 return false;
21883
21884 case PARITY:
21885 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21886 return false;
21887
21888 case NOT:
21889 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21890 *total = 0;
21891 else
21892 *total = COSTS_N_INSNS (1);
21893 return false;
21894
21895 case AND:
21896 if (CONST_INT_P (XEXP (x, 1)))
21897 {
21898 rtx left = XEXP (x, 0);
21899 rtx_code left_code = GET_CODE (left);
21900
21901 /* rotate-and-mask: 1 insn. */
21902 if ((left_code == ROTATE
21903 || left_code == ASHIFT
21904 || left_code == LSHIFTRT)
21905 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21906 {
21907 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21908 if (!CONST_INT_P (XEXP (left, 1)))
21909 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21910 *total += COSTS_N_INSNS (1);
21911 return true;
21912 }
21913
21914 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
21915 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
21916 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
21917 || (val & 0xffff) == val
21918 || (val & 0xffff0000) == val
21919 || ((val & 0xffff) == 0 && mode == SImode))
21920 {
21921 *total = rtx_cost (left, mode, AND, 0, speed);
21922 *total += COSTS_N_INSNS (1);
21923 return true;
21924 }
21925
21926 /* 2 insns. */
21927 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
21928 {
21929 *total = rtx_cost (left, mode, AND, 0, speed);
21930 *total += COSTS_N_INSNS (2);
21931 return true;
21932 }
21933 }
21934
21935 *total = COSTS_N_INSNS (1);
21936 return false;
21937
21938 case IOR:
21939 /* FIXME */
21940 *total = COSTS_N_INSNS (1);
21941 return true;
21942
21943 case CLZ:
21944 case XOR:
21945 case ZERO_EXTRACT:
21946 *total = COSTS_N_INSNS (1);
21947 return false;
21948
21949 case ASHIFT:
21950 /* The EXTSWSLI instruction is a combined instruction. Don't count both
21951 the sign extend and shift separately within the insn. */
21952 if (TARGET_EXTSWSLI && mode == DImode
21953 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21954 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21955 {
21956 *total = 0;
21957 return false;
21958 }
21959 /* fall through */
21960
21961 case ASHIFTRT:
21962 case LSHIFTRT:
21963 case ROTATE:
21964 case ROTATERT:
21965 /* Handle mul_highpart. */
21966 if (outer_code == TRUNCATE
21967 && GET_CODE (XEXP (x, 0)) == MULT)
21968 {
21969 if (mode == DImode)
21970 *total = rs6000_cost->muldi;
21971 else
21972 *total = rs6000_cost->mulsi;
21973 return true;
21974 }
21975 else if (outer_code == AND)
21976 *total = 0;
21977 else
21978 *total = COSTS_N_INSNS (1);
21979 return false;
21980
21981 case SIGN_EXTEND:
21982 case ZERO_EXTEND:
21983 if (MEM_P (XEXP (x, 0)))
21984 *total = 0;
21985 else
21986 *total = COSTS_N_INSNS (1);
21987 return false;
21988
21989 case COMPARE:
21990 case NEG:
21991 case ABS:
21992 if (!FLOAT_MODE_P (mode))
21993 {
21994 *total = COSTS_N_INSNS (1);
21995 return false;
21996 }
21997 /* FALLTHRU */
21998
21999 case FLOAT:
22000 case UNSIGNED_FLOAT:
22001 case FIX:
22002 case UNSIGNED_FIX:
22003 case FLOAT_TRUNCATE:
22004 *total = rs6000_cost->fp;
22005 return false;
22006
22007 case FLOAT_EXTEND:
22008 if (mode == DFmode)
22009 *total = rs6000_cost->sfdf_convert;
22010 else
22011 *total = rs6000_cost->fp;
22012 return false;
22013
22014 case CALL:
22015 case IF_THEN_ELSE:
22016 if (!speed)
22017 {
22018 *total = COSTS_N_INSNS (1);
22019 return true;
22020 }
22021 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22022 {
22023 *total = rs6000_cost->fp;
22024 return false;
22025 }
22026 break;
22027
22028 case NE:
22029 case EQ:
22030 case GTU:
22031 case LTU:
22032 /* Carry bit requires mode == Pmode.
22033 NEG or PLUS already counted so only add one. */
22034 if (mode == Pmode
22035 && (outer_code == NEG || outer_code == PLUS))
22036 {
22037 *total = COSTS_N_INSNS (1);
22038 return true;
22039 }
22040 /* FALLTHRU */
22041
22042 case GT:
22043 case LT:
22044 case UNORDERED:
22045 if (outer_code == SET)
22046 {
22047 if (XEXP (x, 1) == const0_rtx)
22048 {
22049 *total = COSTS_N_INSNS (2);
22050 return true;
22051 }
22052 else
22053 {
22054 *total = COSTS_N_INSNS (3);
22055 return false;
22056 }
22057 }
22058 /* CC COMPARE. */
22059 if (outer_code == COMPARE)
22060 {
22061 *total = 0;
22062 return true;
22063 }
22064 break;
22065
22066 case UNSPEC:
22067 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22068 {
22069 *total = 0;
22070 return true;
22071 }
22072 break;
22073
22074 default:
22075 break;
22076 }
22077
22078 return false;
22079 }
22080
22081 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
22082
22083 static bool
22084 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22085 int opno, int *total, bool speed)
22086 {
22087 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22088
22089 fprintf (stderr,
22090 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22091 "opno = %d, total = %d, speed = %s, x:\n",
22092 ret ? "complete" : "scan inner",
22093 GET_MODE_NAME (mode),
22094 GET_RTX_NAME (outer_code),
22095 opno,
22096 *total,
22097 speed ? "true" : "false");
22098
22099 debug_rtx (x);
22100
22101 return ret;
22102 }
22103
22104 static int
22105 rs6000_insn_cost (rtx_insn *insn, bool speed)
22106 {
22107 if (recog_memoized (insn) < 0)
22108 return 0;
22109
22110 /* If we are optimizing for size, just use the length. */
22111 if (!speed)
22112 return get_attr_length (insn);
22113
22114 /* Use the cost if provided. */
22115 int cost = get_attr_cost (insn);
22116 if (cost > 0)
22117 return cost;
22118
22119 /* If the insn tells us how many insns there are, use that. Otherwise use
22120 the length/4. Adjust the insn length to remove the extra size that
22121 prefixed instructions take. */
22122 int n = get_attr_num_insns (insn);
22123 if (n == 0)
22124 {
22125 int length = get_attr_length (insn);
22126 if (get_attr_prefixed (insn) == PREFIXED_YES)
22127 {
22128 int adjust = 0;
22129 ADJUST_INSN_LENGTH (insn, adjust);
22130 length -= adjust;
22131 }
22132
22133 n = length / 4;
22134 }
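  /* Editorial note: an ordinary 4-byte instruction thus yields n = 1.
     Our understanding is that the length attribute of a prefixed
     instruction includes extra bytes reserved for a possible alignment
     nop; calling ADJUST_INSN_LENGTH on a zero base recovers just that
     padding, so the division by 4 reflects the real instruction size.  */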
22135
22136 enum attr_type type = get_attr_type (insn);
22137
22138 switch (type)
22139 {
22140 case TYPE_LOAD:
22141 case TYPE_FPLOAD:
22142 case TYPE_VECLOAD:
22143 cost = COSTS_N_INSNS (n + 1);
22144 break;
22145
22146 case TYPE_MUL:
22147 switch (get_attr_size (insn))
22148 {
22149 case SIZE_8:
22150 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22151 break;
22152 case SIZE_16:
22153 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22154 break;
22155 case SIZE_32:
22156 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22157 break;
22158 case SIZE_64:
22159 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22160 break;
22161 default:
22162 gcc_unreachable ();
22163 }
22164 break;
22165 case TYPE_DIV:
22166 switch (get_attr_size (insn))
22167 {
22168 case SIZE_32:
22169 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22170 break;
22171 case SIZE_64:
22172 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22173 break;
22174 default:
22175 gcc_unreachable ();
22176 }
22177 break;
22178
22179 case TYPE_FP:
22180 cost = n * rs6000_cost->fp;
22181 break;
22182 case TYPE_DMUL:
22183 cost = n * rs6000_cost->dmul;
22184 break;
22185 case TYPE_SDIV:
22186 cost = n * rs6000_cost->sdiv;
22187 break;
22188 case TYPE_DDIV:
22189 cost = n * rs6000_cost->ddiv;
22190 break;
22191
22192 case TYPE_SYNC:
22193 case TYPE_LOAD_L:
22194 case TYPE_MFCR:
22195 case TYPE_MFCRF:
22196 cost = COSTS_N_INSNS (n + 2);
22197 break;
22198
22199 default:
22200 cost = COSTS_N_INSNS (n);
22201 }
22202
22203 return cost;
22204 }
22205
22206 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22207
22208 static int
22209 rs6000_debug_address_cost (rtx x, machine_mode mode,
22210 addr_space_t as, bool speed)
22211 {
22212 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22213
22214 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22215 ret, speed ? "true" : "false");
22216 debug_rtx (x);
22217
22218 return ret;
22219 }
22220
22221
22222 /* A C expression returning the cost of moving data from a register of class
22223 CLASS1 to one of CLASS2. */
22224
22225 static int
22226 rs6000_register_move_cost (machine_mode mode,
22227 reg_class_t from, reg_class_t to)
22228 {
22229 int ret;
22230 reg_class_t rclass;
22231
22232 if (TARGET_DEBUG_COST)
22233 dbg_cost_ctrl++;
22234
22235 /* If we have VSX, we can easily move between FPR or Altivec registers,
22236 otherwise we can only easily move within classes.
22237 Do this first so we give best-case answers for union classes
22238 containing both gprs and vsx regs. */
22239 HARD_REG_SET to_vsx, from_vsx;
22240 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22241 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22242 if (!hard_reg_set_empty_p (to_vsx)
22243 && !hard_reg_set_empty_p (from_vsx)
22244 && (TARGET_VSX
22245 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22246 {
22247 int reg = FIRST_FPR_REGNO;
22248 if (TARGET_VSX
22249 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22250 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22251 reg = FIRST_ALTIVEC_REGNO;
22252 ret = 2 * hard_regno_nregs (reg, mode);
22253 }
22254
22255 /* Moves from/to GENERAL_REGS. */
22256 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22257 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22258 {
22259 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22260 {
22261 if (TARGET_DIRECT_MOVE)
22262 {
22263 /* Keep the cost for direct moves above that for within
22264 a register class even if the actual processor cost is
22265 comparable. We do this because a direct move insn
22266 can't be a nop, whereas with ideal register
22267 allocation a move within the same class might turn
22268 out to be a nop. */
22269 if (rs6000_tune == PROCESSOR_POWER9
22270 || rs6000_tune == PROCESSOR_POWER10)
22271 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22272 else
22273 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22274 /* SFmode requires a conversion when moving between gprs
22275 and vsx. */
22276 if (mode == SFmode)
22277 ret += 2;
22278 }
22279 else
22280 ret = (rs6000_memory_move_cost (mode, rclass, false)
22281 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22282 }
22283
22284 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22285 shift. */
22286 else if (rclass == CR_REGS)
22287 ret = 4;
22288
22289 /* For those processors that have slow LR/CTR moves, make them more
22290 expensive than memory in order to bias spills to memory. */
22291 else if ((rs6000_tune == PROCESSOR_POWER6
22292 || rs6000_tune == PROCESSOR_POWER7
22293 || rs6000_tune == PROCESSOR_POWER8
22294 || rs6000_tune == PROCESSOR_POWER9)
22295 && reg_class_subset_p (rclass, SPECIAL_REGS))
22296 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22297
22298 else
22299 /* A move will cost one instruction per GPR moved. */
22300 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22301 }
22302
22303 /* Everything else has to go through GENERAL_REGS. */
22304 else
22305 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22306 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22307
22308 if (TARGET_DEBUG_COST)
22309 {
22310 if (dbg_cost_ctrl == 1)
22311 fprintf (stderr,
22312 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22313 ret, GET_MODE_NAME (mode), reg_class_names[from],
22314 reg_class_names[to]);
22315 dbg_cost_ctrl--;
22316 }
22317
22318 return ret;
22319 }
22320
22321 /* A C expression returning the cost of moving data of MODE from a register to
22322 or from memory. */
22323
22324 static int
22325 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22326 bool in ATTRIBUTE_UNUSED)
22327 {
22328 int ret;
22329
22330 if (TARGET_DEBUG_COST)
22331 dbg_cost_ctrl++;
22332
22333 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22334 ret = 4 * hard_regno_nregs (0, mode);
22335 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22336 || reg_classes_intersect_p (rclass, VSX_REGS)))
22337 ret = 4 * hard_regno_nregs (32, mode);
22338 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22339 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22340 else
22341 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22342
22343 if (TARGET_DEBUG_COST)
22344 {
22345 if (dbg_cost_ctrl == 1)
22346 fprintf (stderr,
22347 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22348 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22349 dbg_cost_ctrl--;
22350 }
22351
22352 return ret;
22353 }
22354
22355 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22356
22357 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22358 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22359 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22360 move cost between GENERAL_REGS and VSX_REGS low.
22361
22362 It might seem reasonable to use a union class. After all, if usage
22363 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22364 rather than memory. However, in cases where register pressure of
22365 both is high, like the cactus_adm spec test, allowing
22366 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22367 the first scheduling pass. This is partly due to an allocno of
22368 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22369 class, which gives too high a pressure for GENERAL_REGS and too low
22370 for VSX_REGS. So, force a choice of the subclass here.
22371
22372 The best class is also the union if GENERAL_REGS and VSX_REGS have
22373 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22374 allocno class, since trying to narrow down the class by regno mode
22375 is prone to error. For example, SImode is allowed in VSX regs and
22376 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22377 it would be wrong to choose an allocno of GENERAL_REGS based on
22378 SImode. */
22379
22380 static reg_class_t
22381 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22382 reg_class_t allocno_class,
22383 reg_class_t best_class)
22384 {
22385 switch (allocno_class)
22386 {
22387 case GEN_OR_VSX_REGS:
22388 /* best_class must be a subset of allocno_class. */
22389 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22390 || best_class == GEN_OR_FLOAT_REGS
22391 || best_class == VSX_REGS
22392 || best_class == ALTIVEC_REGS
22393 || best_class == FLOAT_REGS
22394 || best_class == GENERAL_REGS
22395 || best_class == BASE_REGS);
22396 /* Use best_class but choose wider classes when copying from the
22397 wider class to best_class is cheap. This mimics IRA choice
22398 of allocno class. */
22399 if (best_class == BASE_REGS)
22400 return GENERAL_REGS;
22401 if (TARGET_VSX && best_class == FLOAT_REGS)
22402 return VSX_REGS;
22403 return best_class;
22404
22405 case VSX_REGS:
22406 if (best_class == ALTIVEC_REGS)
22407 return ALTIVEC_REGS;
22408
22409 default:
22410 break;
22411 }
22412
22413 return allocno_class;
22414 }
22415
22416 /* Load up a constant. If the mode is a vector mode, splat the value across
22417 all of the vector elements. */
22418
22419 static rtx
22420 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22421 {
22422 rtx reg;
22423
22424 if (mode == SFmode || mode == DFmode)
22425 {
22426 rtx d = const_double_from_real_value (dconst, mode);
22427 reg = force_reg (mode, d);
22428 }
22429 else if (mode == V4SFmode)
22430 {
22431 rtx d = const_double_from_real_value (dconst, SFmode);
22432 rtvec v = gen_rtvec (4, d, d, d, d);
22433 reg = gen_reg_rtx (mode);
22434 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22435 }
22436 else if (mode == V2DFmode)
22437 {
22438 rtx d = const_double_from_real_value (dconst, DFmode);
22439 rtvec v = gen_rtvec (2, d, d);
22440 reg = gen_reg_rtx (mode);
22441 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22442 }
22443 else
22444 gcc_unreachable ();
22445
22446 return reg;
22447 }
22448
22449 /* Generate an FMA instruction. */
22450
22451 static void
22452 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22453 {
22454 machine_mode mode = GET_MODE (target);
22455 rtx dst;
22456
22457 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22458 gcc_assert (dst != NULL);
22459
22460 if (dst != target)
22461 emit_move_insn (target, dst);
22462 }
22463
22464 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22465
22466 static void
22467 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22468 {
22469 machine_mode mode = GET_MODE (dst);
22470 rtx r;
22471
22472 /* This is a tad more complicated, since the fnma_optab is for
22473 a different expression: fma(-m1, m2, a), which is the same
22474 thing except in the case of signed zeros.
22475
22476 Fortunately we know that if FMA is supported that FNMSUB is
22477 also supported in the ISA. Just expand it directly. */
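  /* Worked example (editorial): when m1 * m2 == a exactly,
     fma (-m1, m2, a) computes a - m1*m2 = +0.0, while the FNMSUB form
     -fma (m1, m2, -a) computes -(m1*m2 - a) = -0.0, so the two forms
     differ only in the sign of an exact zero result.  */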
22478
22479 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22480
22481 r = gen_rtx_NEG (mode, a);
22482 r = gen_rtx_FMA (mode, m1, m2, r);
22483 r = gen_rtx_NEG (mode, r);
22484 emit_insn (gen_rtx_SET (dst, r));
22485 }
22486
22487 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22488 add a reg_note saying that this was a division. Support both scalar and
22489 vector divide. Assumes no trapping math and finite arguments. */
22490
22491 void
22492 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22493 {
22494 machine_mode mode = GET_MODE (dst);
22495 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22496 int i;
22497
22498 /* Low precision estimates guarantee 5 bits of accuracy. High
22499 precision estimates guarantee 14 bits of accuracy. SFmode
22500 requires 23 bits of accuracy. DFmode requires 52 bits of
22501 accuracy. Each pass at least doubles the accuracy, leading
22502 to the following. */
22503 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22504 if (mode == DFmode || mode == V2DFmode)
22505 passes++;
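  /* Worked example (editorial): with the 14-bit estimate, SFmode needs
     one pass (14 -> 28 >= 23 bits) and DFmode two (14 -> 28 -> 56 >= 52);
     with the 5-bit estimate, SFmode needs three passes
     (5 -> 10 -> 20 -> 40) and DFmode four.  */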
22506
22507 enum insn_code code = optab_handler (smul_optab, mode);
22508 insn_gen_fn gen_mul = GEN_FCN (code);
22509
22510 gcc_assert (code != CODE_FOR_nothing);
22511
22512 one = rs6000_load_constant_and_splat (mode, dconst1);
22513
22514 /* x0 = 1./d estimate */
22515 x0 = gen_reg_rtx (mode);
22516 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22517 UNSPEC_FRES)));
22518
22519 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22520 if (passes > 1) {
22521
22522 /* e0 = 1. - d * x0 */
22523 e0 = gen_reg_rtx (mode);
22524 rs6000_emit_nmsub (e0, d, x0, one);
22525
22526 /* x1 = x0 + e0 * x0 */
22527 x1 = gen_reg_rtx (mode);
22528 rs6000_emit_madd (x1, e0, x0, x0);
22529
22530 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22531 ++i, xprev = xnext, eprev = enext) {
22532
22533 /* enext = eprev * eprev */
22534 enext = gen_reg_rtx (mode);
22535 emit_insn (gen_mul (enext, eprev, eprev));
22536
22537 /* xnext = xprev + enext * xprev */
22538 xnext = gen_reg_rtx (mode);
22539 rs6000_emit_madd (xnext, enext, xprev, xprev);
22540 }
22541
22542 } else
22543 xprev = x0;
22544
22545 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22546
22547 /* u = n * xprev */
22548 u = gen_reg_rtx (mode);
22549 emit_insn (gen_mul (u, n, xprev));
22550
22551 /* v = n - (d * u) */
22552 v = gen_reg_rtx (mode);
22553 rs6000_emit_nmsub (v, d, u, n);
22554
22555 /* dst = (v * xprev) + u */
22556 rs6000_emit_madd (dst, v, xprev, u);
22557
22558 if (note_p)
22559 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22560 }
22561
22562 /* Goldschmidt's Algorithm for single/double-precision floating point
22563 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22564
22565 void
22566 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22567 {
22568 machine_mode mode = GET_MODE (src);
22569 rtx e = gen_reg_rtx (mode);
22570 rtx g = gen_reg_rtx (mode);
22571 rtx h = gen_reg_rtx (mode);
22572
22573 /* Low precision estimates guarantee 5 bits of accuracy. High
22574 precision estimates guarantee 14 bits of accuracy. SFmode
22575 requires 23 bits of accuracy. DFmode requires 52 bits of
22576 accuracy. Each pass at least doubles the accuracy, leading
22577 to the following. */
22578 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22579 if (mode == DFmode || mode == V2DFmode)
22580 passes++;
22581
22582 int i;
22583 rtx mhalf;
22584 enum insn_code code = optab_handler (smul_optab, mode);
22585 insn_gen_fn gen_mul = GEN_FCN (code);
22586
22587 gcc_assert (code != CODE_FOR_nothing);
22588
22589 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22590
22591 /* e = rsqrt estimate */
22592 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22593 UNSPEC_RSQRT)));
22594
22595 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22596 if (!recip)
22597 {
22598 rtx zero = force_reg (mode, CONST0_RTX (mode));
22599
22600 if (mode == SFmode)
22601 {
22602 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
22603 e, zero, mode, 0);
22604 if (target != e)
22605 emit_move_insn (e, target);
22606 }
22607 else
22608 {
22609 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22610 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22611 }
22612 }
22613
22614 /* g = sqrt estimate. */
22615 emit_insn (gen_mul (g, e, src));
22616 /* h = 1/(2*sqrt) estimate. */
22617 emit_insn (gen_mul (h, e, mhalf));
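  /* Editorial sketch of the Goldschmidt step used below: with
     g ~= sqrt(src) and h ~= 1/(2*sqrt(src)), FNMSUB computes the
     residual t = 1/2 - g*h, and the updates

	 g' = g + g*t,  h' = h + h*t

     refine both estimates at once; DST is then g for sqrt, or
     2*h = h + h for rsqrt.  */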
22618
22619 if (recip)
22620 {
22621 if (passes == 1)
22622 {
22623 rtx t = gen_reg_rtx (mode);
22624 rs6000_emit_nmsub (t, g, h, mhalf);
22625 /* Apply correction directly to 1/rsqrt estimate. */
22626 rs6000_emit_madd (dst, e, t, e);
22627 }
22628 else
22629 {
22630 for (i = 0; i < passes; i++)
22631 {
22632 rtx t1 = gen_reg_rtx (mode);
22633 rtx g1 = gen_reg_rtx (mode);
22634 rtx h1 = gen_reg_rtx (mode);
22635
22636 rs6000_emit_nmsub (t1, g, h, mhalf);
22637 rs6000_emit_madd (g1, g, t1, g);
22638 rs6000_emit_madd (h1, h, t1, h);
22639
22640 g = g1;
22641 h = h1;
22642 }
22643 /* Multiply by 2 for 1/rsqrt. */
22644 emit_insn (gen_add3_insn (dst, h, h));
22645 }
22646 }
22647 else
22648 {
22649 rtx t = gen_reg_rtx (mode);
22650 rs6000_emit_nmsub (t, g, h, mhalf);
22651 rs6000_emit_madd (dst, g, t, g);
22652 }
22653
22654 return;
22655 }
22656
22657 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22658 (Power7) targets. DST is the target, and SRC is the argument operand. */
22659
22660 void
22661 rs6000_emit_popcount (rtx dst, rtx src)
22662 {
22663 machine_mode mode = GET_MODE (dst);
22664 rtx tmp1, tmp2;
22665
22666 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22667 if (TARGET_POPCNTD)
22668 {
22669 if (mode == SImode)
22670 emit_insn (gen_popcntdsi2 (dst, src));
22671 else
22672 emit_insn (gen_popcntddi2 (dst, src));
22673 return;
22674 }
22675
22676 tmp1 = gen_reg_rtx (mode);
22677
22678 if (mode == SImode)
22679 {
22680 emit_insn (gen_popcntbsi2 (tmp1, src));
22681 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22682 NULL_RTX, 0);
22683 tmp2 = force_reg (SImode, tmp2);
22684 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22685 }
22686 else
22687 {
22688 emit_insn (gen_popcntbdi2 (tmp1, src));
22689 tmp2 = expand_mult (DImode, tmp1,
22690 GEN_INT ((HOST_WIDE_INT)
22691 0x01010101 << 32 | 0x01010101),
22692 NULL_RTX, 0);
22693 tmp2 = force_reg (DImode, tmp2);
22694 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22695 }
22696 }
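/* Editorial worked example for the popcntb path above: popcntb stores
   the population count of each byte in that byte, so for the SImode
   input 0x01020304 it produces 0x01010201.  Multiplying by 0x01010101
   sums the four byte counts into the top byte (1 + 1 + 2 + 1 = 5), and
   the final shift right by 24 extracts it, giving popcount = 5.  */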
22697
22698
22699 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22700 target, and SRC is the argument operand. */
22701
22702 void
22703 rs6000_emit_parity (rtx dst, rtx src)
22704 {
22705 machine_mode mode = GET_MODE (dst);
22706 rtx tmp;
22707
22708 tmp = gen_reg_rtx (mode);
22709
22710 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22711 if (TARGET_CMPB)
22712 {
22713 if (mode == SImode)
22714 {
22715 emit_insn (gen_popcntbsi2 (tmp, src));
22716 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22717 }
22718 else
22719 {
22720 emit_insn (gen_popcntbdi2 (tmp, src));
22721 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22722 }
22723 return;
22724 }
22725
22726 if (mode == SImode)
22727 {
22728 /* Is mult+shift >= shift+xor+shift+xor? */
22729 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22730 {
22731 rtx tmp1, tmp2, tmp3, tmp4;
22732
22733 tmp1 = gen_reg_rtx (SImode);
22734 emit_insn (gen_popcntbsi2 (tmp1, src));
22735
22736 tmp2 = gen_reg_rtx (SImode);
22737 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22738 tmp3 = gen_reg_rtx (SImode);
22739 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22740
22741 tmp4 = gen_reg_rtx (SImode);
22742 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22743 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22744 }
22745 else
22746 rs6000_emit_popcount (tmp, src);
22747 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22748 }
22749 else
22750 {
22751 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22752 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22753 {
22754 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22755
22756 tmp1 = gen_reg_rtx (DImode);
22757 emit_insn (gen_popcntbdi2 (tmp1, src));
22758
22759 tmp2 = gen_reg_rtx (DImode);
22760 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22761 tmp3 = gen_reg_rtx (DImode);
22762 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22763
22764 tmp4 = gen_reg_rtx (DImode);
22765 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22766 tmp5 = gen_reg_rtx (DImode);
22767 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22768
22769 tmp6 = gen_reg_rtx (DImode);
22770 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22771 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22772 }
22773 else
22774 rs6000_emit_popcount (tmp, src);
22775 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22776 }
22777 }
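/* Editorial note on the fallback above: after popcntb, parity is the
   low bit of the sum of the byte counts, so instead of the multiply the
   counts can be folded with shift/xor pairs -- for SImode effectively
   x ^= x >> 16; x ^= x >> 8; parity = x & 1; -- whichever sequence the
   cost model says is cheaper.  */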
22778
22779 /* Expand an Altivec constant permutation for little endian mode.
22780 OP0 and OP1 are the input vectors and TARGET is the output vector.
22781 SEL specifies the constant permutation vector.
22782
22783 There are two issues: First, the two input operands must be
22784 swapped so that together they form a double-wide array in LE
22785 order. Second, the vperm instruction has surprising behavior
22786 in LE mode: it interprets the elements of the source vectors
22787 in BE mode ("left to right") and interprets the elements of
22788 the destination vector in LE mode ("right to left"). To
22789 correct for this, we must subtract each element of the permute
22790 control vector from 31.
22791
22792 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22793 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22794 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22795 serve as the permute control vector. Then, in BE mode,
22796
22797 vperm 9,10,11,12
22798
22799 places the desired result in vr9. However, in LE mode the
22800 vector contents will be
22801
22802 vr10 = 00000003 00000002 00000001 00000000
22803 vr11 = 00000007 00000006 00000005 00000004
22804
22805 The result of the vperm using the same permute control vector is
22806
22807 vr9 = 05000000 07000000 01000000 03000000
22808
22809 That is, the leftmost 4 bytes of vr10 are interpreted as the
22810 source for the rightmost 4 bytes of vr9, and so on.
22811
22812 If we change the permute control vector to
22813
22814 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22815
22816 and issue
22817
22818 vperm 9,11,10,12
22819
22820 we get the desired
22821
22822 vr9 = 00000006 00000004 00000002 00000000. */
22823
22824 static void
22825 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22826 const vec_perm_indices &sel)
22827 {
22828 unsigned int i;
22829 rtx perm[16];
22830 rtx constv, unspec;
22831
22832 /* Unpack and adjust the constant selector. */
22833 for (i = 0; i < 16; ++i)
22834 {
22835 unsigned int elt = 31 - (sel[i] & 31);
22836 perm[i] = GEN_INT (elt);
22837 }
22838
22839 /* Expand to a permute, swapping the inputs and using the
22840 adjusted selector. */
22841 if (!REG_P (op0))
22842 op0 = force_reg (V16QImode, op0);
22843 if (!REG_P (op1))
22844 op1 = force_reg (V16QImode, op1);
22845
22846 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22847 constv = force_reg (V16QImode, constv);
22848 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22849 UNSPEC_VPERM);
22850 if (!REG_P (target))
22851 {
22852 rtx tmp = gen_reg_rtx (V16QImode);
22853 emit_move_insn (tmp, unspec);
22854 unspec = tmp;
22855 }
22856
22857 emit_move_insn (target, unspec);
22858 }
22859
22860 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22861 permute control vector. But here it's not a constant, so we must
22862 generate a vector NAND or NOR to do the adjustment. */
22863
22864 void
22865 altivec_expand_vec_perm_le (rtx operands[4])
22866 {
22867 rtx notx, iorx, unspec;
22868 rtx target = operands[0];
22869 rtx op0 = operands[1];
22870 rtx op1 = operands[2];
22871 rtx sel = operands[3];
22872 rtx tmp = target;
22873 rtx norreg = gen_reg_rtx (V16QImode);
22874 machine_mode mode = GET_MODE (target);
22875
22876 /* Get everything in regs so the pattern matches. */
22877 if (!REG_P (op0))
22878 op0 = force_reg (mode, op0);
22879 if (!REG_P (op1))
22880 op1 = force_reg (mode, op1);
22881 if (!REG_P (sel))
22882 sel = force_reg (V16QImode, sel);
22883 if (!REG_P (target))
22884 tmp = gen_reg_rtx (mode);
22885
22886 if (TARGET_P9_VECTOR)
22887 {
22888 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22889 UNSPEC_VPERMR);
22890 }
22891 else
22892 {
22893 /* Invert the selector with a VNAND if available, else a VNOR.
22894 The VNAND is preferred for future fusion opportunities. */
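      /* Editorial note: vperm only reads the low five bits of each
	 selector byte, and for 0 <= e <= 31 we have 31 - e == (~e & 31),
	 so a plain bitwise NOT implements the subtract-from-31
	 adjustment described for the constant case.  */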
22895 notx = gen_rtx_NOT (V16QImode, sel);
22896 iorx = (TARGET_P8_VECTOR
22897 ? gen_rtx_IOR (V16QImode, notx, notx)
22898 : gen_rtx_AND (V16QImode, notx, notx));
22899 emit_insn (gen_rtx_SET (norreg, iorx));
22900
22901 /* Permute with operands reversed and adjusted selector. */
22902 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22903 UNSPEC_VPERM);
22904 }
22905
22906 /* Copy into target, possibly by way of a register. */
22907 if (!REG_P (target))
22908 {
22909 emit_move_insn (tmp, unspec);
22910 unspec = tmp;
22911 }
22912
22913 emit_move_insn (target, unspec);
22914 }
22915
22916 /* Expand an Altivec constant permutation. Return true if we match
22917 an efficient implementation; false to fall back to VPERM.
22918
22919 OP0 and OP1 are the input vectors and TARGET is the output vector.
22920 SEL specifies the constant permutation vector. */
22921
22922 static bool
22923 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
22924 const vec_perm_indices &sel)
22925 {
22926 struct altivec_perm_insn {
22927 HOST_WIDE_INT mask;
22928 enum insn_code impl;
22929 unsigned char perm[16];
22930 };
22931 static const struct altivec_perm_insn patterns[] = {
22932 {OPTION_MASK_ALTIVEC,
22933 CODE_FOR_altivec_vpkuhum_direct,
22934 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
22935 {OPTION_MASK_ALTIVEC,
22936 CODE_FOR_altivec_vpkuwum_direct,
22937 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
22938 {OPTION_MASK_ALTIVEC,
22939 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22940 : CODE_FOR_altivec_vmrglb_direct,
22941 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
22942 {OPTION_MASK_ALTIVEC,
22943 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22944 : CODE_FOR_altivec_vmrglh_direct,
22945 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
22946 {OPTION_MASK_ALTIVEC,
22947 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
22948 : CODE_FOR_altivec_vmrglw_direct_v4si,
22949 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
22950 {OPTION_MASK_ALTIVEC,
22951 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22952 : CODE_FOR_altivec_vmrghb_direct,
22953 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
22954 {OPTION_MASK_ALTIVEC,
22955 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22956 : CODE_FOR_altivec_vmrghh_direct,
22957 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
22958 {OPTION_MASK_ALTIVEC,
22959 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
22960 : CODE_FOR_altivec_vmrghw_direct_v4si,
22961 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
22962 {OPTION_MASK_P8_VECTOR,
22963 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22964 : CODE_FOR_p8_vmrgow_v4sf_direct,
22965 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
22966 {OPTION_MASK_P8_VECTOR,
22967 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22968 : CODE_FOR_p8_vmrgew_v4sf_direct,
22969 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
22970 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
22971 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
22972 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
22973 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
22974 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
22975 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
22976 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
22977 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
22978
22979 unsigned int i, j, elt, which;
22980 unsigned char perm[16];
22981 rtx x;
22982 bool one_vec;
22983
22984 /* Unpack the constant selector. */
22985 for (i = which = 0; i < 16; ++i)
22986 {
22987 elt = sel[i] & 31;
22988 which |= (elt < 16 ? 1 : 2);
22989 perm[i] = elt;
22990 }
22991
22992 /* Simplify the constant selector based on operands. */
22993 switch (which)
22994 {
22995 default:
22996 gcc_unreachable ();
22997
22998 case 3:
22999 one_vec = false;
23000 if (!rtx_equal_p (op0, op1))
23001 break;
23002 /* FALLTHRU */
23003
23004 case 2:
23005 for (i = 0; i < 16; ++i)
23006 perm[i] &= 15;
23007 op0 = op1;
23008 one_vec = true;
23009 break;
23010
23011 case 1:
23012 op1 = op0;
23013 one_vec = true;
23014 break;
23015 }
23016
23017 /* Look for splat patterns. */
23018 if (one_vec)
23019 {
23020 elt = perm[0];
23021
23022 for (i = 0; i < 16; ++i)
23023 if (perm[i] != elt)
23024 break;
23025 if (i == 16)
23026 {
23027 if (!BYTES_BIG_ENDIAN)
23028 elt = 15 - elt;
23029 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23030 return true;
23031 }
23032
23033 if (elt % 2 == 0)
23034 {
23035 for (i = 0; i < 16; i += 2)
23036 if (perm[i] != elt || perm[i + 1] != elt + 1)
23037 break;
23038 if (i == 16)
23039 {
23040 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23041 x = gen_reg_rtx (V8HImode);
23042 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23043 GEN_INT (field)));
23044 emit_move_insn (target, gen_lowpart (V16QImode, x));
23045 return true;
23046 }
23047 }
23048
23049 if (elt % 4 == 0)
23050 {
23051 for (i = 0; i < 16; i += 4)
23052 if (perm[i] != elt
23053 || perm[i + 1] != elt + 1
23054 || perm[i + 2] != elt + 2
23055 || perm[i + 3] != elt + 3)
23056 break;
23057 if (i == 16)
23058 {
23059 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23060 x = gen_reg_rtx (V4SImode);
23061 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23062 GEN_INT (field)));
23063 emit_move_insn (target, gen_lowpart (V16QImode, x));
23064 return true;
23065 }
23066 }
23067 }
23068
23069 /* Look for merge and pack patterns. */
23070 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23071 {
23072 bool swapped;
23073
23074 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23075 continue;
23076
23077 elt = patterns[j].perm[0];
23078 if (perm[0] == elt)
23079 swapped = false;
23080 else if (perm[0] == elt + 16)
23081 swapped = true;
23082 else
23083 continue;
23084 for (i = 1; i < 16; ++i)
23085 {
23086 elt = patterns[j].perm[i];
23087 if (swapped)
23088 elt = (elt >= 16 ? elt - 16 : elt + 16);
23089 else if (one_vec && elt >= 16)
23090 elt -= 16;
23091 if (perm[i] != elt)
23092 break;
23093 }
23094 if (i == 16)
23095 {
23096 enum insn_code icode = patterns[j].impl;
23097 machine_mode omode = insn_data[icode].operand[0].mode;
23098 machine_mode imode = insn_data[icode].operand[1].mode;
23099
23100 rtx perm_idx = GEN_INT (0);
23101 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23102 {
23103 int perm_val = 0;
23104 if (one_vec)
23105 {
23106 if (perm[0] == 8)
23107 perm_val |= 2;
23108 if (perm[8] == 8)
23109 perm_val |= 1;
23110 }
23111 else
23112 {
23113 if (perm[0] != 0)
23114 perm_val |= 2;
23115 if (perm[8] != 16)
23116 perm_val |= 1;
23117 }
23118 perm_idx = GEN_INT (perm_val);
23119 }
23120
23121 /* For little-endian, don't use vpkuwum and vpkuhum if the
23122 underlying vector type is not V4SI and V8HI, respectively.
23123 For example, using vpkuwum with a V8HI picks up the even
23124 halfwords (BE numbering) when the even halfwords (LE
23125 numbering) are what we need. */
23126 if (!BYTES_BIG_ENDIAN
23127 && icode == CODE_FOR_altivec_vpkuwum_direct
23128 && ((REG_P (op0)
23129 && GET_MODE (op0) != V4SImode)
23130 || (SUBREG_P (op0)
23131 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23132 continue;
23133 if (!BYTES_BIG_ENDIAN
23134 && icode == CODE_FOR_altivec_vpkuhum_direct
23135 && ((REG_P (op0)
23136 && GET_MODE (op0) != V8HImode)
23137 || (SUBREG_P (op0)
23138 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23139 continue;
23140
23141 /* For little-endian, the two input operands must be swapped
23142 (or swapped back) to ensure proper right-to-left numbering
23143 from 0 to 2N-1. */
23144 if (swapped ^ !BYTES_BIG_ENDIAN
23145 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23146 std::swap (op0, op1);
23147 if (imode != V16QImode)
23148 {
23149 op0 = gen_lowpart (imode, op0);
23150 op1 = gen_lowpart (imode, op1);
23151 }
23152 if (omode == V16QImode)
23153 x = target;
23154 else
23155 x = gen_reg_rtx (omode);
23156 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23157 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23158 else
23159 emit_insn (GEN_FCN (icode) (x, op0, op1));
23160 if (omode != V16QImode)
23161 emit_move_insn (target, gen_lowpart (V16QImode, x));
23162 return true;
23163 }
23164 }
23165
23166 if (!BYTES_BIG_ENDIAN)
23167 {
23168 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23169 return true;
23170 }
23171
23172 return false;
23173 }
23174
23175 /* Expand a VSX Permute Doubleword constant permutation.
23176 Return true if we match an efficient implementation. */
23177
23178 static bool
23179 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23180 unsigned char perm0, unsigned char perm1)
23181 {
23182 rtx x;
23183
23184 /* If both selectors come from the same operand, fold to single op. */
23185 if ((perm0 & 2) == (perm1 & 2))
23186 {
23187 if (perm0 & 2)
23188 op0 = op1;
23189 else
23190 op1 = op0;
23191 }
23192 /* If both operands are equal, fold to simpler permutation. */
23193 if (rtx_equal_p (op0, op1))
23194 {
23195 perm0 = perm0 & 1;
23196 perm1 = (perm1 & 1) + 2;
23197 }
23198 /* If the first selector comes from the second operand, swap. */
23199 else if (perm0 & 2)
23200 {
23201 if (perm1 & 2)
23202 return false;
23203 perm0 -= 2;
23204 perm1 += 2;
23205 std::swap (op0, op1);
23206 }
23207 /* If the second selector does not come from the second operand, fail. */
23208 else if ((perm1 & 2) == 0)
23209 return false;
23210
23211 /* Success! */
23212 if (target != NULL)
23213 {
23214 machine_mode vmode, dmode;
23215 rtvec v;
23216
23217 vmode = GET_MODE (target);
23218 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23219 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23220 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23221 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23222 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23223 emit_insn (gen_rtx_SET (target, x));
23224 }
23225 return true;
23226 }
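/* Editorial example: perm0 = 1, perm1 = 2 selects element 1 of OP0 and
   element 0 of OP1 from the 4-element VEC_CONCAT built above, a
   selection the vsx_xxpermdi patterns can match directly.  */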
23227
23228 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23229
23230 static bool
23231 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
23232 rtx op1, const vec_perm_indices &sel)
23233 {
23234 bool testing_p = !target;
23235
23236 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23237 if (TARGET_ALTIVEC && testing_p)
23238 return true;
23239
23240 if (op0)
23241 {
23242 rtx nop0 = force_reg (vmode, op0);
23243 if (op0 == op1)
23244 op1 = nop0;
23245 op0 = nop0;
23246 }
23247 if (op1)
23248 op1 = force_reg (vmode, op1);
23249
23250 /* Check for ps_merge* or xxpermdi insns. */
23251 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23252 {
23253 if (testing_p)
23254 {
23255 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23256 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23257 }
23258 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23259 return true;
23260 }
23261
23262 if (TARGET_ALTIVEC)
23263 {
23264 /* Force the target-independent code to lower to V16QImode. */
23265 if (vmode != V16QImode)
23266 return false;
23267 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23268 return true;
23269 }
23270
23271 return false;
23272 }
23273
23274 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23275 OP0 and OP1 are the input vectors and TARGET is the output vector.
23276 PERM specifies the constant permutation vector. */
23277
23278 static void
23279 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23280 machine_mode vmode, const vec_perm_builder &perm)
23281 {
23282 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23283 if (x != target)
23284 emit_move_insn (target, x);
23285 }
23286
23287 /* Expand an extract even operation. */
23288
23289 void
23290 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23291 {
23292 machine_mode vmode = GET_MODE (target);
23293 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23294 vec_perm_builder perm (nelt, nelt, 1);
23295
23296 for (i = 0; i < nelt; i++)
23297 perm.quick_push (i * 2);
23298
23299 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23300 }
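/* Editorial example: for a V4SI target the builder above pushes
   {0, 2, 4, 6}, i.e. the even elements of the 8-element double-wide
   concatenation of OP0 and OP1.  */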
23301
23302 /* Expand a vector interleave operation. */
23303
23304 void
23305 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23306 {
23307 machine_mode vmode = GET_MODE (target);
23308 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23309 vec_perm_builder perm (nelt, nelt, 1);
23310
23311 high = (highp ? 0 : nelt / 2);
23312 for (i = 0; i < nelt / 2; i++)
23313 {
23314 perm.quick_push (i + high);
23315 perm.quick_push (i + nelt + high);
23316 }
23317
23318 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23319 }
23320
23321 /* Scale a V2DF vector SRC by two to the power SCALE and place the result in TGT. */
23322 void
23323 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23324 {
23325 HOST_WIDE_INT hwi_scale (scale);
23326 REAL_VALUE_TYPE r_pow;
23327 rtvec v = rtvec_alloc (2);
23328 rtx elt;
23329 rtx scale_vec = gen_reg_rtx (V2DFmode);
23330 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23331 elt = const_double_from_real_value (r_pow, DFmode);
23332 RTVEC_ELT (v, 0) = elt;
23333 RTVEC_ELT (v, 1) = elt;
23334 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23335 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23336 }
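/* Editorial example: rs6000_scale_v2df (tgt, src, 3) multiplies both
   lanes of SRC by 2^3, i.e. by the splatted constant {8.0, 8.0}.  */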
23337
23338 /* Return an RTX representing where to find the function value of a
23339 function returning MODE. */
23340 static rtx
23341 rs6000_complex_function_value (machine_mode mode)
23342 {
23343 unsigned int regno;
23344 rtx r1, r2;
23345 machine_mode inner = GET_MODE_INNER (mode);
23346 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23347
23348 if (TARGET_FLOAT128_TYPE
23349 && (mode == KCmode
23350 || (mode == TCmode && TARGET_IEEEQUAD)))
23351 regno = ALTIVEC_ARG_RETURN;
23352
23353 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23354 regno = FP_ARG_RETURN;
23355
23356 else
23357 {
23358 regno = GP_ARG_RETURN;
23359
23360 /* 32-bit is OK since it'll go in r3/r4. */
23361 if (TARGET_32BIT && inner_bytes >= 4)
23362 return gen_rtx_REG (mode, regno);
23363 }
23364
23365 if (inner_bytes >= 8)
23366 return gen_rtx_REG (mode, regno);
23367
23368 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23369 const0_rtx);
23370 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23371 GEN_INT (inner_bytes));
23372 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23373 }
23374
23375 /* Return an rtx describing a return value of MODE as a PARALLEL
23376 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23377 stride REG_STRIDE. */
23378
23379 static rtx
23380 rs6000_parallel_return (machine_mode mode,
23381 int n_elts, machine_mode elt_mode,
23382 unsigned int regno, unsigned int reg_stride)
23383 {
23384 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23385
23386 int i;
23387 for (i = 0; i < n_elts; i++)
23388 {
23389 rtx r = gen_rtx_REG (elt_mode, regno);
23390 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23391 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23392 regno += reg_stride;
23393 }
23394
23395 return par;
23396 }
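/* Editorial example: rs6000_parallel_return (DImode, 2, SImode,
   GP_ARG_RETURN, 1) describes a 64-bit value split across two
   consecutive 32-bit GPRs starting at r3, with byte offsets 0 and 4,
   as used by the -mpowerpc64 32-bit ABI cases below.  */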
23397
23398 /* Target hook for TARGET_FUNCTION_VALUE.
23399
23400 An integer value is in r3 and a floating-point value is in fp1,
23401 unless -msoft-float. */
23402
23403 static rtx
23404 rs6000_function_value (const_tree valtype,
23405 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23406 bool outgoing ATTRIBUTE_UNUSED)
23407 {
23408 machine_mode mode;
23409 unsigned int regno;
23410 machine_mode elt_mode;
23411 int n_elts;
23412
23413 /* Special handling for structs in darwin64. */
23414 if (TARGET_MACHO
23415 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23416 {
23417 CUMULATIVE_ARGS valcum;
23418 rtx valret;
23419
23420 valcum.words = 0;
23421 valcum.fregno = FP_ARG_MIN_REG;
23422 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23423 /* Do a trial code generation as if this were going to be passed as
23424 an argument; if any part goes in memory, we return NULL. */
23425 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23426 if (valret)
23427 return valret;
23428 /* Otherwise fall through to standard ABI rules. */
23429 }
23430
23431 mode = TYPE_MODE (valtype);
23432
23433 /* The ELFv2 ABI returns homogeneous floating-point and vector aggregates in registers. */
23434 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23435 {
23436 int first_reg, n_regs;
23437
23438 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23439 {
23440 /* _Decimal128 must use even/odd register pairs. */
23441 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23442 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23443 }
23444 else
23445 {
23446 first_reg = ALTIVEC_ARG_RETURN;
23447 n_regs = 1;
23448 }
23449
23450 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23451 }
23452
23453 /* Some return value types need to be split for the 32-bit ABI with -mpowerpc64. */
23454 if (TARGET_32BIT && TARGET_POWERPC64)
23455 switch (mode)
23456 {
23457 default:
23458 break;
23459 case E_DImode:
23460 case E_SCmode:
23461 case E_DCmode:
23462 case E_TCmode:
23463 int count = GET_MODE_SIZE (mode) / 4;
23464 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23465 }
23466
23467 if ((INTEGRAL_TYPE_P (valtype)
23468 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23469 || POINTER_TYPE_P (valtype))
23470 mode = TARGET_32BIT ? SImode : DImode;
23471
23472 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23473 /* _Decimal128 must use an even/odd register pair. */
23474 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23475 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23476 && !FLOAT128_VECTOR_P (mode))
23477 regno = FP_ARG_RETURN;
23478 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23479 && targetm.calls.split_complex_arg)
23480 return rs6000_complex_function_value (mode);
23481 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23482 return register is used in both cases, and we won't see V2DImode/V2DFmode
23483 for pure altivec, combine the two cases. */
23484 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23485 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23486 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23487 regno = ALTIVEC_ARG_RETURN;
23488 else
23489 regno = GP_ARG_RETURN;
23490
23491 return gen_rtx_REG (mode, regno);
23492 }
23493
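/* Editor's illustration of the homogeneous-aggregate path above: under
   the ELFv2 ABI

     struct hfa { double a, b, c, d; };

   is a homogeneous aggregate of four DFmode elements, so it is returned
   in f1..f4 via rs6000_parallel_return.  */
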
23494 /* Define how to find the value returned by a library function
23495 assuming the value has mode MODE. */
23496 rtx
23497 rs6000_libcall_value (machine_mode mode)
23498 {
23499 unsigned int regno;
23500
23501 /* A long long return value needs to be split for the 32-bit ABI with -mpowerpc64. */
23502 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23503 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23504
23505 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23506 /* _Decimal128 must use an even/odd register pair. */
23507 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23508 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23509 regno = FP_ARG_RETURN;
23510 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23511 return register is used in both cases, and we won't see V2DImode/V2DFmode
23512 for pure altivec, combine the two cases. */
23513 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23514 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23515 regno = ALTIVEC_ARG_RETURN;
23516 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23517 return rs6000_complex_function_value (mode);
23518 else
23519 regno = GP_ARG_RETURN;
23520
23521 return gen_rtx_REG (mode, regno);
23522 }
23523
23524 /* Compute register pressure classes. We implement the target hook to avoid
23525 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23526 lead to incorrect estimates of the number of available registers and
23527 therefore increased register pressure/spilling. */
23528 static int
23529 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23530 {
23531 int n;
23532
23533 n = 0;
23534 pressure_classes[n++] = GENERAL_REGS;
23535 if (TARGET_ALTIVEC)
23536 pressure_classes[n++] = ALTIVEC_REGS;
23537 if (TARGET_VSX)
23538 pressure_classes[n++] = VSX_REGS;
23539 else
23540 {
23541 if (TARGET_HARD_FLOAT)
23542 pressure_classes[n++] = FLOAT_REGS;
23543 }
23544 pressure_classes[n++] = CR_REGS;
23545 pressure_classes[n++] = SPECIAL_REGS;
23546
23547 return n;
23548 }
23549
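/* Editor's note: e.g. with -mvsx the classes reported above are
   GENERAL_REGS, ALTIVEC_REGS, VSX_REGS, CR_REGS and SPECIAL_REGS;
   FLOAT_REGS is only reported on hard-float targets without VSX.  */
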
23550 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23551 Frame pointer elimination is automatically handled.
23552
23553 For the RS/6000, if frame pointer elimination is being done, we would like
23554 to convert ap into fp, not sp.
23555
23556 We need r30 if -mminimal-toc was specified, and there are constant pool
23557 references. */
23558
23559 static bool
23560 rs6000_can_eliminate (const int from, const int to)
23561 {
23562 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23563 ? ! frame_pointer_needed
23564 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23565 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23566 || constant_pool_empty_p ()
23567 : true);
23568 }
23569
23570 /* Define the offset between two registers, FROM to be eliminated and its
23571 replacement TO, at the start of a routine. */
23572 HOST_WIDE_INT
23573 rs6000_initial_elimination_offset (int from, int to)
23574 {
23575 rs6000_stack_t *info = rs6000_stack_info ();
23576 HOST_WIDE_INT offset;
23577
23578 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23579 offset = info->push_p ? 0 : -info->total_size;
23580 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23581 {
23582 offset = info->push_p ? 0 : -info->total_size;
23583 if (FRAME_GROWS_DOWNWARD)
23584 offset += info->fixed_size + info->vars_size + info->parm_size;
23585 }
23586 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23587 offset = FRAME_GROWS_DOWNWARD
23588 ? info->fixed_size + info->vars_size + info->parm_size
23589 : 0;
23590 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23591 offset = info->total_size;
23592 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23593 offset = info->push_p ? info->total_size : 0;
23594 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23595 offset = 0;
23596 else
23597 gcc_unreachable ();
23598
23599 return offset;
23600 }
23601
23602 /* Fill in the sizes of the registers used by the unwinder. */
23603
23604 static void
23605 rs6000_init_dwarf_reg_sizes_extra (tree address)
23606 {
23607 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23608 {
23609 int i;
23610 machine_mode mode = TYPE_MODE (char_type_node);
23611 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23612 rtx mem = gen_rtx_MEM (BLKmode, addr);
23613 rtx value = gen_int_mode (16, mode);
23614
23615 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23616 The unwinder still needs to know the size of Altivec registers. */
23617
23618 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23619 {
23620 int column = DWARF_REG_TO_UNWIND_COLUMN
23621 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23622 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23623
23624 emit_move_insn (adjust_address (mem, mode, offset), value);
23625 }
23626 }
23627 }
23628
23629 /* Map internal gcc register numbers to debug format register numbers.
23630 FORMAT specifies the type of debug register number to use:
23631 0 -- debug information, except for frame-related sections
23632 1 -- DWARF .debug_frame section
23633 2 -- DWARF .eh_frame section */
23634
23635 unsigned int
23636 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
23637 {
23638 /* On some platforms, we use the standard DWARF register
23639 numbering for .debug_info and .debug_frame. */
23640 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
23641 {
23642 #ifdef RS6000_USE_DWARF_NUMBERING
23643 if (regno <= 31)
23644 return regno;
23645 if (FP_REGNO_P (regno))
23646 return regno - FIRST_FPR_REGNO + 32;
23647 if (ALTIVEC_REGNO_P (regno))
23648 return regno - FIRST_ALTIVEC_REGNO + 1124;
23649 if (regno == LR_REGNO)
23650 return 108;
23651 if (regno == CTR_REGNO)
23652 return 109;
23653 if (regno == CA_REGNO)
23654 return 101; /* XER */
23655 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23656 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23657 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23658 to the DWARF reg for CR. */
23659 if (format == 1 && regno == CR2_REGNO)
23660 return 64;
23661 if (CR_REGNO_P (regno))
23662 return regno - CR0_REGNO + 86;
23663 if (regno == VRSAVE_REGNO)
23664 return 356;
23665 if (regno == VSCR_REGNO)
23666 return 67;
23667
23668 /* These do not make much sense. */
23669 if (regno == FRAME_POINTER_REGNUM)
23670 return 111;
23671 if (regno == ARG_POINTER_REGNUM)
23672 return 67;
23673 if (regno == 64)
23674 return 100;
23675
23676 gcc_unreachable ();
23677 #endif
23678 }
23679
23680 /* We use the internal register numbers from GCC 7 (and before) for
23681 non-DWARF debug information, and also for .eh_frame; translate the
23682 regnos to those numbers. */
23683 if (regno <= 31)
23684 return regno;
23685 if (FP_REGNO_P (regno))
23686 return regno - FIRST_FPR_REGNO + 32;
23687 if (ALTIVEC_REGNO_P (regno))
23688 return regno - FIRST_ALTIVEC_REGNO + 77;
23689 if (regno == LR_REGNO)
23690 return 65;
23691 if (regno == CTR_REGNO)
23692 return 66;
23693 if (regno == CA_REGNO)
23694 return 76; /* XER */
23695 if (CR_REGNO_P (regno))
23696 return regno - CR0_REGNO + 68;
23697 if (regno == VRSAVE_REGNO)
23698 return 109;
23699 if (regno == VSCR_REGNO)
23700 return 110;
23701
23702 if (regno == FRAME_POINTER_REGNUM)
23703 return 111;
23704 if (regno == ARG_POINTER_REGNUM)
23705 return 67;
23706 if (regno == 64)
23707 return 64;
23708
23709 gcc_unreachable ();
23710 }
23711
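/* Editor's illustration of the mapping above (assuming
   RS6000_USE_DWARF_NUMBERING): the link register is reported as DWARF
   register 108 for .debug_info and .debug_frame (formats 0 and 1), but
   keeps the historical GCC number 65 for .eh_frame (format 2), which is
   what the runtime unwinder expects.  */
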
23712 /* Target hook for eh_return_filter_mode. */
23713 static scalar_int_mode
23714 rs6000_eh_return_filter_mode (void)
23715 {
23716 return TARGET_32BIT ? SImode : word_mode;
23717 }
23718
23719 /* Target hook for translate_mode_attribute. */
23720 static machine_mode
23721 rs6000_translate_mode_attribute (machine_mode mode)
23722 {
23723 if ((FLOAT128_IEEE_P (mode)
23724 && ieee128_float_type_node == long_double_type_node)
23725 || (FLOAT128_IBM_P (mode)
23726 && ibm128_float_type_node == long_double_type_node))
23727 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23728 return mode;
23729 }
23730
23731 /* Target hook for scalar_mode_supported_p. */
23732 static bool
23733 rs6000_scalar_mode_supported_p (scalar_mode mode)
23734 {
23735 /* -m32 does not support TImode. This is the default, from
23736 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23737 same ABI as for -m32. But default_scalar_mode_supported_p allows
23738 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23739 for -mpowerpc64. */
23740 if (TARGET_32BIT && mode == TImode)
23741 return false;
23742
23743 if (DECIMAL_FLOAT_MODE_P (mode))
23744 return default_decimal_float_supported_p ();
23745 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23746 return true;
23747 else
23748 return default_scalar_mode_supported_p (mode);
23749 }
23750
23751 /* Target hook for libgcc_floating_mode_supported_p. */
23752
23753 static bool
23754 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23755 {
23756 switch (mode)
23757 {
23758 case E_SFmode:
23759 case E_DFmode:
23760 case E_TFmode:
23761 return true;
23762
23763 /* We only return true for KFmode if IEEE 128-bit types are supported, and
23764 if long double does not use the IEEE 128-bit format. If long double
23765 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
23766 Because the code will not use KFmode in that case, it would abort when
23767 it cannot find KFmode among the _FloatN types. */
23768 case E_KFmode:
23769 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
23770
23771 default:
23772 return false;
23773 }
23774 }
23775
23776 /* Target hook for vector_mode_supported_p. */
23777 static bool
23778 rs6000_vector_mode_supported_p (machine_mode mode)
23779 {
23780 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23781 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23782 double-double. */
23783 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23784 return true;
23785
23786 else
23787 return false;
23788 }
23789
23790 /* Target hook for floatn_mode. */
23791 static opt_scalar_float_mode
23792 rs6000_floatn_mode (int n, bool extended)
23793 {
23794 if (extended)
23795 {
23796 switch (n)
23797 {
23798 case 32:
23799 return DFmode;
23800
23801 case 64:
23802 if (TARGET_FLOAT128_TYPE)
23803 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23804 else
23805 return opt_scalar_float_mode ();
23806
23807 case 128:
23808 return opt_scalar_float_mode ();
23809
23810 default:
23811 /* Those are the only valid _FloatNx types. */
23812 gcc_unreachable ();
23813 }
23814 }
23815 else
23816 {
23817 switch (n)
23818 {
23819 case 32:
23820 return SFmode;
23821
23822 case 64:
23823 return DFmode;
23824
23825 case 128:
23826 if (TARGET_FLOAT128_TYPE)
23827 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23828 else
23829 return opt_scalar_float_mode ();
23830
23831 default:
23832 return opt_scalar_float_mode ();
23833 }
23834 }
23835
23836 }
23837
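/* Editor's note on the mapping above: with -mfloat128 and the default
   IBM long double, _Float128 and _Float64x both get KFmode; when long
   double is IEEE 128-bit they get TFmode instead, and _Float128x is
   never provided.  */
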
23838 /* Target hook for c_mode_for_suffix. */
23839 static machine_mode
23840 rs6000_c_mode_for_suffix (char suffix)
23841 {
23842 if (TARGET_FLOAT128_TYPE)
23843 {
23844 if (suffix == 'q' || suffix == 'Q')
23845 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23846
23847 /* At the moment, we are not defining a suffix for IBM extended double.
23848 If/when the default for -mabi=ieeelongdouble is changed, and we want
23849 to support __ibm128 constants in legacy library code, we may need to
23850 re-evaluate this decision. Currently, c-lex.cc only supports 'w' and
23851 'q' as machine-dependent suffixes. The x86_64 port uses 'w' for
23852 __float80 constants. */
23853 }
23854
23855 return VOIDmode;
23856 }
23857
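/* For example (editor's sketch): with -mfloat128,

     __float128 third = 1.0q / 3.0q;

   gives the constants the IEEE 128-bit mode selected above, i.e. KFmode,
   or TFmode when long double is already IEEE 128-bit.  */
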
23858 /* Target hook for invalid_arg_for_unprototyped_fn. */
23859 static const char *
23860 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
23861 {
23862 return (!rs6000_darwin64_abi
23863 && typelist == 0
23864 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
23865 && (funcdecl == NULL_TREE
23866 || (TREE_CODE (funcdecl) == FUNCTION_DECL
23867 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
23868 ? N_("AltiVec argument passed to unprototyped function")
23869 : NULL;
23870 }
23871
23872 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
23873 setup by calling the hidden function __stack_chk_fail_local instead
23874 of calling __stack_chk_fail directly. Otherwise it is better to call
23875 __stack_chk_fail directly. */
23876
23877 static tree ATTRIBUTE_UNUSED
23878 rs6000_stack_protect_fail (void)
23879 {
23880 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
23881 ? default_hidden_stack_protect_fail ()
23882 : default_external_stack_protect_fail ();
23883 }
23884
23885 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
23886
23887 #if TARGET_ELF
23888 static unsigned HOST_WIDE_INT
23889 rs6000_asan_shadow_offset (void)
23890 {
23891 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
23892 }
23893 #endif
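
/* Editor's illustration: ASan computes shadow addresses as
     shadow = (addr >> 3) + offset
   so with the 64-bit offset of 1 << 41 above, address 0x1000 maps to
   shadow byte 0x20000000200.  */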
23894 \f
23895 /* Mask options that we want to support inside of attribute((target)) and
23896 #pragma GCC target operations. Note, we do not include things like
23897 64/32-bit, endianness, hard/soft floating point, etc. that would have
23898 different calling sequences. */
23899
23900 struct rs6000_opt_mask {
23901 const char *name; /* option name */
23902 HOST_WIDE_INT mask; /* mask to set */
23903 bool invert; /* invert sense of mask */
23904 bool valid_target; /* option is a target option */
23905 };
23906
23907 static struct rs6000_opt_mask const rs6000_opt_masks[] =
23908 {
23909 { "altivec", OPTION_MASK_ALTIVEC, false, true },
23910 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
23911 false, true },
23912 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
23913 false, true },
23914 { "cmpb", OPTION_MASK_CMPB, false, true },
23915 { "crypto", OPTION_MASK_CRYPTO, false, true },
23916 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
23917 { "dlmzb", OPTION_MASK_DLMZB, false, true },
23918 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
23919 false, true },
23920 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
23921 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
23922 { "fprnd", OPTION_MASK_FPRND, false, true },
23923 { "power10", OPTION_MASK_POWER10, false, true },
23924 { "hard-dfp", OPTION_MASK_DFP, false, true },
23925 { "htm", OPTION_MASK_HTM, false, true },
23926 { "isel", OPTION_MASK_ISEL, false, true },
23927 { "mfcrf", OPTION_MASK_MFCRF, false, true },
23928 { "mfpgpr", 0, false, true },
23929 { "mma", OPTION_MASK_MMA, false, true },
23930 { "modulo", OPTION_MASK_MODULO, false, true },
23931 { "mulhw", OPTION_MASK_MULHW, false, true },
23932 { "multiple", OPTION_MASK_MULTIPLE, false, true },
23933 { "pcrel", OPTION_MASK_PCREL, false, true },
23934 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
23935 { "popcntb", OPTION_MASK_POPCNTB, false, true },
23936 { "popcntd", OPTION_MASK_POPCNTD, false, true },
23937 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
23938 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
23939 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
23940 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
23941 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
23942 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
23943 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
23944 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
23945 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
23946 { "prefixed", OPTION_MASK_PREFIXED, false, true },
23947 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
23948 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
23949 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
23950 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
23951 { "string", 0, false, true },
23952 { "update", OPTION_MASK_NO_UPDATE, true , true },
23953 { "vsx", OPTION_MASK_VSX, false, true },
23954 #ifdef OPTION_MASK_64BIT
23955 #if TARGET_AIX_OS
23956 { "aix64", OPTION_MASK_64BIT, false, false },
23957 { "aix32", OPTION_MASK_64BIT, true, false },
23958 #else
23959 { "64", OPTION_MASK_64BIT, false, false },
23960 { "32", OPTION_MASK_64BIT, true, false },
23961 #endif
23962 #endif
23963 #ifdef OPTION_MASK_EABI
23964 { "eabi", OPTION_MASK_EABI, false, false },
23965 #endif
23966 #ifdef OPTION_MASK_LITTLE_ENDIAN
23967 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
23968 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
23969 #endif
23970 #ifdef OPTION_MASK_RELOCATABLE
23971 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
23972 #endif
23973 #ifdef OPTION_MASK_STRICT_ALIGN
23974 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
23975 #endif
23976 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
23977 { "string", 0, false, false },
23978 };
23979
23980 /* Builtin mask mapping for printing the flags. */
23981 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
23982 {
23983 { "altivec", RS6000_BTM_ALTIVEC, false, false },
23984 { "vsx", RS6000_BTM_VSX, false, false },
23985 { "fre", RS6000_BTM_FRE, false, false },
23986 { "fres", RS6000_BTM_FRES, false, false },
23987 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
23988 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
23989 { "popcntd", RS6000_BTM_POPCNTD, false, false },
23990 { "cell", RS6000_BTM_CELL, false, false },
23991 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
23992 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
23993 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
23994 { "crypto", RS6000_BTM_CRYPTO, false, false },
23995 { "htm", RS6000_BTM_HTM, false, false },
23996 { "hard-dfp", RS6000_BTM_DFP, false, false },
23997 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
23998 { "long-double-128", RS6000_BTM_LDBL128, false, false },
23999 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
24000 { "float128", RS6000_BTM_FLOAT128, false, false },
24001 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
24002 { "mma", RS6000_BTM_MMA, false, false },
24003 { "power10", RS6000_BTM_P10, false, false },
24004 };
24005
24006 /* Option variables that we want to support inside attribute((target)) and
24007 #pragma GCC target operations. */
24008
24009 struct rs6000_opt_var {
24010 const char *name; /* option name */
24011 size_t global_offset; /* offset of the option in global_options. */
24012 size_t target_offset; /* offset of the option in target options. */
24013 };
24014
24015 static struct rs6000_opt_var const rs6000_opt_vars[] =
24016 {
24017 { "friz",
24018 offsetof (struct gcc_options, x_TARGET_FRIZ),
24019 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24020 { "avoid-indexed-addresses",
24021 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24022 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24023 { "longcall",
24024 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24025 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24026 { "optimize-swaps",
24027 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24028 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24029 { "allow-movmisalign",
24030 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24031 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24032 { "sched-groups",
24033 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24034 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24035 { "always-hint",
24036 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24037 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24038 { "align-branch-targets",
24039 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24040 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24041 { "sched-prolog",
24042 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24043 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24044 { "sched-epilog",
24045 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24046 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24047 { "speculate-indirect-jumps",
24048 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24049 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24050 };
24051
24052 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24053 parsing. Return true if there were no errors. */
24054
24055 static bool
24056 rs6000_inner_target_options (tree args, bool attr_p)
24057 {
24058 bool ret = true;
24059
24060 if (args == NULL_TREE)
24061 ;
24062
24063 else if (TREE_CODE (args) == STRING_CST)
24064 {
24065 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24066 char *q;
24067
24068 while ((q = strtok (p, ",")) != NULL)
24069 {
24070 bool error_p = false;
24071 bool not_valid_p = false;
24072 const char *cpu_opt = NULL;
24073
24074 p = NULL;
24075 if (startswith (q, "cpu="))
24076 {
24077 int cpu_index = rs6000_cpu_name_lookup (q+4);
24078 if (cpu_index >= 0)
24079 rs6000_cpu_index = cpu_index;
24080 else
24081 {
24082 error_p = true;
24083 cpu_opt = q+4;
24084 }
24085 }
24086 else if (startswith (q, "tune="))
24087 {
24088 int tune_index = rs6000_cpu_name_lookup (q+5);
24089 if (tune_index >= 0)
24090 rs6000_tune_index = tune_index;
24091 else
24092 {
24093 error_p = true;
24094 cpu_opt = q+5;
24095 }
24096 }
24097 else
24098 {
24099 size_t i;
24100 bool invert = false;
24101 char *r = q;
24102
24103 error_p = true;
24104 if (startswith (r, "no-"))
24105 {
24106 invert = true;
24107 r += 3;
24108 }
24109
24110 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24111 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24112 {
24113 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24114
24115 if (!rs6000_opt_masks[i].valid_target)
24116 not_valid_p = true;
24117 else
24118 {
24119 error_p = false;
24120 rs6000_isa_flags_explicit |= mask;
24121
24122 /* VSX needs altivec, so -mvsx automagically sets
24123 altivec and disables -mavoid-indexed-addresses. */
24124 if (!invert)
24125 {
24126 if (mask == OPTION_MASK_VSX)
24127 {
24128 mask |= OPTION_MASK_ALTIVEC;
24129 TARGET_AVOID_XFORM = 0;
24130 }
24131 }
24132
24133 if (rs6000_opt_masks[i].invert)
24134 invert = !invert;
24135
24136 if (invert)
24137 rs6000_isa_flags &= ~mask;
24138 else
24139 rs6000_isa_flags |= mask;
24140 }
24141 break;
24142 }
24143
24144 if (error_p && !not_valid_p)
24145 {
24146 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24147 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24148 {
24149 size_t j = rs6000_opt_vars[i].global_offset;
24150 *((int *) ((char *)&global_options + j)) = !invert;
24151 error_p = false;
24152 not_valid_p = false;
24153 break;
24154 }
24155 }
24156 }
24157
24158 if (error_p)
24159 {
24160 const char *eprefix, *esuffix;
24161
24162 ret = false;
24163 if (attr_p)
24164 {
24165 eprefix = "__attribute__((__target__(";
24166 esuffix = ")))";
24167 }
24168 else
24169 {
24170 eprefix = "#pragma GCC target ";
24171 esuffix = "";
24172 }
24173
24174 if (cpu_opt)
24175 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24176 q, esuffix);
24177 else if (not_valid_p)
24178 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24179 else
24180 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24181 }
24182 }
24183 }
24184
24185 else if (TREE_CODE (args) == TREE_LIST)
24186 {
24187 do
24188 {
24189 tree value = TREE_VALUE (args);
24190 if (value)
24191 {
24192 bool ret2 = rs6000_inner_target_options (value, attr_p);
24193 if (!ret2)
24194 ret = false;
24195 }
24196 args = TREE_CHAIN (args);
24197 }
24198 while (args != NULL_TREE);
24199 }
24200
24201 else
24202 {
24203 error ("attribute %<target%> argument not a string");
24204 return false;
24205 }
24206
24207 return ret;
24208 }
24209
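/* For example (editor's sketch), both the mask options and the variable
   options above can appear in one string handled by
   rs6000_inner_target_options:

     __attribute__ ((target ("vsx,no-htm,friz")))
     double round_fast (double);

   "vsx" sets OPTION_MASK_VSX (and implies altivec), "no-htm" clears
   OPTION_MASK_HTM, and "friz" sets the TARGET_FRIZ variable.  */
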
24210 /* Print out the target options as a list for -mdebug=target. */
24211
24212 static void
24213 rs6000_debug_target_options (tree args, const char *prefix)
24214 {
24215 if (args == NULL_TREE)
24216 fprintf (stderr, "%s<NULL>", prefix);
24217
24218 else if (TREE_CODE (args) == STRING_CST)
24219 {
24220 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24221 char *q;
24222
24223 while ((q = strtok (p, ",")) != NULL)
24224 {
24225 p = NULL;
24226 fprintf (stderr, "%s\"%s\"", prefix, q);
24227 prefix = ", ";
24228 }
24229 }
24230
24231 else if (TREE_CODE (args) == TREE_LIST)
24232 {
24233 do
24234 {
24235 tree value = TREE_VALUE (args);
24236 if (value)
24237 {
24238 rs6000_debug_target_options (value, prefix);
24239 prefix = ", ";
24240 }
24241 args = TREE_CHAIN (args);
24242 }
24243 while (args != NULL_TREE);
24244 }
24245
24246 else
24247 gcc_unreachable ();
24248
24249 return;
24250 }
24251
24252 \f
24253 /* Hook to validate attribute((target("..."))). */
24254
24255 static bool
24256 rs6000_valid_attribute_p (tree fndecl,
24257 tree ARG_UNUSED (name),
24258 tree args,
24259 int flags)
24260 {
24261 struct cl_target_option cur_target;
24262 bool ret;
24263 tree old_optimize;
24264 tree new_target, new_optimize;
24265 tree func_optimize;
24266
24267 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24268
24269 if (TARGET_DEBUG_TARGET)
24270 {
24271 tree tname = DECL_NAME (fndecl);
24272 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24273 if (tname)
24274 fprintf (stderr, "function: %.*s\n",
24275 (int) IDENTIFIER_LENGTH (tname),
24276 IDENTIFIER_POINTER (tname));
24277 else
24278 fprintf (stderr, "function: unknown\n");
24279
24280 fprintf (stderr, "args:");
24281 rs6000_debug_target_options (args, " ");
24282 fprintf (stderr, "\n");
24283
24284 if (flags)
24285 fprintf (stderr, "flags: 0x%x\n", flags);
24286
24287 fprintf (stderr, "--------------------\n");
24288 }
24289
24290 /* attribute((target("default"))) does nothing, beyond
24291 affecting multi-versioning. */
24292 if (TREE_VALUE (args)
24293 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24294 && TREE_CHAIN (args) == NULL_TREE
24295 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24296 return true;
24297
24298 old_optimize = build_optimization_node (&global_options,
24299 &global_options_set);
24300 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24301
24302 /* If the function changed the optimization levels as well as setting target
24303 options, start with the optimizations specified. */
24304 if (func_optimize && func_optimize != old_optimize)
24305 cl_optimization_restore (&global_options, &global_options_set,
24306 TREE_OPTIMIZATION (func_optimize));
24307
24308 /* The target attributes may also change some optimization flags, so update
24309 the optimization options if necessary. */
24310 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24311 rs6000_cpu_index = rs6000_tune_index = -1;
24312 ret = rs6000_inner_target_options (args, true);
24313
24314 /* Set up any additional state. */
24315 if (ret)
24316 {
24317 ret = rs6000_option_override_internal (false);
24318 new_target = build_target_option_node (&global_options,
24319 &global_options_set);
24320 }
24321 else
24322 new_target = NULL;
24323
24324 new_optimize = build_optimization_node (&global_options,
24325 &global_options_set);
24326
24327 if (!new_target)
24328 ret = false;
24329
24330 else if (fndecl)
24331 {
24332 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24333
24334 if (old_optimize != new_optimize)
24335 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24336 }
24337
24338 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24339
24340 if (old_optimize != new_optimize)
24341 cl_optimization_restore (&global_options, &global_options_set,
24342 TREE_OPTIMIZATION (old_optimize));
24343
24344 return ret;
24345 }
24346
24347 \f
24348 /* Hook to validate the current #pragma GCC target and set the state, and
24349 update the macros based on what was changed. If ARGS is NULL, then
24350 POP_TARGET is used to reset the options. */
24351
24352 bool
24353 rs6000_pragma_target_parse (tree args, tree pop_target)
24354 {
24355 tree prev_tree = build_target_option_node (&global_options,
24356 &global_options_set);
24357 tree cur_tree;
24358 struct cl_target_option *prev_opt, *cur_opt;
24359 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24360 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
24361
24362 if (TARGET_DEBUG_TARGET)
24363 {
24364 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24365 fprintf (stderr, "args:");
24366 rs6000_debug_target_options (args, " ");
24367 fprintf (stderr, "\n");
24368
24369 if (pop_target)
24370 {
24371 fprintf (stderr, "pop_target:\n");
24372 debug_tree (pop_target);
24373 }
24374 else
24375 fprintf (stderr, "pop_target: <NULL>\n");
24376
24377 fprintf (stderr, "--------------------\n");
24378 }
24379
24380 if (! args)
24381 {
24382 cur_tree = ((pop_target)
24383 ? pop_target
24384 : target_option_default_node);
24385 cl_target_option_restore (&global_options, &global_options_set,
24386 TREE_TARGET_OPTION (cur_tree));
24387 }
24388 else
24389 {
24390 rs6000_cpu_index = rs6000_tune_index = -1;
24391 if (!rs6000_inner_target_options (args, false)
24392 || !rs6000_option_override_internal (false)
24393 || (cur_tree = build_target_option_node (&global_options,
24394 &global_options_set))
24395 == NULL_TREE)
24396 {
24397 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24398 fprintf (stderr, "invalid pragma\n");
24399
24400 return false;
24401 }
24402 }
24403
24404 target_option_current_node = cur_tree;
24405 rs6000_activate_target_options (target_option_current_node);
24406
24407 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24408 change the macros that are defined. */
24409 if (rs6000_target_modify_macros_ptr)
24410 {
24411 prev_opt = TREE_TARGET_OPTION (prev_tree);
24412 prev_bumask = prev_opt->x_rs6000_builtin_mask;
24413 prev_flags = prev_opt->x_rs6000_isa_flags;
24414
24415 cur_opt = TREE_TARGET_OPTION (cur_tree);
24416 cur_flags = cur_opt->x_rs6000_isa_flags;
24417 cur_bumask = cur_opt->x_rs6000_builtin_mask;
24418
24419 diff_bumask = (prev_bumask ^ cur_bumask);
24420 diff_flags = (prev_flags ^ cur_flags);
24421
24422 if ((diff_flags != 0) || (diff_bumask != 0))
24423 {
24424 /* Delete old macros. */
24425 rs6000_target_modify_macros_ptr (false,
24426 prev_flags & diff_flags,
24427 prev_bumask & diff_bumask);
24428
24429 /* Define new macros. */
24430 rs6000_target_modify_macros_ptr (true,
24431 cur_flags & diff_flags,
24432 cur_bumask & diff_bumask);
24433 }
24434 }
24435
24436 return true;
24437 }
24438
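/* For example (editor's note):

     #pragma GCC push_options
     #pragma GCC target ("cpu=power9,vsx")
     ... code compiled for power9 with VSX ...
     #pragma GCC pop_options

   The pop calls back into this function with ARGS == NULL and the saved
   options node as POP_TARGET, taking the restore path above.  */
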
24439 \f
24440 /* Remember the last target of rs6000_set_current_function. */
24441 static GTY(()) tree rs6000_previous_fndecl;
24442
24443 /* Restore target's globals from NEW_TREE and invalidate the
24444 rs6000_previous_fndecl cache. */
24445
24446 void
24447 rs6000_activate_target_options (tree new_tree)
24448 {
24449 cl_target_option_restore (&global_options, &global_options_set,
24450 TREE_TARGET_OPTION (new_tree));
24451 if (TREE_TARGET_GLOBALS (new_tree))
24452 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24453 else if (new_tree == target_option_default_node)
24454 restore_target_globals (&default_target_globals);
24455 else
24456 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24457 rs6000_previous_fndecl = NULL_TREE;
24458 }
24459
24460 /* Establish appropriate back-end context for processing the function
24461 FNDECL. The argument might be NULL to indicate processing at top
24462 level, outside of any function scope. */
24463 static void
24464 rs6000_set_current_function (tree fndecl)
24465 {
24466 if (TARGET_DEBUG_TARGET)
24467 {
24468 fprintf (stderr, "\n==================== rs6000_set_current_function");
24469
24470 if (fndecl)
24471 fprintf (stderr, ", fndecl %s (%p)",
24472 (DECL_NAME (fndecl)
24473 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24474 : "<unknown>"), (void *)fndecl);
24475
24476 if (rs6000_previous_fndecl)
24477 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24478
24479 fprintf (stderr, "\n");
24480 }
24481
24482 /* Only change the context if the function changes. This hook is called
24483 several times in the course of compiling a function, and we don't want to
24484 slow things down too much or call target_reinit when it isn't safe. */
24485 if (fndecl == rs6000_previous_fndecl)
24486 return;
24487
24488 tree old_tree;
24489 if (rs6000_previous_fndecl == NULL_TREE)
24490 old_tree = target_option_current_node;
24491 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24492 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24493 else
24494 old_tree = target_option_default_node;
24495
24496 tree new_tree;
24497 if (fndecl == NULL_TREE)
24498 {
24499 if (old_tree != target_option_current_node)
24500 new_tree = target_option_current_node;
24501 else
24502 new_tree = NULL_TREE;
24503 }
24504 else
24505 {
24506 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24507 if (new_tree == NULL_TREE)
24508 new_tree = target_option_default_node;
24509 }
24510
24511 if (TARGET_DEBUG_TARGET)
24512 {
24513 if (new_tree)
24514 {
24515 fprintf (stderr, "\nnew fndecl target specific options:\n");
24516 debug_tree (new_tree);
24517 }
24518
24519 if (old_tree)
24520 {
24521 fprintf (stderr, "\nold fndecl target specific options:\n");
24522 debug_tree (old_tree);
24523 }
24524
24525 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24526 fprintf (stderr, "--------------------\n");
24527 }
24528
24529 if (new_tree && old_tree != new_tree)
24530 rs6000_activate_target_options (new_tree);
24531
24532 if (fndecl)
24533 rs6000_previous_fndecl = fndecl;
24534 }
24535
24536 \f
24537 /* Save the current options */
24538
24539 static void
24540 rs6000_function_specific_save (struct cl_target_option *ptr,
24541 struct gcc_options *opts,
24542 struct gcc_options */* opts_set */)
24543 {
24544 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24545 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24546 }
24547
24548 /* Restore the current options */
24549
24550 static void
24551 rs6000_function_specific_restore (struct gcc_options *opts,
24552 struct gcc_options */* opts_set */,
24553 struct cl_target_option *ptr)
24554
24555 {
24556 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24557 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24558 (void) rs6000_option_override_internal (false);
24559 }
24560
24561 /* Print the current options */
24562
24563 static void
24564 rs6000_function_specific_print (FILE *file, int indent,
24565 struct cl_target_option *ptr)
24566 {
24567 rs6000_print_isa_options (file, indent, "Isa options set",
24568 ptr->x_rs6000_isa_flags);
24569
24570 rs6000_print_isa_options (file, indent, "Isa options explicit",
24571 ptr->x_rs6000_isa_flags_explicit);
24572 }
24573
24574 /* Helper function to print the current isa or misc options on a line. */
24575
24576 static void
24577 rs6000_print_options_internal (FILE *file,
24578 int indent,
24579 const char *string,
24580 HOST_WIDE_INT flags,
24581 const char *prefix,
24582 const struct rs6000_opt_mask *opts,
24583 size_t num_elements)
24584 {
24585 size_t i;
24586 size_t start_column = 0;
24587 size_t cur_column;
24588 size_t max_column = 120;
24589 size_t prefix_len = strlen (prefix);
24590 size_t comma_len = 0;
24591 const char *comma = "";
24592
24593 if (indent)
24594 start_column += fprintf (file, "%*s", indent, "");
24595
24596 if (!flags)
24597 {
24598 fprintf (file, DEBUG_FMT_S, string, "<none>");
24599 return;
24600 }
24601
24602 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
24603
24604 /* Print the various mask options. */
24605 cur_column = start_column;
24606 for (i = 0; i < num_elements; i++)
24607 {
24608 bool invert = opts[i].invert;
24609 const char *name = opts[i].name;
24610 const char *no_str = "";
24611 HOST_WIDE_INT mask = opts[i].mask;
24612 size_t len = comma_len + prefix_len + strlen (name);
24613
24614 if (!invert)
24615 {
24616 if ((flags & mask) == 0)
24617 {
24618 no_str = "no-";
24619 len += strlen ("no-");
24620 }
24621
24622 flags &= ~mask;
24623 }
24624
24625 else
24626 {
24627 if ((flags & mask) != 0)
24628 {
24629 no_str = "no-";
24630 len += strlen ("no-");
24631 }
24632
24633 flags |= mask;
24634 }
24635
24636 cur_column += len;
24637 if (cur_column > max_column)
24638 {
24639 fprintf (file, ", \\\n%*s", (int)start_column, "");
24640 cur_column = start_column + len;
24641 comma = "";
24642 }
24643
24644 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24645 comma = ", ";
24646 comma_len = strlen (", ");
24647 }
24648
24649 fputs ("\n", file);
24650 }
24651
24652 /* Helper function to print the current isa options on a line. */
24653
24654 static void
24655 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24656 HOST_WIDE_INT flags)
24657 {
24658 rs6000_print_options_internal (file, indent, string, flags, "-m",
24659 &rs6000_opt_masks[0],
24660 ARRAY_SIZE (rs6000_opt_masks));
24661 }
24662
24663 static void
24664 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
24665 HOST_WIDE_INT flags)
24666 {
24667 rs6000_print_options_internal (file, indent, string, flags, "",
24668 &rs6000_builtin_mask_names[0],
24669 ARRAY_SIZE (rs6000_builtin_mask_names));
24670 }
24671
24672 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA
24673 2.06, 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24674 -mupper-regs-df, etc.).
24675
24676 If the user used -mno-power8-vector, we need to turn off all of the implicit
24677 ISA 2.07 and 3.0 options that relate to the vector unit.
24678
24679 If the user used -mno-power9-vector, we need to turn off all of the implicit
24680 ISA 3.0 options that relate to the vector unit.
24681
24682 This function does not handle explicit options such as the user specifying
24683 -mdirect-move. These are handled in rs6000_option_override_internal, and
24684 the appropriate error is given if needed.
24685
24686 We return a mask of all of the implicit options that should not be enabled
24687 by default. */
24688
24689 static HOST_WIDE_INT
24690 rs6000_disable_incompatible_switches (void)
24691 {
24692 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24693 size_t i, j;
24694
24695 static const struct {
24696 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24697 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24698 const char *const name; /* name of the switch. */
24699 } flags[] = {
24700 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
24701 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
24702 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
24703 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
24704 };
24705
24706 for (i = 0; i < ARRAY_SIZE (flags); i++)
24707 {
24708 HOST_WIDE_INT no_flag = flags[i].no_flag;
24709
24710 if ((rs6000_isa_flags & no_flag) == 0
24711 && (rs6000_isa_flags_explicit & no_flag) != 0)
24712 {
24713 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24714 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24715 & rs6000_isa_flags
24716 & dep_flags);
24717
24718 if (set_flags)
24719 {
24720 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24721 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24722 {
24723 set_flags &= ~rs6000_opt_masks[j].mask;
24724 error ("%<-mno-%s%> turns off %<-m%s%>",
24725 flags[i].name,
24726 rs6000_opt_masks[j].name);
24727 }
24728
24729 gcc_assert (!set_flags);
24730 }
24731
24732 rs6000_isa_flags &= ~dep_flags;
24733 ignore_masks |= no_flag | dep_flags;
24734 }
24735 }
24736
24737 return ignore_masks;
24738 }
24739
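/* Editor's illustration: "-mcpu=power9 -mno-vsx" reaches the function
   above with VSX explicitly off, so the implicit power8-vector and
   power9-vector options that depend on VSX are masked out of the
   defaults silently; only an *explicitly* requested dependent option
   triggers the "-mno-xxx turns off -myyy" error.  */
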
24740 \f
24741 /* Helper function for printing the function name when debugging. */
24742
24743 static const char *
24744 get_decl_name (tree fn)
24745 {
24746 tree name;
24747
24748 if (!fn)
24749 return "<null>";
24750
24751 name = DECL_NAME (fn);
24752 if (!name)
24753 return "<no-name>";
24754
24755 return IDENTIFIER_POINTER (name);
24756 }
24757
24758 /* Return the clone id of the target we are compiling code for in a target
24759 clone. Clone ids are ordered from 0 (default) to CLONE_MAX-1 and give
24760 the priority ordering of the target clones (from lowest to
24761 highest). */
24762
24763 static int
24764 rs6000_clone_priority (tree fndecl)
24765 {
24766 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24767 HOST_WIDE_INT isa_masks;
24768 int ret = CLONE_DEFAULT;
24769 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24770 const char *attrs_str = NULL;
24771
24772 attrs = TREE_VALUE (TREE_VALUE (attrs));
24773 attrs_str = TREE_STRING_POINTER (attrs);
24774
24775 /* Return priority zero for default function. Return the ISA needed for the
24776 function if it is not the default. */
24777 if (strcmp (attrs_str, "default") != 0)
24778 {
24779 if (fn_opts == NULL_TREE)
24780 fn_opts = target_option_default_node;
24781
24782 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24783 isa_masks = rs6000_isa_flags;
24784 else
24785 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24786
24787 for (ret = CLONE_MAX - 1; ret != 0; ret--)
24788 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24789 break;
24790 }
24791
24792 if (TARGET_DEBUG_TARGET)
24793 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
24794 get_decl_name (fndecl), ret);
24795
24796 return ret;
24797 }
24798
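/* Editor's sketch of the feature this clone machinery supports:

     __attribute__ ((target_clones ("default", "cpu=power9")))
     long sum (long *p, long n);

   Each clone's priority is derived from the ISA mask its target string
   enables, so the power9 clone outranks the default clone.  */
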
24799 /* This compares the priority of target features in function DECL1 and DECL2.
24800 It returns positive value if DECL1 is higher priority, negative value if
24801 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24802 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24803
24804 static int
24805 rs6000_compare_version_priority (tree decl1, tree decl2)
24806 {
24807 int priority1 = rs6000_clone_priority (decl1);
24808 int priority2 = rs6000_clone_priority (decl2);
24809 int ret = priority1 - priority2;
24810
24811 if (TARGET_DEBUG_TARGET)
24812 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24813 get_decl_name (decl1), get_decl_name (decl2), ret);
24814
24815 return ret;
24816 }
24817
24818 /* Make a dispatcher declaration for the multi-versioned function DECL.
24819 Calls to DECL function will be replaced with calls to the dispatcher
24820 by the front-end. Returns the decl of the dispatcher function. */
24821
24822 static tree
24823 rs6000_get_function_versions_dispatcher (void *decl)
24824 {
24825 tree fn = (tree) decl;
24826 struct cgraph_node *node = NULL;
24827 struct cgraph_node *default_node = NULL;
24828 struct cgraph_function_version_info *node_v = NULL;
24829 struct cgraph_function_version_info *first_v = NULL;
24830
24831 tree dispatch_decl = NULL;
24832
24833 struct cgraph_function_version_info *default_version_info = NULL;
24834 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
24835
24836 if (TARGET_DEBUG_TARGET)
24837 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
24838 get_decl_name (fn));
24839
24840 node = cgraph_node::get (fn);
24841 gcc_assert (node != NULL);
24842
24843 node_v = node->function_version ();
24844 gcc_assert (node_v != NULL);
24845
24846 if (node_v->dispatcher_resolver != NULL)
24847 return node_v->dispatcher_resolver;
24848
24849 /* Find the default version and make it the first node. */
24850 first_v = node_v;
24851 /* Go to the beginning of the chain. */
24852 while (first_v->prev != NULL)
24853 first_v = first_v->prev;
24854
24855 default_version_info = first_v;
24856 while (default_version_info != NULL)
24857 {
24858 const tree decl2 = default_version_info->this_node->decl;
24859 if (is_function_default_version (decl2))
24860 break;
24861 default_version_info = default_version_info->next;
24862 }
24863
24864 /* If there is no default node, just return NULL. */
24865 if (default_version_info == NULL)
24866 return NULL;
24867
24868 /* Make default info the first node. */
24869 if (first_v != default_version_info)
24870 {
24871 default_version_info->prev->next = default_version_info->next;
24872 if (default_version_info->next)
24873 default_version_info->next->prev = default_version_info->prev;
24874 first_v->prev = default_version_info;
24875 default_version_info->next = first_v;
24876 default_version_info->prev = NULL;
24877 }
24878
24879 default_node = default_version_info->this_node;
24880
24881 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24882 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24883 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24884 "exports hardware capability bits");
24885 #else
24886
24887 if (targetm.has_ifunc_p ())
24888 {
24889 struct cgraph_function_version_info *it_v = NULL;
24890 struct cgraph_node *dispatcher_node = NULL;
24891 struct cgraph_function_version_info *dispatcher_version_info = NULL;
24892
24893 /* Right now, the dispatching is done via ifunc. */
24894 dispatch_decl = make_dispatcher_decl (default_node->decl);
24895
24896 dispatcher_node = cgraph_node::get_create (dispatch_decl);
24897 gcc_assert (dispatcher_node != NULL);
24898 dispatcher_node->dispatcher_function = 1;
24899 dispatcher_version_info
24900 = dispatcher_node->insert_new_function_version ();
24901 dispatcher_version_info->next = default_version_info;
24902 dispatcher_node->definition = 1;
24903
24904 /* Set the dispatcher for all the versions. */
24905 it_v = default_version_info;
24906 while (it_v != NULL)
24907 {
24908 it_v->dispatcher_resolver = dispatch_decl;
24909 it_v = it_v->next;
24910 }
24911 }
24912 else
24913 {
24914 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24915 "multiversioning needs %<ifunc%> which is not supported "
24916 "on this target");
24917 }
24918 #endif
24919
24920 return dispatch_decl;
24921 }
24922
24923 /* Make the resolver function decl to dispatch the versions of a multi-
24924 versioned function, DEFAULT_DECL. Create an empty basic block in the
24925 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
24926 function. */
24927
24928 static tree
24929 make_resolver_func (const tree default_decl,
24930 const tree dispatch_decl,
24931 basic_block *empty_bb)
24932 {
24933 /* Make the resolver function static. The resolver function returns
24934 void *. */
24935 tree decl_name = clone_function_name (default_decl, "resolver");
24936 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
24937 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
24938 tree decl = build_fn_decl (resolver_name, type);
24939 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
24940
24941 DECL_NAME (decl) = decl_name;
24942 TREE_USED (decl) = 1;
24943 DECL_ARTIFICIAL (decl) = 1;
24944 DECL_IGNORED_P (decl) = 0;
24945 TREE_PUBLIC (decl) = 0;
24946 DECL_UNINLINABLE (decl) = 1;
24947
24948 /* Resolver is not external, body is generated. */
24949 DECL_EXTERNAL (decl) = 0;
24950 DECL_EXTERNAL (dispatch_decl) = 0;
24951
24952 DECL_CONTEXT (decl) = NULL_TREE;
24953 DECL_INITIAL (decl) = make_node (BLOCK);
24954 DECL_STATIC_CONSTRUCTOR (decl) = 0;
24955
24956 if (DECL_COMDAT_GROUP (default_decl)
24957 || TREE_PUBLIC (default_decl))
24958 {
24959 /* In this case, each translation unit with a call to this
24960 versioned function will emit a resolver. Make it comdat
24961 so that only one copy is kept. */
24962 DECL_COMDAT (decl) = 1;
24963 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
24964 }
24965 else
24966 TREE_PUBLIC (dispatch_decl) = 0;
24967
24968 /* Build result decl and add to function_decl. */
24969 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
24970 DECL_CONTEXT (t) = decl;
24971 DECL_ARTIFICIAL (t) = 1;
24972 DECL_IGNORED_P (t) = 1;
24973 DECL_RESULT (decl) = t;
24974
24975 gimplify_function_tree (decl);
24976 push_cfun (DECL_STRUCT_FUNCTION (decl));
24977 *empty_bb = init_lowered_empty_function (decl, false,
24978 profile_count::uninitialized ());
24979
24980 cgraph_node::add_new_function (decl, true);
24981 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
24982
24983 pop_cfun ();
24984
24985 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
24986 DECL_ATTRIBUTES (dispatch_decl)
24987 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
24988
24989 cgraph_node::create_same_body_alias (dispatch_decl, decl);
24990
24991 return decl;
24992 }
24993
24994 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
24995 return a pointer to VERSION_DECL if we are running on a machine that
24996 supports the index CLONE_ISA hardware architecture bits. This function will
24997 be called during version dispatch to decide which function version to
24998 execute. It returns the basic block at the end, to which more conditions
24999 can be added. */
25000
25001 static basic_block
25002 add_condition_to_bb (tree function_decl, tree version_decl,
25003 int clone_isa, basic_block new_bb)
25004 {
25005 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25006
25007 gcc_assert (new_bb != NULL);
25008 gimple_seq gseq = bb_seq (new_bb);
25009
25010
25011 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25012 build_fold_addr_expr (version_decl));
25013 tree result_var = create_tmp_var (ptr_type_node);
25014 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25015 gimple *return_stmt = gimple_build_return (result_var);
25016
25017 if (clone_isa == CLONE_DEFAULT)
25018 {
25019 gimple_seq_add_stmt (&gseq, convert_stmt);
25020 gimple_seq_add_stmt (&gseq, return_stmt);
25021 set_bb_seq (new_bb, gseq);
25022 gimple_set_bb (convert_stmt, new_bb);
25023 gimple_set_bb (return_stmt, new_bb);
25024 pop_cfun ();
25025 return new_bb;
25026 }
25027
25028 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25029 tree cond_var = create_tmp_var (bool_int_type_node);
25030 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25031 const char *arg_str = rs6000_clone_map[clone_isa].name;
25032 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25033 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25034 gimple_call_set_lhs (call_cond_stmt, cond_var);
25035
25036 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25037 gimple_set_bb (call_cond_stmt, new_bb);
25038 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25039
25040 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25041 NULL_TREE, NULL_TREE);
25042 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25043 gimple_set_bb (if_else_stmt, new_bb);
25044 gimple_seq_add_stmt (&gseq, if_else_stmt);
25045
25046 gimple_seq_add_stmt (&gseq, convert_stmt);
25047 gimple_seq_add_stmt (&gseq, return_stmt);
25048 set_bb_seq (new_bb, gseq);
25049
25050 basic_block bb1 = new_bb;
25051 edge e12 = split_block (bb1, if_else_stmt);
25052 basic_block bb2 = e12->dest;
25053 e12->flags &= ~EDGE_FALLTHRU;
25054 e12->flags |= EDGE_TRUE_VALUE;
25055
25056 edge e23 = split_block (bb2, return_stmt);
25057 gimple_set_bb (convert_stmt, bb2);
25058 gimple_set_bb (return_stmt, bb2);
25059
25060 basic_block bb3 = e23->dest;
25061 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25062
25063 remove_edge (e23);
25064 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25065
25066 pop_cfun ();
25067 return bb3;
25068 }
25069
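/* Editor's paraphrase of the GIMPLE built above for a non-default clone
   (the "arch_3_00" string comes from rs6000_clone_map; the clone name is
   illustrative):

     cond = __builtin_cpu_supports ("arch_3_00");
     if (cond != 0)
       return (void *) clone_for_power9;

   with the false edge falling through to test the next clone.  */
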
25070 /* This function generates the dispatch function for multi-versioned functions.
25071 DISPATCH_DECL is the function which will contain the dispatch logic.
25072 FNDECLS are the function choices for dispatch, and is a tree chain.
25073 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25074 code is generated. */
25075
25076 static int
25077 dispatch_function_versions (tree dispatch_decl,
25078 void *fndecls_p,
25079 basic_block *empty_bb)
25080 {
25081 int ix;
25082 tree ele;
25083 vec<tree> *fndecls;
25084 tree clones[CLONE_MAX];
25085
25086 if (TARGET_DEBUG_TARGET)
25087 fputs ("dispatch_function_versions, top\n", stderr);
25088
25089 gcc_assert (dispatch_decl != NULL
25090 && fndecls_p != NULL
25091 && empty_bb != NULL);
25092
25093 /* fndecls_p is actually a vector. */
25094 fndecls = static_cast<vec<tree> *> (fndecls_p);
25095
25096 /* At least one more version other than the default. */
25097 gcc_assert (fndecls->length () >= 2);
25098
25099 /* The first version in the vector is the default decl. */
25100 memset ((void *) clones, '\0', sizeof (clones));
25101 clones[CLONE_DEFAULT] = (*fndecls)[0];
25102
25103 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25104 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25105 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
25106 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25107 to insert the code here to do the call. */
25108
25109 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25110 {
25111 int priority = rs6000_clone_priority (ele);
25112 if (!clones[priority])
25113 clones[priority] = ele;
25114 }
25115
25116 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25117 if (clones[ix])
25118 {
25119 if (TARGET_DEBUG_TARGET)
25120 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25121 ix, get_decl_name (clones[ix]));
25122
25123 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25124 *empty_bb);
25125 }
25126
25127 return 0;
25128 }
25129
25130 /* Generate the dispatching code body to dispatch multi-versioned function
25131 DECL. The target hook is called to process the "target" attributes and
25132 provide the code to dispatch the right function at run-time. NODE points
25133 to the dispatcher decl whose body will be created. */
25134
25135 static tree
25136 rs6000_generate_version_dispatcher_body (void *node_p)
25137 {
25138 tree resolver;
25139 basic_block empty_bb;
25140 struct cgraph_node *node = (cgraph_node *) node_p;
25141 struct cgraph_function_version_info *ninfo = node->function_version ();
25142
25143 if (ninfo->dispatcher_resolver)
25144 return ninfo->dispatcher_resolver;
25145
25146 /* node is going to be an alias, so remove the finalized bit. */
25147 node->definition = false;
25148
25149 /* The first version in the chain corresponds to the default version. */
25150 ninfo->dispatcher_resolver = resolver
25151 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25152
25153 if (TARGET_DEBUG_TARGET)
25154 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
25155 get_decl_name (resolver));
25156
25157 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25158 auto_vec<tree, 2> fn_ver_vec;
25159
25160 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25161 vinfo;
25162 vinfo = vinfo->next)
25163 {
25164 struct cgraph_node *version = vinfo->this_node;
25165 /* Check for virtual functions here again, as by this time it should
25166 have been determined if this function needs a vtable index or
25167 not. This happens for methods in derived classes that override
25168 virtual methods in base classes but are not explicitly marked as
25169 virtual. */
25170 if (DECL_VINDEX (version->decl))
25171 sorry ("Virtual function multiversioning not supported");
25172
25173 fn_ver_vec.safe_push (version->decl);
25174 }
25175
25176 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25177 cgraph_edge::rebuild_edges ();
25178 pop_cfun ();
25179 return resolver;
25180 }
25181
25182 /* Hook to decide if we need to scan function gimple statements to
25183 collect target specific information for inlining, and update the
25184 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25185 to predict which ISA feature is used at this time. Return true
25186 if we need to scan, otherwise return false. */
25187
25188 static bool
25189 rs6000_need_ipa_fn_target_info (const_tree decl,
25190 unsigned int &info ATTRIBUTE_UNUSED)
25191 {
25192 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25193 if (!target)
25194 target = target_option_default_node;
25195 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25196
25197 /* See PR102059; we only handle HTM for now, so we only do the
25198 subsequent scanning when the HTM feature is enabled. */
25199 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25200 return true;
25201
25202 return false;
25203 }
25204
25205 /* Hook to update target specific information INFO for inlining by
25206 checking the given STMT. Return false if we don't need to scan
25207 any more, otherwise return true. */
25208
25209 static bool
25210 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25211 {
25212 /* Assume inline asm can use any instruction features. */
25213 if (gimple_code (stmt) == GIMPLE_ASM)
25214 {
25215 /* This should set any bits we are concerned with; for now
25216 OPTION_MASK_HTM is the only bit we care about. */
25217 info |= RS6000_FN_TARGET_INFO_HTM;
25218 return false;
25219 }
25220 else if (gimple_code (stmt) == GIMPLE_CALL)
25221 {
25222 tree fndecl = gimple_call_fndecl (stmt);
25223 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25224 {
25225 enum rs6000_gen_builtins fcode
25226 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25227 /* HTM bifs definitely exploit HTM insns. */
25228 if (bif_is_htm (rs6000_builtin_info[fcode]))
25229 {
25230 info |= RS6000_FN_TARGET_INFO_HTM;
25231 return false;
25232 }
25233 }
25234 }
25235
25236 return true;
25237 }
25238
25239 /* Hook to determine if one function can safely inline another. */
25240
25241 static bool
25242 rs6000_can_inline_p (tree caller, tree callee)
25243 {
25244 bool ret = false;
25245 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25246 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25247
25248 /* If the callee has no option attributes, then it is ok to inline. */
25249 if (!callee_tree)
25250 ret = true;
25251
25252 else
25253 {
25254 HOST_WIDE_INT caller_isa;
25255 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25256 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25257 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25258
25259 /* If the caller has option attributes, then use them.
25260 Otherwise, use the command line options. */
25261 if (caller_tree)
25262 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
25263 else
25264 caller_isa = rs6000_isa_flags;
25265
25266 cgraph_node *callee_node = cgraph_node::get (callee);
25267 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25268 {
25269 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25270 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25271 {
25272 callee_isa &= ~OPTION_MASK_HTM;
25273 explicit_isa &= ~OPTION_MASK_HTM;
25274 }
25275 }
25276
25277 /* The callee's options must be a subset of the caller's options, i.e.
25278 a vsx function may inline an altivec function, but a no-vsx function
25279 must not inline a vsx function. However, for those options that the
25280 callee has explicitly enabled or disabled, then we must enforce that
25281 the callee's and caller's options match exactly; see PR70010. */
25282 if (((caller_isa & callee_isa) == callee_isa)
25283 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25284 ret = true;
25285 }
25286
25287 if (TARGET_DEBUG_TARGET)
25288 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25289 get_decl_name (caller), get_decl_name (callee),
25290 (ret ? "can" : "cannot"));
25291
25292 return ret;
25293 }
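/* Illustrative applications of the subset rule above (a sketch):
   - caller -mvsx (which implies altivec), callee -maltivec: inlined;
   - caller -mno-vsx, callee -mvsx: rejected;
   - a callee with explicit -mhtm whose body the IPA hooks above found
     not to use HTM may still be inlined into a no-HTM caller, because
     OPTION_MASK_HTM was masked out of the comparison.  */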
25294 \f
25295 /* Allocate a stack temp and fix up the address so it meets the particular
25296 memory requirements (either offsettable or REG+REG addressing). */
25297
25298 rtx
25299 rs6000_allocate_stack_temp (machine_mode mode,
25300 bool offsettable_p,
25301 bool reg_reg_p)
25302 {
25303 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25304 rtx addr = XEXP (stack, 0);
25305 int strict_p = reload_completed;
25306
25307 if (!legitimate_indirect_address_p (addr, strict_p))
25308 {
25309 if (offsettable_p
25310 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25311 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25312
25313 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25314 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25315 }
25316
25317 return stack;
25318 }
25319
25320 /* Given a memory reference, if it does not use reg or reg+reg addressing,
25321 convert it to such a form to deal with memory reference instructions
25322 like STFIWX and LDBRX that only take reg+reg addressing. */
25323
25324 rtx
25325 rs6000_force_indexed_or_indirect_mem (rtx x)
25326 {
25327 machine_mode mode = GET_MODE (x);
25328
25329 gcc_assert (MEM_P (x));
25330 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25331 {
25332 rtx addr = XEXP (x, 0);
25333 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25334 {
25335 rtx reg = XEXP (addr, 0);
25336 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25337 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25338 gcc_assert (REG_P (reg));
25339 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25340 addr = reg;
25341 }
25342 else if (GET_CODE (addr) == PRE_MODIFY)
25343 {
25344 rtx reg = XEXP (addr, 0);
25345 rtx expr = XEXP (addr, 1);
25346 gcc_assert (REG_P (reg));
25347 gcc_assert (GET_CODE (expr) == PLUS);
25348 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25349 addr = reg;
25350 }
25351
25352 if (GET_CODE (addr) == PLUS)
25353 {
25354 rtx op0 = XEXP (addr, 0);
25355 rtx op1 = XEXP (addr, 1);
25356 op0 = force_reg (Pmode, op0);
25357 op1 = force_reg (Pmode, op1);
25358 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25359 }
25360 else
25361 x = replace_equiv_address (x, force_reg (Pmode, addr));
25362 }
25363
25364 return x;
25365 }
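/* Illustrative RTL (a sketch; pseudo register numbers are hypothetical):
   (mem:DI (plus:DI (reg:DI 3) (const_int 16)))
   is rewritten with the offset forced into a register, giving
   (mem:DI (plus:DI (reg:DI 3) (reg:DI 131))),
   a reg+reg form that X-form instructions like LDBRX can consume.  */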
25366
25367 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25368
25369 On the RS/6000, all integer constants are acceptable, though most won't be
25370 valid for particular insns. Only easy FP constants are acceptable. */
25371
25372 static bool
25373 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25374 {
25375 if (TARGET_ELF && tls_referenced_p (x))
25376 return false;
25377
25378 if (CONST_DOUBLE_P (x))
25379 return easy_fp_constant (x, mode);
25380
25381 if (GET_CODE (x) == CONST_VECTOR)
25382 return easy_vector_constant (x, mode);
25383
25384 return true;
25385 }
25386
25387 #if TARGET_AIX_OS
25388 /* Implement TARGET_PRECOMPUTE_TLS_P.
25389
25390 On AIX, TLS symbols are in the TOC, which is maintained in the
25391 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25392 must be considered legitimate constants. */
25393
25394 static bool
25395 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25396 {
25397 return tls_referenced_p (x);
25398 }
25399 #endif
25400
25401 \f
25402 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25403
25404 static bool
25405 chain_already_loaded (rtx_insn *last)
25406 {
25407 for (; last != NULL; last = PREV_INSN (last))
25408 {
25409 if (NONJUMP_INSN_P (last))
25410 {
25411 rtx patt = PATTERN (last);
25412
25413 if (GET_CODE (patt) == SET)
25414 {
25415 rtx lhs = XEXP (patt, 0);
25416
25417 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25418 return true;
25419 }
25420 }
25421 }
25422 return false;
25423 }
25424
25425 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25426
25427 void
25428 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25429 {
25430 rtx func = func_desc;
25431 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25432 rtx toc_load = NULL_RTX;
25433 rtx toc_restore = NULL_RTX;
25434 rtx func_addr;
25435 rtx abi_reg = NULL_RTX;
25436 rtx call[5];
25437 int n_call;
25438 rtx insn;
25439 bool is_pltseq_longcall;
25440
25441 if (global_tlsarg)
25442 tlsarg = global_tlsarg;
25443
25444 /* Handle longcall attributes. */
25445 is_pltseq_longcall = false;
25446 if ((INTVAL (cookie) & CALL_LONG) != 0
25447 && GET_CODE (func_desc) == SYMBOL_REF)
25448 {
25449 func = rs6000_longcall_ref (func_desc, tlsarg);
25450 if (TARGET_PLTSEQ)
25451 is_pltseq_longcall = true;
25452 }
25453
25454 /* Handle indirect calls. */
25455 if (!SYMBOL_REF_P (func)
25456 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25457 {
25458 if (!rs6000_pcrel_p ())
25459 {
25460 /* Save the TOC into its reserved slot before the call,
25461 and prepare to restore it after the call. */
25462 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25463 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25464 gen_rtvec (1, stack_toc_offset),
25465 UNSPEC_TOCSLOT);
25466 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25467
25468 /* Can we optimize saving the TOC in the prologue or
25469 do we need to do it at every call? */
25470 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25471 cfun->machine->save_toc_in_prologue = true;
25472 else
25473 {
25474 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25475 rtx stack_toc_mem = gen_frame_mem (Pmode,
25476 gen_rtx_PLUS (Pmode, stack_ptr,
25477 stack_toc_offset));
25478 MEM_VOLATILE_P (stack_toc_mem) = 1;
25479 if (is_pltseq_longcall)
25480 {
25481 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25482 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25483 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25484 }
25485 else
25486 emit_move_insn (stack_toc_mem, toc_reg);
25487 }
25488 }
25489
25490 if (DEFAULT_ABI == ABI_ELFv2)
25491 {
25492 /* A function pointer in the ELFv2 ABI is just a plain address, but
25493 the ABI requires it to be loaded into r12 before the call. */
25494 func_addr = gen_rtx_REG (Pmode, 12);
25495 emit_move_insn (func_addr, func);
25496 abi_reg = func_addr;
25497 /* Indirect calls via CTR are strongly preferred over indirect
25498 calls via LR, so move the address there. Needed to mark
25499 this insn for linker plt sequence editing too. */
25500 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25501 if (is_pltseq_longcall)
25502 {
25503 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25504 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25505 emit_insn (gen_rtx_SET (func_addr, mark_func));
25506 v = gen_rtvec (2, func_addr, func_desc);
25507 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25508 }
25509 else
25510 emit_move_insn (func_addr, abi_reg);
25511 }
25512 else
25513 {
25514 /* A function pointer under AIX is a pointer to a data area whose
25515 first word contains the actual address of the function, whose
25516 second word contains a pointer to its TOC, and whose third word
25517 contains a value to place in the static chain register (r11).
25518 Note that if we load the static chain, our "trampoline" need
25519 not have any executable code. */
25520
25521 /* Load up address of the actual function. */
25522 func = force_reg (Pmode, func);
25523 func_addr = gen_reg_rtx (Pmode);
25524 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25525
25526 /* Indirect calls via CTR are strongly preferred over indirect
25527 calls via LR, so move the address there. */
25528 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25529 emit_move_insn (ctr_reg, func_addr);
25530 func_addr = ctr_reg;
25531
25532 /* Prepare to load the TOC of the called function. Note that the
25533 TOC load must happen immediately before the actual call so
25534 that unwinding the TOC registers works correctly. See the
25535 comment in frob_update_context. */
25536 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25537 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25538 gen_rtx_PLUS (Pmode, func,
25539 func_toc_offset));
25540 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25541
25542 /* If we have a static chain, load it up. But, if the call was
25543 originally direct, the 3rd word has not been written since no
25544 trampoline has been built, so we ought not to load it, lest we
25545 override a static chain value. */
25546 if (!(GET_CODE (func_desc) == SYMBOL_REF
25547 && SYMBOL_REF_FUNCTION_P (func_desc))
25548 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25549 && !chain_already_loaded (get_current_sequence ()->next->last))
25550 {
25551 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25552 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25553 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25554 gen_rtx_PLUS (Pmode, func,
25555 func_sc_offset));
25556 emit_move_insn (sc_reg, func_sc_mem);
25557 abi_reg = sc_reg;
25558 }
25559 }
25560 }
25561 else
25562 {
25563 /* No TOC register needed for calls from PC-relative callers. */
25564 if (!rs6000_pcrel_p ())
25565 /* Direct calls use the TOC: for local calls, the callee will
25566 assume the TOC register is set; for non-local calls, the
25567 PLT stub needs the TOC register. */
25568 abi_reg = toc_reg;
25569 func_addr = func;
25570 }
25571
25572 /* Create the call. */
25573 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25574 if (value != NULL_RTX)
25575 call[0] = gen_rtx_SET (value, call[0]);
25576 call[1] = gen_rtx_USE (VOIDmode, cookie);
25577 n_call = 2;
25578
25579 if (toc_load)
25580 call[n_call++] = toc_load;
25581 if (toc_restore)
25582 call[n_call++] = toc_restore;
25583
25584 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25585
25586 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25587 insn = emit_call_insn (insn);
25588
25589 /* Mention all registers defined by the ABI to hold information
25590 as uses in CALL_INSN_FUNCTION_USAGE. */
25591 if (abi_reg)
25592 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25593 }
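/* Illustrative ELFv2 indirect-call sequence built above (a sketch, not
   the exact emitted assembly):
     std r2,24(r1)    # save TOC in its reserved stack slot
     mtctr r12        # the ABI wants the address in r12; call via CTR
     bctrl
     ld r2,24(r1)     # restore TOC after the call  */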
25594
25595 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25596
25597 void
25598 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25599 {
25600 rtx call[2];
25601 rtx insn;
25602 rtx r12 = NULL_RTX;
25603 rtx func_addr = func_desc;
25604
25605 gcc_assert (INTVAL (cookie) == 0);
25606
25607 if (global_tlsarg)
25608 tlsarg = global_tlsarg;
25609
25610 /* For ELFv2, r12 and CTR need to hold the function address
25611 for an indirect call. */
25612 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25613 {
25614 r12 = gen_rtx_REG (Pmode, 12);
25615 emit_move_insn (r12, func_desc);
25616 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25617 emit_move_insn (func_addr, r12);
25618 }
25619
25620 /* Create the call. */
25621 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25622 if (value != NULL_RTX)
25623 call[0] = gen_rtx_SET (value, call[0]);
25624
25625 call[1] = simple_return_rtx;
25626
25627 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25628 insn = emit_call_insn (insn);
25629
25630 /* Note use of the TOC register. */
25631 if (!rs6000_pcrel_p ())
25632 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25633 gen_rtx_REG (Pmode, TOC_REGNUM));
25634
25635 /* Note use of r12. */
25636 if (r12)
25637 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25638 }
25639
25640 /* Expand code to perform a call under the SYSV4 ABI. */
25641
25642 void
25643 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25644 {
25645 rtx func = func_desc;
25646 rtx func_addr;
25647 rtx call[4];
25648 rtx insn;
25649 rtx abi_reg = NULL_RTX;
25650 int n;
25651
25652 if (global_tlsarg)
25653 tlsarg = global_tlsarg;
25654
25655 /* Handle longcall attributes. */
25656 if ((INTVAL (cookie) & CALL_LONG) != 0
25657 && GET_CODE (func_desc) == SYMBOL_REF)
25658 {
25659 func = rs6000_longcall_ref (func_desc, tlsarg);
25660 /* If the longcall was implemented as an inline PLT call using
25661 PLT unspecs then func will be REG:r11. If not, func will be
25662 a pseudo reg. The inline PLT call sequence supports lazy
25663 linking (and longcalls to functions in dlopen'd libraries).
25664 The other style of longcall doesn't. The lazy linking entry
25665 to the dynamic symbol resolver requires r11 be the function
25666 address (as it is for linker generated PLT stubs). Ensure
25667 r11 stays valid to the bctrl by marking r11 used by the call. */
25668 if (TARGET_PLTSEQ)
25669 abi_reg = func;
25670 }
25671
25672 /* Handle indirect calls. */
25673 if (GET_CODE (func) != SYMBOL_REF)
25674 {
25675 func = force_reg (Pmode, func);
25676
25677 /* Indirect calls via CTR are strongly preferred over indirect
25678 calls via LR, so move the address there. That can't be left
25679 to reload because we want to mark every instruction in an
25680 inline PLT call sequence with a reloc, enabling the linker to
25681 edit the sequence back to a direct call when that makes sense. */
25682 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25683 if (abi_reg)
25684 {
25685 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25686 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25687 emit_insn (gen_rtx_SET (func_addr, mark_func));
25688 v = gen_rtvec (2, func_addr, func_desc);
25689 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25690 }
25691 else
25692 emit_move_insn (func_addr, func);
25693 }
25694 else
25695 func_addr = func;
25696
25697 /* Create the call. */
25698 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25699 if (value != NULL_RTX)
25700 call[0] = gen_rtx_SET (value, call[0]);
25701
25702 call[1] = gen_rtx_USE (VOIDmode, cookie);
25703 n = 2;
25704 if (TARGET_SECURE_PLT
25705 && flag_pic
25706 && GET_CODE (func_addr) == SYMBOL_REF
25707 && !SYMBOL_REF_LOCAL_P (func_addr))
25708 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25709
25710 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25711
25712 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25713 insn = emit_call_insn (insn);
25714 if (abi_reg)
25715 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25716 }
25717
25718 /* Expand code to perform a sibling call under the SysV4 ABI. */
25719
25720 void
25721 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25722 {
25723 rtx func = func_desc;
25724 rtx func_addr;
25725 rtx call[3];
25726 rtx insn;
25727 rtx abi_reg = NULL_RTX;
25728
25729 if (global_tlsarg)
25730 tlsarg = global_tlsarg;
25731
25732 /* Handle longcall attributes. */
25733 if ((INTVAL (cookie) & CALL_LONG) != 0
25734 && GET_CODE (func_desc) == SYMBOL_REF)
25735 {
25736 func = rs6000_longcall_ref (func_desc, tlsarg);
25737 /* If the longcall was implemented as an inline PLT call using
25738 PLT unspecs then func will be REG:r11. If not, func will be
25739 a pseudo reg. The inline PLT call sequence supports lazy
25740 linking (and longcalls to functions in dlopen'd libraries).
25741 The other style of longcall doesn't. The lazy linking entry
25742 to the dynamic symbol resolver requires r11 be the function
25743 address (as it is for linker generated PLT stubs). Ensure
25744 r11 stays valid to the bctr by marking r11 used by the call. */
25745 if (TARGET_PLTSEQ)
25746 abi_reg = func;
25747 }
25748
25749 /* Handle indirect calls. */
25750 if (GET_CODE (func) != SYMBOL_REF)
25751 {
25752 func = force_reg (Pmode, func);
25753
25754 /* Indirect sibcalls must go via CTR. That can't be left to
25755 reload because we want to mark every instruction in an inline
25756 PLT call sequence with a reloc, enabling the linker to edit
25757 the sequence back to a direct call when that makes sense. */
25758 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25759 if (abi_reg)
25760 {
25761 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25762 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25763 emit_insn (gen_rtx_SET (func_addr, mark_func));
25764 v = gen_rtvec (2, func_addr, func_desc);
25765 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25766 }
25767 else
25768 emit_move_insn (func_addr, func);
25769 }
25770 else
25771 func_addr = func;
25772
25773 /* Create the call. */
25774 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25775 if (value != NULL_RTX)
25776 call[0] = gen_rtx_SET (value, call[0]);
25777
25778 call[1] = gen_rtx_USE (VOIDmode, cookie);
25779 call[2] = simple_return_rtx;
25780
25781 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25782 insn = emit_call_insn (insn);
25783 if (abi_reg)
25784 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25785 }
25786
25787 #if TARGET_MACHO
25788
25789 /* Expand code to perform a call under the Darwin ABI.
25790 Modulo handling of mlongcall, this is much the same as sysv.
25791 If/when the longcall optimisation is removed, we could drop this
25792 code and use the sysv case (taking care to avoid the tls stuff).
25793
25794 We can use this for sibcalls too, if needed. */
25795
25796 void
25797 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25798 rtx cookie, bool sibcall)
25799 {
25800 rtx func = func_desc;
25801 rtx func_addr;
25802 rtx call[3];
25803 rtx insn;
25804 int cookie_val = INTVAL (cookie);
25805 bool make_island = false;
25806
25807 /* Handle longcall attributes; there are two cases for Darwin:
25808 1) Newer linkers are capable of synthesising any branch islands needed.
25809 2) We need a helper branch island synthesised by the compiler.
25810 The second case has mostly been retired and we don't use it for m64.
25811 In fact, it is only an optimisation; we could just indirect as sysv
25812 does, but we keep it for backwards compatibility for now.
25813 If we're going to use this, then we need to keep the CALL_LONG bit set,
25814 so that we can pick up the special insn form later. */
25815 if ((cookie_val & CALL_LONG) != 0
25816 && GET_CODE (func_desc) == SYMBOL_REF)
25817 {
25818 /* FIXME: the longcall opt should not hang off this flag; it is most
25819 likely incorrect for kernel-mode code generation. */
25820 if (darwin_symbol_stubs && TARGET_32BIT)
25821 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
25822 else
25823 {
25824 /* The linker is capable of doing this, but the user explicitly
25825 asked for -mlongcall, so we'll do the 'normal' version. */
25826 func = rs6000_longcall_ref (func_desc, NULL_RTX);
25827 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
25828 }
25829 }
25830
25831 /* Handle indirect calls. */
25832 if (GET_CODE (func) != SYMBOL_REF)
25833 {
25834 func = force_reg (Pmode, func);
25835
25836 /* Indirect calls via CTR are strongly preferred over indirect
25837 calls via LR, and are required for indirect sibcalls, so move
25838 the address there. */
25839 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25840 emit_move_insn (func_addr, func);
25841 }
25842 else
25843 func_addr = func;
25844
25845 /* Create the call. */
25846 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25847 if (value != NULL_RTX)
25848 call[0] = gen_rtx_SET (value, call[0]);
25849
25850 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25851
25852 if (sibcall)
25853 call[2] = simple_return_rtx;
25854 else
25855 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25856
25857 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25858 insn = emit_call_insn (insn);
25859 /* Now that we have the debug info in the insn, we can set up the branch
25860 island if we're using one. */
25861 if (make_island)
25862 {
25863 tree funname = get_identifier (XSTR (func_desc, 0));
25864
25865 if (no_previous_def (funname))
25866 {
25867 rtx label_rtx = gen_label_rtx ();
25868 char *label_buf, temp_buf[256];
25869 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25870 CODE_LABEL_NUMBER (label_rtx));
25871 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25872 tree labelname = get_identifier (label_buf);
25873 add_compiler_branch_island (labelname, funname,
25874 insn_line ((const rtx_insn*)insn));
25875 }
25876 }
25877 }
25878 #endif
25879
25880 void
25881 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25882 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25883 {
25884 #if TARGET_MACHO
25885 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25886 #else
25887 gcc_unreachable ();
25888 #endif
25889 }
25890
25891
25892 void
25893 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25894 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25895 {
25896 #if TARGET_MACHO
25897 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25898 #else
25899 gcc_unreachable ();
25900 #endif
25901 }
25902
25903 /* Return whether we should generate PC-relative code for FNDECL. */
25904 bool
25905 rs6000_fndecl_pcrel_p (const_tree fndecl)
25906 {
25907 if (DEFAULT_ABI != ABI_ELFv2)
25908 return false;
25909
25910 struct cl_target_option *opts = target_opts_for_fn (fndecl);
25911
25912 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25913 && TARGET_CMODEL == CMODEL_MEDIUM);
25914 }
25915
25916 /* Return whether we should generate PC-relative code for *FN. */
25917 bool
25918 rs6000_function_pcrel_p (struct function *fn)
25919 {
25920 if (DEFAULT_ABI != ABI_ELFv2)
25921 return false;
25922
25923 /* Optimize usual case. */
25924 if (fn == cfun)
25925 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25926 && TARGET_CMODEL == CMODEL_MEDIUM);
25927
25928 return rs6000_fndecl_pcrel_p (fn->decl);
25929 }
25930
25931 /* Return whether we should generate PC-relative code for the current
25932 function. */
25933 bool
25934 rs6000_pcrel_p ()
25935 {
25936 return (DEFAULT_ABI == ABI_ELFv2
25937 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25938 && TARGET_CMODEL == CMODEL_MEDIUM);
25939 }
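/* For example (a sketch): ELFv2 code built with -mcpu=power10 enables
   OPTION_MASK_PCREL by default under the medium code model, so these
   predicates return true there unless -mno-pcrel or a different code
   model is requested.  */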
25940
25941 \f
25942 /* Given an address (ADDR), a mode (MODE), and the format of the corresponding
25943 non-prefixed address (NON_PREFIXED_FORMAT), return the instruction format
25944 for the address. */
25945
25946 enum insn_form
25947 address_to_insn_form (rtx addr,
25948 machine_mode mode,
25949 enum non_prefixed_form non_prefixed_format)
25950 {
25951 /* Single register is easy. */
25952 if (REG_P (addr) || SUBREG_P (addr))
25953 return INSN_FORM_BASE_REG;
25954
25955 /* If the non-prefixed instruction format doesn't support offset addressing,
25956 make sure only indexed addressing is allowed.
25957
25958 We special case SDmode so that the register allocator does not try to move
25959 SDmode through GPR registers, but instead uses the 32-bit integer load and
25960 store instructions for the floating point registers. */
25961 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
25962 {
25963 if (GET_CODE (addr) != PLUS)
25964 return INSN_FORM_BAD;
25965
25966 rtx op0 = XEXP (addr, 0);
25967 rtx op1 = XEXP (addr, 1);
25968 if (!REG_P (op0) && !SUBREG_P (op0))
25969 return INSN_FORM_BAD;
25970
25971 if (!REG_P (op1) && !SUBREG_P (op1))
25972 return INSN_FORM_BAD;
25973
25974 return INSN_FORM_X;
25975 }
25976
25977 /* Deal with update forms. */
25978 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
25979 return INSN_FORM_UPDATE;
25980
25981 /* Handle PC-relative symbols and labels. Check for both local and
25982 external symbols. Assume labels are always local. TLS symbols
25983 are not PC-relative for rs6000. */
25984 if (TARGET_PCREL)
25985 {
25986 if (LABEL_REF_P (addr))
25987 return INSN_FORM_PCREL_LOCAL;
25988
25989 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
25990 {
25991 if (!SYMBOL_REF_LOCAL_P (addr))
25992 return INSN_FORM_PCREL_EXTERNAL;
25993 else
25994 return INSN_FORM_PCREL_LOCAL;
25995 }
25996 }
25997
25998 if (GET_CODE (addr) == CONST)
25999 addr = XEXP (addr, 0);
26000
26001 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26002 if (GET_CODE (addr) == LO_SUM)
26003 return INSN_FORM_LO_SUM;
26004
26005 /* Everything below must be an offset address of some form. */
26006 if (GET_CODE (addr) != PLUS)
26007 return INSN_FORM_BAD;
26008
26009 rtx op0 = XEXP (addr, 0);
26010 rtx op1 = XEXP (addr, 1);
26011
26012 /* Check for indexed addresses. */
26013 if (REG_P (op1) || SUBREG_P (op1))
26014 {
26015 if (REG_P (op0) || SUBREG_P (op0))
26016 return INSN_FORM_X;
26017
26018 return INSN_FORM_BAD;
26019 }
26020
26021 if (!CONST_INT_P (op1))
26022 return INSN_FORM_BAD;
26023
26024 HOST_WIDE_INT offset = INTVAL (op1);
26025 if (!SIGNED_INTEGER_34BIT_P (offset))
26026 return INSN_FORM_BAD;
26027
26028 /* Check for local and external PC-relative addresses. Labels are always
26029 local. TLS symbols are not PC-relative for rs6000. */
26030 if (TARGET_PCREL)
26031 {
26032 if (LABEL_REF_P (op0))
26033 return INSN_FORM_PCREL_LOCAL;
26034
26035 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26036 {
26037 if (!SYMBOL_REF_LOCAL_P (op0))
26038 return INSN_FORM_PCREL_EXTERNAL;
26039 else
26040 return INSN_FORM_PCREL_LOCAL;
26041 }
26042 }
26043
26044 /* If it isn't PC-relative, the address must use a base register. */
26045 if (!REG_P (op0) && !SUBREG_P (op0))
26046 return INSN_FORM_BAD;
26047
26048 /* Large offsets must be prefixed. */
26049 if (!SIGNED_INTEGER_16BIT_P (offset))
26050 {
26051 if (TARGET_PREFIXED)
26052 return INSN_FORM_PREFIXED_NUMERIC;
26053
26054 return INSN_FORM_BAD;
26055 }
26056
26057 /* We have a 16-bit offset; see which default instruction format to use. */
26058 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26059 {
26060 unsigned size = GET_MODE_SIZE (mode);
26061
26062 /* On 64-bit systems, assume 64-bit integers need to use DS form
26063 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26064 (for LXV and STXV). TImode is problematic in that its normal usage
26065 is expected to be GPRs where it wants a DS instruction format, but if
26066 it goes into the vector registers, it wants a DQ instruction
26067 format. */
26068 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26069 non_prefixed_format = NON_PREFIXED_DS;
26070
26071 else if (TARGET_VSX && size >= 16
26072 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26073 non_prefixed_format = NON_PREFIXED_DQ;
26074
26075 else
26076 non_prefixed_format = NON_PREFIXED_D;
26077 }
26078
26079 /* Classify the D/DS/DQ-form addresses. */
26080 switch (non_prefixed_format)
26081 {
26082 /* Instruction format D, all 16 bits are valid. */
26083 case NON_PREFIXED_D:
26084 return INSN_FORM_D;
26085
26086 /* Instruction format DS, bottom 2 bits must be 0. */
26087 case NON_PREFIXED_DS:
26088 if ((offset & 3) == 0)
26089 return INSN_FORM_DS;
26090
26091 else if (TARGET_PREFIXED)
26092 return INSN_FORM_PREFIXED_NUMERIC;
26093
26094 else
26095 return INSN_FORM_BAD;
26096
26097 /* Instruction format DQ, bottom 4 bits must be 0. */
26098 case NON_PREFIXED_DQ:
26099 if ((offset & 15) == 0)
26100 return INSN_FORM_DQ;
26101
26102 else if (TARGET_PREFIXED)
26103 return INSN_FORM_PREFIXED_NUMERIC;
26104
26105 else
26106 return INSN_FORM_BAD;
26107
26108 default:
26109 break;
26110 }
26111
26112 return INSN_FORM_BAD;
26113 }
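/* Illustrative classifications from the routine above (a sketch):
   (reg r9)                               -> INSN_FORM_BASE_REG
   (plus (reg r9) (reg r10))              -> INSN_FORM_X
   (plus (reg r9) (const_int 12)), DImode -> INSN_FORM_DS (low 2 bits 0)
   (plus (reg r9) (const_int 100000))     -> INSN_FORM_PREFIXED_NUMERIC
   when prefixed instructions are available, else INSN_FORM_BAD.  */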
26114
26115 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26116 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26117 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26118 a D-form or DS-form instruction. X-form and base_reg are always
26119 allowed. */
26120 bool
26121 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26122 enum non_prefixed_form non_prefixed_format)
26123 {
26124 enum insn_form result_form;
26125
26126 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26127
26128 switch (non_prefixed_format)
26129 {
26130 case NON_PREFIXED_D:
26131 switch (result_form)
26132 {
26133 case INSN_FORM_X:
26134 case INSN_FORM_D:
26135 case INSN_FORM_DS:
26136 case INSN_FORM_BASE_REG:
26137 return true;
26138 default:
26139 return false;
26140 }
26141 break;
26142 case NON_PREFIXED_DS:
26143 switch (result_form)
26144 {
26145 case INSN_FORM_X:
26146 case INSN_FORM_DS:
26147 case INSN_FORM_BASE_REG:
26148 return true;
26149 default:
26150 return false;
26151 }
26152 break;
26153 default:
26154 break;
26155 }
26156 return false;
26157 }
26158
26159 /* Return true if a load of REG with a given MODE from, or a store of REG to,
26160 a MEM location uses a non-prefixed D/DS/DQ-form address. This is used to
26161 validate that a load or store subject to the PCREL_OPT optimization is an
26162 instruction that can be optimized.
26163
26164 We need to specify the MODE separately from the REG to allow for loads that
26165 include zero/sign/float extension. */
26166
26167 bool
26168 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26169 {
26170 /* If the instruction is indexed-only, like LFIWAX/LXSIWAX, we cannot do
26171 the PCREL_OPT optimization. */
26172 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26173 if (non_prefixed == NON_PREFIXED_X)
26174 return false;
26175
26176 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26177 rtx addr = XEXP (mem, 0);
26178 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26179 return (iform == INSN_FORM_BASE_REG
26180 || iform == INSN_FORM_D
26181 || iform == INSN_FORM_DS
26182 || iform == INSN_FORM_DQ);
26183 }
26184
26185 /* Helper function to see if we're potentially looking at lfs/stfs.
26186 - PARALLEL containing a SET and a CLOBBER
26187 - stfs:
26188 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26189 - CLOBBER is a V4SF
26190 - lfs:
26191 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26192 - CLOBBER is a DI
26193 */
26194
26195 static bool
26196 is_lfs_stfs_insn (rtx_insn *insn)
26197 {
26198 rtx pattern = PATTERN (insn);
26199 if (GET_CODE (pattern) != PARALLEL)
26200 return false;
26201
26202 /* This should be a parallel with exactly one set and one clobber. */
26203 if (XVECLEN (pattern, 0) != 2)
26204 return false;
26205
26206 rtx set = XVECEXP (pattern, 0, 0);
26207 if (GET_CODE (set) != SET)
26208 return false;
26209
26210 rtx clobber = XVECEXP (pattern, 0, 1);
26211 if (GET_CODE (clobber) != CLOBBER)
26212 return false;
26213
26214 /* All we care about is that the destination of the SET is a mem:SI,
26215 the source is an UNSPEC_SI_FROM_SF, and the clobber
26216 is a scratch:V4SF. */
26217
26218 rtx dest = SET_DEST (set);
26219 rtx src = SET_SRC (set);
26220 rtx scratch = SET_DEST (clobber);
26221
26222 if (GET_CODE (src) != UNSPEC)
26223 return false;
26224
26225 /* stfs case. */
26226 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26227 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26228 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26229 return true;
26230
26231 /* lfs case. */
26232 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26233 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26234 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26235 return true;
26236
26237 return false;
26238 }
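/* Illustrative stfs-style pattern accepted above (a sketch):
   (parallel [(set (mem:SI ...)
                   (unspec:SI [(reg:SF ...)] UNSPEC_SI_FROM_SF))
              (clobber (scratch:V4SF))])  */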
26239
26240 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26241 instruction format (D/DS/DQ) used for offset memory. */
26242
26243 enum non_prefixed_form
26244 reg_to_non_prefixed (rtx reg, machine_mode mode)
26245 {
26246 /* If it isn't a register, use the defaults. */
26247 if (!REG_P (reg) && !SUBREG_P (reg))
26248 return NON_PREFIXED_DEFAULT;
26249
26250 unsigned int r = reg_or_subregno (reg);
26251
26252 /* If we have a pseudo, use the default instruction format. */
26253 if (!HARD_REGISTER_NUM_P (r))
26254 return NON_PREFIXED_DEFAULT;
26255
26256 unsigned size = GET_MODE_SIZE (mode);
26257
26258 /* FPRs use the D format for scalars, and the DQ format for vectors, IEEE
26259 128-bit floating point, and 128-bit integers. Before power9, only indexed
26260 addressing was available for vectors. */
26261 if (FP_REGNO_P (r))
26262 {
26263 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26264 return NON_PREFIXED_D;
26265
26266 else if (size < 8)
26267 return NON_PREFIXED_X;
26268
26269 else if (TARGET_VSX && size >= 16
26270 && (VECTOR_MODE_P (mode)
26271 || VECTOR_ALIGNMENT_P (mode)
26272 || mode == TImode || mode == CTImode))
26273 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26274
26275 else
26276 return NON_PREFIXED_DEFAULT;
26277 }
26278
26279 /* Altivec registers use the DS format for scalars, and the DQ format for
26280 vectors, IEEE 128-bit floating point, and 128-bit integers. Before power9,
26281 only indexed addressing was available. */
26282 else if (ALTIVEC_REGNO_P (r))
26283 {
26284 if (!TARGET_P9_VECTOR)
26285 return NON_PREFIXED_X;
26286
26287 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26288 return NON_PREFIXED_DS;
26289
26290 else if (size < 8)
26291 return NON_PREFIXED_X;
26292
26293 else if (TARGET_VSX && size >= 16
26294 && (VECTOR_MODE_P (mode)
26295 || VECTOR_ALIGNMENT_P (mode)
26296 || mode == TImode || mode == CTImode))
26297 return NON_PREFIXED_DQ;
26298
26299 else
26300 return NON_PREFIXED_DEFAULT;
26301 }
26302
26303 /* GPRs use the DS format for 64-bit items on 64-bit systems, and the D format
26304 otherwise. Assume that any other register, such as LR, CRs, etc., will go
26305 through the GPR registers for memory operations. */
26306 else if (TARGET_POWERPC64 && size >= 8)
26307 return NON_PREFIXED_DS;
26308
26309 return NON_PREFIXED_D;
26310 }
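/* Illustrative mappings from the routine above (a sketch):
   DFmode in an FPR                    -> NON_PREFIXED_D  (LFD/STFD);
   V16QImode in an Altivec reg, power9 -> NON_PREFIXED_DQ (LXV/STXV);
   V16QImode in an Altivec reg, pre-p9 -> NON_PREFIXED_X;
   DImode in a GPR, 64-bit system      -> NON_PREFIXED_DS (LD/STD).  */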
26311
26312 \f
26313 /* Whether a load instruction is a prefixed instruction. This is called from
26314 the prefixed attribute processing. */
26315
26316 bool
26317 prefixed_load_p (rtx_insn *insn)
26318 {
26319 /* Validate the insn to make sure it is a normal load insn. */
26320 extract_insn_cached (insn);
26321 if (recog_data.n_operands < 2)
26322 return false;
26323
26324 rtx reg = recog_data.operand[0];
26325 rtx mem = recog_data.operand[1];
26326
26327 if (!REG_P (reg) && !SUBREG_P (reg))
26328 return false;
26329
26330 if (!MEM_P (mem))
26331 return false;
26332
26333 /* Prefixed load instructions do not support update or indexed forms. */
26334 if (get_attr_indexed (insn) == INDEXED_YES
26335 || get_attr_update (insn) == UPDATE_YES)
26336 return false;
26337
26338 /* LWA uses the DS format instead of the D format that LWZ uses. */
26339 enum non_prefixed_form non_prefixed;
26340 machine_mode reg_mode = GET_MODE (reg);
26341 machine_mode mem_mode = GET_MODE (mem);
26342
26343 if (mem_mode == SImode && reg_mode == DImode
26344 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26345 non_prefixed = NON_PREFIXED_DS;
26346
26347 else
26348 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26349
26350 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26351 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26352 else
26353 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26354 }
26355
26356 /* Whether a store instruction is a prefixed instruction. This is called from
26357 the prefixed attribute processing. */
26358
26359 bool
26360 prefixed_store_p (rtx_insn *insn)
26361 {
26362 /* Validate the insn to make sure it is a normal store insn. */
26363 extract_insn_cached (insn);
26364 if (recog_data.n_operands < 2)
26365 return false;
26366
26367 rtx mem = recog_data.operand[0];
26368 rtx reg = recog_data.operand[1];
26369
26370 if (!REG_P (reg) && !SUBREG_P (reg))
26371 return false;
26372
26373 if (!MEM_P (mem))
26374 return false;
26375
26376 /* Prefixed store instructions do not support update or indexed forms. */
26377 if (get_attr_indexed (insn) == INDEXED_YES
26378 || get_attr_update (insn) == UPDATE_YES)
26379 return false;
26380
26381 machine_mode mem_mode = GET_MODE (mem);
26382 rtx addr = XEXP (mem, 0);
26383 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26384
26385 /* We need to make sure we aren't looking at a stfs, which doesn't look
26386 like the other things reg_to_non_prefixed/address_is_prefixed
26387 look for. */
26388 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26389 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26390 else
26391 return address_is_prefixed (addr, mem_mode, non_prefixed);
26392 }
26393
26394 /* Whether a load immediate or add instruction is a prefixed instruction. This
26395 is called from the prefixed attribute processing. */
26396
26397 bool
26398 prefixed_paddi_p (rtx_insn *insn)
26399 {
26400 rtx set = single_set (insn);
26401 if (!set)
26402 return false;
26403
26404 rtx dest = SET_DEST (set);
26405 rtx src = SET_SRC (set);
26406
26407 if (!REG_P (dest) && !SUBREG_P (dest))
26408 return false;
26409
26410 /* Is this a load immediate that can't be done with a simple ADDI or
26411 ADDIS? */
26412 if (CONST_INT_P (src))
26413 return (satisfies_constraint_eI (src)
26414 && !satisfies_constraint_I (src)
26415 && !satisfies_constraint_L (src));
26416
26417 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26418 ADDIS? */
26419 if (GET_CODE (src) == PLUS)
26420 {
26421 rtx op1 = XEXP (src, 1);
26422
26423 return (CONST_INT_P (op1)
26424 && satisfies_constraint_eI (op1)
26425 && !satisfies_constraint_I (op1)
26426 && !satisfies_constraint_L (op1));
26427 }
26428
26429 /* If not, is it a load of a PC-relative address? */
26430 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26431 return false;
26432
26433 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26434 return false;
26435
26436 enum insn_form iform = address_to_insn_form (src, Pmode,
26437 NON_PREFIXED_DEFAULT);
26438
26439 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26440 }
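/* Worked example (a sketch): loading the constant 100000 does not fit
   the 16-bit signed "I" constraint and is not an ADDIS-style shifted
   constant ("L"), but it satisfies the 34-bit "eI" constraint, so with
   prefixed instructions it becomes "pli r3,100000" and this predicate
   returns true for it.  */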
26441
26442 /* Whether the next instruction needs a 'p' prefix issued before the
26443 instruction is printed out. */
26444 static bool prepend_p_to_next_insn;
26445
26446 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26447 outputting the assembler code. On the PowerPC, we remember if the current
26448 insn is a prefixed insn where we need to emit a 'p' before the insn.
26449
26450 In addition, if the insn is part of a PC-relative reference to an external
26451 label optimization, this is recorded also. */
26452 void
26453 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26454 {
26455 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26456 == MAYBE_PREFIXED_YES
26457 && get_attr_prefixed (insn) == PREFIXED_YES);
26458 return;
26459 }
26460
26461 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26462 We use it to emit a 'p' for prefixed insns that is set in
26463 FINAL_PRESCAN_INSN. */
26464 void
26465 rs6000_asm_output_opcode (FILE *stream)
26466 {
26467 if (prepend_p_to_next_insn)
26468 {
26469 fprintf (stream, "p");
26470
26471 /* Reset the flag in the case where there are separate insn lines in the
26472 sequence, so the 'p' is only emitted for the first line. This shows up
26473 when we are doing the PCREL_OPT optimization, in that the label created
26474 with %r<n> would have a leading 'p' printed. */
26475 prepend_p_to_next_insn = false;
26476 }
26477
26478 return;
26479 }
26480
26481 /* Emit the relocation to tie the next instruction to a previous instruction
26482 that loads up an external address. This is used to do the PCREL_OPT
26483 optimization. Note, the label is generated after the PLD of the GOT
26484 PC-relative address to allow the assembler to insert NOPs before the PLD
26485 instruction. The operand is a constant integer that is the label
26486 number. */
26487
26488 void
26489 output_pcrel_opt_reloc (rtx label_num)
26490 {
26491 rtx operands[1] = { label_num };
26492 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26493 operands);
26494 }
26495
26496 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26497 should be adjusted to reflect any required changes. This macro is used when
26498 there is some systematic length adjustment required that would be difficult
26499 to express in the length attribute.
26500
26501 On the PowerPC, we use this to adjust the length of an instruction if one or
26502 more prefixed instructions are generated, using the attribute
26503 max_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26504 hardware requires that a prefixed instruction does not cross a 64-byte
26505 boundary. This means the compiler has to assume the length of the first
26506 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26507 already set for the non-prefixed instruction, we just need to update for the
26508 difference. */
26509
26510 int
26511 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26512 {
26513 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26514 {
26515 rtx pattern = PATTERN (insn);
26516 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26517 && get_attr_prefixed (insn) == PREFIXED_YES)
26518 {
26519 int num_prefixed = get_attr_max_prefixed_insns (insn);
26520 length += 4 * (num_prefixed + 1);
26521 }
26522 }
26523
26524 return length;
26525 }
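/* Worked example (a sketch): an insn whose base length is 4 bytes and
   whose max_prefixed_insns attribute is 1 gets
   length = 4 + 4 * (1 + 1) = 12, i.e. 8 bytes for the prefixed insn
   plus 4 bytes for a possible alignment NOP the assembler inserts to
   avoid crossing a 64-byte boundary.  */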
26526
26527 \f
26528 #ifdef HAVE_GAS_HIDDEN
26529 # define USE_HIDDEN_LINKONCE 1
26530 #else
26531 # define USE_HIDDEN_LINKONCE 0
26532 #endif
26533
26534 /* Fills in the label name that should be used for a 476 link stack thunk. */
26535
26536 void
26537 get_ppc476_thunk_name (char name[32])
26538 {
26539 gcc_assert (TARGET_LINK_STACK);
26540
26541 if (USE_HIDDEN_LINKONCE)
26542 sprintf (name, "__ppc476.get_thunk");
26543 else
26544 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26545 }
26546
26547 /* This function emits the simple thunk routine that is used to preserve
26548 the link stack on the 476 cpu. */
26549
26550 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26551 static void
26552 rs6000_code_end (void)
26553 {
26554 char name[32];
26555 tree decl;
26556
26557 if (!TARGET_LINK_STACK)
26558 return;
26559
26560 get_ppc476_thunk_name (name);
26561
26562 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26563 build_function_type_list (void_type_node, NULL_TREE));
26564 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26565 NULL_TREE, void_type_node);
26566 TREE_PUBLIC (decl) = 1;
26567 TREE_STATIC (decl) = 1;
26568
26569 #if RS6000_WEAK
26570 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26571 {
26572 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26573 targetm.asm_out.unique_section (decl, 0);
26574 switch_to_section (get_named_section (decl, NULL, 0));
26575 DECL_WEAK (decl) = 1;
26576 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26577 targetm.asm_out.globalize_label (asm_out_file, name);
26578 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26579 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26580 }
26581 else
26582 #endif
26583 {
26584 switch_to_section (text_section);
26585 ASM_OUTPUT_LABEL (asm_out_file, name);
26586 }
26587
26588 DECL_INITIAL (decl) = make_node (BLOCK);
26589 current_function_decl = decl;
26590 allocate_struct_function (decl, false);
26591 init_function_start (decl);
26592 first_function_block_is_cold = false;
26593 /* Make sure unwind info is emitted for the thunk if needed. */
26594 final_start_function (emit_barrier (), asm_out_file, 1);
26595
26596 fputs ("\tblr\n", asm_out_file);
26597
26598 final_end_function ();
26599 init_insn_lengths ();
26600 free_after_compilation (cfun);
26601 set_cfun (NULL);
26602 current_function_decl = NULL;
26603 }
26604
26605 /* Add r30 to hard reg set if the prologue sets it up and it is not
26606 pic_offset_table_rtx. */
26607
26608 static void
26609 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26610 {
26611 if (!TARGET_SINGLE_PIC_BASE
26612 && TARGET_TOC
26613 && TARGET_MINIMAL_TOC
26614 && !constant_pool_empty_p ())
26615 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26616 if (cfun->machine->split_stack_argp_used)
26617 add_to_hard_reg_set (&set->set, Pmode, 12);
26618
26619 /* Make sure the hard reg set doesn't include r2, which was possibly added
26620 via PIC_OFFSET_TABLE_REGNUM. */
26621 if (TARGET_TOC)
26622 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26623 }
26624
26625 \f
26626 /* Helper function for rs6000_split_logical to emit a logical instruction after
26627 splitting the operation into single GPR registers.
26628
26629 DEST is the destination register.
26630 OP1 and OP2 are the input source registers.
26631 CODE is the base operation (AND, IOR, XOR, NOT).
26632 MODE is the machine mode.
26633 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26634 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26635 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26636
26637 static void
26638 rs6000_split_logical_inner (rtx dest,
26639 rtx op1,
26640 rtx op2,
26641 enum rtx_code code,
26642 machine_mode mode,
26643 bool complement_final_p,
26644 bool complement_op1_p,
26645 bool complement_op2_p)
26646 {
26647 rtx bool_rtx;
26648
26649 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26650 if (op2 && CONST_INT_P (op2)
26651 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26652 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26653 {
26654 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26655 HOST_WIDE_INT value = INTVAL (op2) & mask;
26656
26657 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26658 if (code == AND)
26659 {
26660 if (value == 0)
26661 {
26662 emit_insn (gen_rtx_SET (dest, const0_rtx));
26663 return;
26664 }
26665
26666 else if (value == mask)
26667 {
26668 if (!rtx_equal_p (dest, op1))
26669 emit_insn (gen_rtx_SET (dest, op1));
26670 return;
26671 }
26672 }
26673
26674 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26675 into separate ORI/ORIS or XORI/XORIS instructions. */
26676 else if (code == IOR || code == XOR)
26677 {
26678 if (value == 0)
26679 {
26680 if (!rtx_equal_p (dest, op1))
26681 emit_insn (gen_rtx_SET (dest, op1));
26682 return;
26683 }
26684 }
26685 }
26686
26687 if (code == AND && mode == SImode
26688 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26689 {
26690 emit_insn (gen_andsi3 (dest, op1, op2));
26691 return;
26692 }
26693
26694 if (complement_op1_p)
26695 op1 = gen_rtx_NOT (mode, op1);
26696
26697 if (complement_op2_p)
26698 op2 = gen_rtx_NOT (mode, op2);
26699
26700 /* For canonical RTL, if only one arm is inverted it is the first. */
26701 if (!complement_op1_p && complement_op2_p)
26702 std::swap (op1, op2);
26703
26704 bool_rtx = ((code == NOT)
26705 ? gen_rtx_NOT (mode, op1)
26706 : gen_rtx_fmt_ee (code, mode, op1, op2));
26707
26708 if (complement_final_p)
26709 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26710
26711 emit_insn (gen_rtx_SET (dest, bool_rtx));
26712 }
26713
26714 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26715 operations are split immediately during RTL generation to allow for more
26716 optimizations of the AND/IOR/XOR.
26717
26718 OPERANDS is an array containing the destination and two input operands.
26719 CODE is the base operation (AND, IOR, XOR, NOT).
26720 MODE is the machine mode.
26721 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26722 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26723 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26726
26727 static void
26728 rs6000_split_logical_di (rtx operands[3],
26729 enum rtx_code code,
26730 bool complement_final_p,
26731 bool complement_op1_p,
26732 bool complement_op2_p)
26733 {
26734 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
26735 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26736 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
26737 enum hi_lo { hi = 0, lo = 1 };
26738 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26739 size_t i;
26740
26741 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26742 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26743 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26744 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26745
26746 if (code == NOT)
26747 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26748 else
26749 {
26750 if (!CONST_INT_P (operands[2]))
26751 {
26752 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26753 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26754 }
26755 else
26756 {
26757 HOST_WIDE_INT value = INTVAL (operands[2]);
26758 HOST_WIDE_INT value_hi_lo[2];
26759
26760 gcc_assert (!complement_final_p);
26761 gcc_assert (!complement_op1_p);
26762 gcc_assert (!complement_op2_p);
26763
26764 value_hi_lo[hi] = value >> 32;
26765 value_hi_lo[lo] = value & lower_32bits;
26766
26767 for (i = 0; i < 2; i++)
26768 {
26769 HOST_WIDE_INT sub_value = value_hi_lo[i];
26770
26771 if (sub_value & sign_bit)
26772 sub_value |= upper_32bits;
26773
26774 op2_hi_lo[i] = GEN_INT (sub_value);
26775
26776 /* If this is an AND instruction, check to see if we need to load
26777 the value in a register. */
26778 if (code == AND && sub_value != -1 && sub_value != 0
26779 && !and_operand (op2_hi_lo[i], SImode))
26780 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26781 }
26782 }
26783 }
26784
26785 for (i = 0; i < 2; i++)
26786 {
26787 /* Split large IOR/XOR operations. */
26788 if ((code == IOR || code == XOR)
26789 && CONST_INT_P (op2_hi_lo[i])
26790 && !complement_final_p
26791 && !complement_op1_p
26792 && !complement_op2_p
26793 && !logical_const_operand (op2_hi_lo[i], SImode))
26794 {
26795 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
26796 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
26797 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
26798 rtx tmp = gen_reg_rtx (SImode);
26799
26800 /* Make sure the constant is sign extended. */
26801 if ((hi_16bits & sign_bit) != 0)
26802 hi_16bits |= upper_32bits;
26803
26804 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
26805 code, SImode, false, false, false);
26806
26807 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
26808 code, SImode, false, false, false);
26809 }
26810 else
26811 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
26812 code, SImode, complement_final_p,
26813 complement_op1_p, complement_op2_p);
26814 }
26815
26816 return;
26817 }
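/* Worked example (a sketch): XOR of a DImode register with 0x12345678
   on a 32-bit target splits into SImode halves.  The high half XORs
   with 0 and degenerates to a move; the low half, 0x12345678, is not a
   16-bit logical constant, so it is further split into
     xoris tmp,src,0x1234
     xori  dst,tmp,0x5678  */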
26818
26819 /* Split the insns that make up boolean operations operating on multiple GPR
26820 registers. The boolean MD patterns ensure that the inputs either are
26821 exactly the same as the output registers, or there is no overlap.
26822
26823 OPERANDS is an array containing the destination and two input operands.
26824 CODE is the base operation (AND, IOR, XOR, NOT).
26825 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26826 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26827 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26828
26829 void
26830 rs6000_split_logical (rtx operands[3],
26831 enum rtx_code code,
26832 bool complement_final_p,
26833 bool complement_op1_p,
26834 bool complement_op2_p)
26835 {
26836 machine_mode mode = GET_MODE (operands[0]);
26837 machine_mode sub_mode;
26838 rtx op0, op1, op2;
26839 int sub_size, regno0, regno1, nregs, i;
26840
26841 /* If this is DImode, use the specialized version that can run before
26842 register allocation. */
26843 if (mode == DImode && !TARGET_POWERPC64)
26844 {
26845 rs6000_split_logical_di (operands, code, complement_final_p,
26846 complement_op1_p, complement_op2_p);
26847 return;
26848 }
26849
26850 op0 = operands[0];
26851 op1 = operands[1];
26852 op2 = (code == NOT) ? NULL_RTX : operands[2];
26853 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
26854 sub_size = GET_MODE_SIZE (sub_mode);
26855 regno0 = REGNO (op0);
26856 regno1 = REGNO (op1);
26857
26858 gcc_assert (reload_completed);
26859 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26860 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26861
26862 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
26863 gcc_assert (nregs > 1);
26864
26865 if (op2 && REG_P (op2))
26866 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
26867
26868 for (i = 0; i < nregs; i++)
26869 {
26870 int offset = i * sub_size;
26871 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
26872 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
26873 rtx sub_op2 = ((code == NOT)
26874 ? NULL_RTX
26875 : simplify_subreg (sub_mode, op2, mode, offset));
26876
26877 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
26878 complement_final_p, complement_op1_p,
26879 complement_op2_p);
26880 }
26881
26882 return;
26883 }
26884
26885 /* Emit instructions to move SRC to DST. Called by splitters for
26886 multi-register moves. It will emit at most one instruction for
26887 each register that is accessed; that is, it won't emit li/lis pairs
26888 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26889 register. */
26890
26891 void
26892 rs6000_split_multireg_move (rtx dst, rtx src)
26893 {
26894 /* The register number of the first register being moved. */
26895 int reg;
26896 /* The mode that is to be moved. */
26897 machine_mode mode;
26898 /* The mode that the move is being done in, and its size. */
26899 machine_mode reg_mode;
26900 int reg_mode_size;
26901 /* The number of registers that will be moved. */
26902 int nregs;
26903
26904 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26905 mode = GET_MODE (dst);
26906 nregs = hard_regno_nregs (reg, mode);
26907
26908 /* If we have a vector quad register for MMA, and this is a load or store,
26909 see if we can use vector paired load/stores. */
26910 if (mode == XOmode && TARGET_MMA
26911 && (MEM_P (dst) || MEM_P (src)))
26912 {
26913 reg_mode = OOmode;
26914 nregs /= 2;
26915 }
26916 /* If we have a vector pair/quad mode, split it into two/four separate
26917 vectors. */
26918 else if (mode == OOmode || mode == XOmode)
26919 reg_mode = V1TImode;
26920 else if (FP_REGNO_P (reg))
26921 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26922 (TARGET_HARD_FLOAT ? DFmode : SFmode);
26923 else if (ALTIVEC_REGNO_P (reg))
26924 reg_mode = V16QImode;
26925 else
26926 reg_mode = word_mode;
26927 reg_mode_size = GET_MODE_SIZE (reg_mode);
26928
26929 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26930
26931 /* TDmode residing in FP registers is special, since the ISA requires that
26932 the lower-numbered word of a register pair is always the most significant
26933 word, even in little-endian mode. This does not match the usual subreg
26934 semantics, so we cannot use simplify_gen_subreg in those cases. Access
26935 the appropriate constituent registers "by hand" in little-endian mode.
26936
26937 Note we do not need to check for destructive overlap here since TDmode
26938 can only reside in even/odd register pairs. */
26939 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26940 {
26941 rtx p_src, p_dst;
26942 int i;
26943
26944 for (i = 0; i < nregs; i++)
26945 {
26946 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
26947 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
26948 else
26949 p_src = simplify_gen_subreg (reg_mode, src, mode,
26950 i * reg_mode_size);
26951
26952 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26953 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26954 else
26955 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26956 i * reg_mode_size);
26957
26958 emit_insn (gen_rtx_SET (p_dst, p_src));
26959 }
26960
26961 return;
26962 }
26963
26964 /* The __vector_pair and __vector_quad modes are multi-register
26965 modes, so if we have to load or store the registers, we have to be
26966 careful to properly swap them if we're in little endian mode
26967 below. This means the last register gets the first memory
26968 location. We also need to be careful of using the right register
26969 numbers if we are splitting XO to OO. */
26970 if (mode == OOmode || mode == XOmode)
26971 {
26972 nregs = hard_regno_nregs (reg, mode);
26973 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
26974 if (MEM_P (dst))
26975 {
26976 unsigned offset = 0;
26977 unsigned size = GET_MODE_SIZE (reg_mode);
26978
26979 /* If we are reading an accumulator register, we have to
26980 deprime it before we can access it. */
26981 if (TARGET_MMA
26982 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
26983 emit_insn (gen_mma_xxmfacc (src, src));
26984
26985 for (int i = 0; i < nregs; i += reg_mode_nregs)
26986 {
26987 unsigned subreg
26988 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
26989 rtx dst2 = adjust_address (dst, reg_mode, offset);
26990 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
26991 offset += size;
26992 emit_insn (gen_rtx_SET (dst2, src2));
26993 }
26994
26995 return;
26996 }
26997
26998 if (MEM_P (src))
26999 {
27000 unsigned offset = 0;
27001 unsigned size = GET_MODE_SIZE (reg_mode);
27002
27003 for (int i = 0; i < nregs; i += reg_mode_nregs)
27004 {
27005 unsigned subreg
27006 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27007 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27008 rtx src2 = adjust_address (src, reg_mode, offset);
27009 offset += size;
27010 emit_insn (gen_rtx_SET (dst2, src2));
27011 }
27012
27013 /* If we are writing an accumulator register, we have to
27014 prime it after we've written it. */
27015 if (TARGET_MMA
27016 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27017 emit_insn (gen_mma_xxmtacc (dst, dst));
27018
27019 return;
27020 }
27021
27022 if (GET_CODE (src) == UNSPEC
27023 || GET_CODE (src) == UNSPEC_VOLATILE)
27024 {
27025 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27026 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27027 gcc_assert (REG_P (dst));
27028 if (GET_MODE (src) == XOmode)
27029 gcc_assert (FP_REGNO_P (REGNO (dst)));
27030 if (GET_MODE (src) == OOmode)
27031 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27032
27033 int nvecs = XVECLEN (src, 0);
27034 for (int i = 0; i < nvecs; i++)
27035 {
27036 rtx op;
27037 int regno = reg + i;
27038
27039 if (WORDS_BIG_ENDIAN)
27040 {
27041 op = XVECEXP (src, 0, i);
27042
27043 /* If we are loading an even VSX register and the memory location
27044 is adjacent to the next register's memory location (if any),
27045 then we can load them both with one LXVP instruction. */
27046 if ((regno & 1) == 0)
27047 {
27048 rtx op2 = XVECEXP (src, 0, i + 1);
27049 if (adjacent_mem_locations (op, op2) == op)
27050 {
27051 op = adjust_address (op, OOmode, 0);
27052 /* Skip the next register, since we're going to
27053 load it together with this register. */
27054 i++;
27055 }
27056 }
27057 }
27058 else
27059 {
27060 op = XVECEXP (src, 0, nvecs - i - 1);
27061
27062 /* If we are loading an even VSX register and the memory location
27063 is adjacent to the next register's memory location (if any),
27064 then we can load them both with one LXVP instruction. */
27065 if ((regno & 1) == 0)
27066 {
27067 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27068 if (adjacent_mem_locations (op2, op) == op2)
27069 {
27070 op = adjust_address (op2, OOmode, 0);
27071 /* Skip the next register, since we're going to
27072 load it together with this register. */
27073 i++;
27074 }
27075 }
27076 }
27077
27078 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27079 emit_insn (gen_rtx_SET (dst_i, op));
27080 }
27081
27082 /* We are writing an accumulator register, so we have to
27083 prime it after we've written it. */
27084 if (GET_MODE (src) == XOmode)
27085 emit_insn (gen_mma_xxmtacc (dst, dst));
27086
27087 return;
27088 }
27089
27090 /* Register -> register moves can use common code. */
27091 }
27092
27093 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27094 {
27095 /* If we are reading an accumulator register, we have to
27096 deprime it before we can access it. */
27097 if (TARGET_MMA
27098 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27099 emit_insn (gen_mma_xxmfacc (src, src));
27100
27101 /* Move register range backwards, if we might have destructive
27102 overlap. */
27103 int i;
27104 /* XO/OO are opaque modes, so we cannot use subregs. */
27105 if (mode == OOmode || mode == XOmode)
27106 {
27107 for (i = nregs - 1; i >= 0; i--)
27108 {
27109 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27110 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27111 emit_insn (gen_rtx_SET (dst_i, src_i));
27112 }
27113 }
27114 else
27115 {
27116 for (i = nregs - 1; i >= 0; i--)
27117 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27118 i * reg_mode_size),
27119 simplify_gen_subreg (reg_mode, src, mode,
27120 i * reg_mode_size)));
27121 }
27122
27123 /* If we are writing an accumulator register, we have to
27124 prime it after we've written it. */
27125 if (TARGET_MMA
27126 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27127 emit_insn (gen_mma_xxmtacc (dst, dst));
27128 }
27129 else
27130 {
27131 int i;
27132 int j = -1;
27133 bool used_update = false;
27134 rtx restore_basereg = NULL_RTX;
27135
27136 if (MEM_P (src) && INT_REGNO_P (reg))
27137 {
27138 rtx breg;
27139
27140 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27141 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27142 {
27143 rtx delta_rtx;
27144 breg = XEXP (XEXP (src, 0), 0);
27145 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27146 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27147 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27148 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27149 src = replace_equiv_address (src, breg);
27150 }
27151 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27152 {
27153 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27154 {
27155 rtx basereg = XEXP (XEXP (src, 0), 0);
27156 if (TARGET_UPDATE)
27157 {
27158 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27159 emit_insn (gen_rtx_SET (ndst,
27160 gen_rtx_MEM (reg_mode,
27161 XEXP (src, 0))));
27162 used_update = true;
27163 }
27164 else
27165 emit_insn (gen_rtx_SET (basereg,
27166 XEXP (XEXP (src, 0), 1)));
27167 src = replace_equiv_address (src, basereg);
27168 }
27169 else
27170 {
27171 rtx basereg = gen_rtx_REG (Pmode, reg);
27172 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27173 src = replace_equiv_address (src, basereg);
27174 }
27175 }
27176
27177 breg = XEXP (src, 0);
27178 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27179 breg = XEXP (breg, 0);
27180
27181 /* If the base register we are using to address memory is
27182 also a destination reg, then change that register last. */
27183 if (REG_P (breg)
27184 && REGNO (breg) >= REGNO (dst)
27185 && REGNO (breg) < REGNO (dst) + nregs)
27186 j = REGNO (breg) - REGNO (dst);
27187 }
27188 else if (MEM_P (dst) && INT_REGNO_P (reg))
27189 {
27190 rtx breg;
27191
27192 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27193 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27194 {
27195 rtx delta_rtx;
27196 breg = XEXP (XEXP (dst, 0), 0);
27197 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27198 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27199 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27200
27201 /* We have to update the breg before doing the store.
27202 Use store with update, if available. */
27203
27204 if (TARGET_UPDATE)
27205 {
27206 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27207 emit_insn (TARGET_32BIT
27208 ? (TARGET_POWERPC64
27209 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27210 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27211 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27212 used_update = true;
27213 }
27214 else
27215 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27216 dst = replace_equiv_address (dst, breg);
27217 }
27218 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27219 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27220 {
27221 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27222 {
27223 rtx basereg = XEXP (XEXP (dst, 0), 0);
27224 if (TARGET_UPDATE)
27225 {
27226 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27227 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27228 XEXP (dst, 0)),
27229 nsrc));
27230 used_update = true;
27231 }
27232 else
27233 emit_insn (gen_rtx_SET (basereg,
27234 XEXP (XEXP (dst, 0), 1)));
27235 dst = replace_equiv_address (dst, basereg);
27236 }
27237 else
27238 {
27239 rtx basereg = XEXP (XEXP (dst, 0), 0);
27240 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27241 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27242 && REG_P (basereg)
27243 && REG_P (offsetreg)
27244 && REGNO (basereg) != REGNO (offsetreg));
27245 if (REGNO (basereg) == 0)
27246 {
27247 rtx tmp = offsetreg;
27248 offsetreg = basereg;
27249 basereg = tmp;
27250 }
27251 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27252 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27253 dst = replace_equiv_address (dst, basereg);
27254 }
27255 }
27256 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27257 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27258 }
27259
27260 /* If we are reading an accumulator register, we have to
27261 deprime it before we can access it. */
27262 if (TARGET_MMA && REG_P (src)
27263 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27264 emit_insn (gen_mma_xxmfacc (src, src));
27265
27266 for (i = 0; i < nregs; i++)
27267 {
27268 /* Calculate index to next subword. */
27269 ++j;
27270 if (j == nregs)
27271 j = 0;
27272
27273 /* If the compiler already emitted the move of the first word by
27274 a store with update, there is no need to do anything. */
27275 if (j == 0 && used_update)
27276 continue;
27277
27278 /* XO/OO are opaque modes, so we cannot use subregs. */
27279 if (mode == OOmode || mode == XOmode)
27280 {
27281 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27282 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27283 emit_insn (gen_rtx_SET (dst_i, src_i));
27284 }
27285 else
27286 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27287 j * reg_mode_size),
27288 simplify_gen_subreg (reg_mode, src, mode,
27289 j * reg_mode_size)));
27290 }
27291
27292 /* If we are writing an accumulator register, we have to
27293 prime it after we've written it. */
27294 if (TARGET_MMA && REG_P (dst)
27295 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27296 emit_insn (gen_mma_xxmtacc (dst, dst));
27297
27298 if (restore_basereg != NULL_RTX)
27299 emit_insn (restore_basereg);
27300 }
27301 }
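
/* For example (hypothetical register numbers), copying a TImode value
   from r10:r11 into the overlapping pair r11:r12 on a 64-bit target is
   split backwards so that r11 is read before it is overwritten:

     mr 12,11
     mr 11,10  */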
27302 \f
27303 /* Return true if the peephole2 can combine a load involving a combination of
27304 an addis instruction and a load with an offset that can be fused together on
27305 a power8. */
27306
27307 bool
27308 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27309 rtx addis_value, /* addis value. */
27310 rtx target, /* target register that is loaded. */
27311 rtx mem) /* bottom part of the memory addr. */
27312 {
27313 rtx addr;
27314 rtx base_reg;
27315
27316 /* Validate arguments. */
27317 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27318 return false;
27319
27320 if (!base_reg_operand (target, GET_MODE (target)))
27321 return false;
27322
27323 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27324 return false;
27325
27326 /* Allow sign/zero extension. */
27327 if (GET_CODE (mem) == ZERO_EXTEND
27328 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27329 mem = XEXP (mem, 0);
27330
27331 if (!MEM_P (mem))
27332 return false;
27333
27334 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27335 return false;
27336
27337 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27338 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27339 return false;
27340
27341 /* Validate that the register used to load the high value is either the
27342 register being loaded, or we can safely replace its use.
27343
27344 This function is only called from the peephole2 pass and we assume that
27345 there are 2 instructions in the peephole (addis and load), so we want to
27346 check that the target register is not used in the memory address and that
27347 the register holding the addis result is dead after the peephole. */
27348 if (REGNO (addis_reg) != REGNO (target))
27349 {
27350 if (reg_mentioned_p (target, mem))
27351 return false;
27352
27353 if (!peep2_reg_dead_p (2, addis_reg))
27354 return false;
27355
27356 /* If the target register being loaded is the stack pointer, we must
27357 avoid loading any other value into it, even temporarily. */
27358 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27359 return false;
27360 }
27361
27362 base_reg = XEXP (addr, 0);
27363 return REGNO (addis_reg) == REGNO (base_reg);
27364 }
27365
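/* As a concrete example (the register and symbol are hypothetical), the
   two-insn peephole sequence that this predicate accepts looks like:

     addis 9,2,.LC0@toc@ha        <- addis_reg = r9, addis_value
     lwz   9,.LC0@toc@l(9)        <- target = r9, mem

   which a power8 can fuse into a single operation.  */
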
27366 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27367 sequence. We adjust the addis register to use the target register. If the
27368 load sign extends, we adjust the code to do the zero extending load, and an
27369 explicit sign extension later since the fusion only covers zero extending
27370 loads.
27371
27372 The operands are:
27373 operands[0] register set with addis (to be replaced with target)
27374 operands[1] value set via addis
27375 operands[2] target register being loaded
27376 operands[3] D-form memory reference using operands[0]. */
27377
27378 void
27379 expand_fusion_gpr_load (rtx *operands)
27380 {
27381 rtx addis_value = operands[1];
27382 rtx target = operands[2];
27383 rtx orig_mem = operands[3];
27384 rtx new_addr, new_mem, orig_addr, offset;
27385 enum rtx_code plus_or_lo_sum;
27386 machine_mode target_mode = GET_MODE (target);
27387 machine_mode extend_mode = target_mode;
27388 machine_mode ptr_mode = Pmode;
27389 enum rtx_code extend = UNKNOWN;
27390
27391 if (GET_CODE (orig_mem) == ZERO_EXTEND
27392 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27393 {
27394 extend = GET_CODE (orig_mem);
27395 orig_mem = XEXP (orig_mem, 0);
27396 target_mode = GET_MODE (orig_mem);
27397 }
27398
27399 gcc_assert (MEM_P (orig_mem));
27400
27401 orig_addr = XEXP (orig_mem, 0);
27402 plus_or_lo_sum = GET_CODE (orig_addr);
27403 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27404
27405 offset = XEXP (orig_addr, 1);
27406 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27407 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27408
27409 if (extend != UNKNOWN)
27410 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27411
27412 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27413 UNSPEC_FUSION_GPR);
27414 emit_insn (gen_rtx_SET (target, new_mem));
27415
27416 if (extend == SIGN_EXTEND)
27417 {
27418 int sub_off = ((BYTES_BIG_ENDIAN)
27419 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27420 : 0);
27421 rtx sign_reg
27422 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27423
27424 emit_insn (gen_rtx_SET (target,
27425 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27426 }
27427
27428 return;
27429 }
27430
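/* A rough sketch of the sign-extending case (hypothetical operands): a
   fused sign-extending halfword load is rewritten as a zero-extending
   load followed by an explicit sign extension, since only the
   zero-extending form is fused:

     addis 9,2,sym@toc@ha
     lhz   9,sym@toc@l(9)
     extsh 9,9  */
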
27431 /* Emit the addis instruction that will be part of a fused instruction
27432 sequence. */
27433
27434 void
27435 emit_fusion_addis (rtx target, rtx addis_value)
27436 {
27437 rtx fuse_ops[10];
27438 const char *addis_str = NULL;
27439
27440 /* Emit the addis instruction. */
27441 fuse_ops[0] = target;
27442 if (satisfies_constraint_L (addis_value))
27443 {
27444 fuse_ops[1] = addis_value;
27445 addis_str = "lis %0,%v1";
27446 }
27447
27448 else if (GET_CODE (addis_value) == PLUS)
27449 {
27450 rtx op0 = XEXP (addis_value, 0);
27451 rtx op1 = XEXP (addis_value, 1);
27452
27453 if (REG_P (op0) && CONST_INT_P (op1)
27454 && satisfies_constraint_L (op1))
27455 {
27456 fuse_ops[1] = op0;
27457 fuse_ops[2] = op1;
27458 addis_str = "addis %0,%1,%v2";
27459 }
27460 }
27461
27462 else if (GET_CODE (addis_value) == HIGH)
27463 {
27464 rtx value = XEXP (addis_value, 0);
27465 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27466 {
27467 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27468 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27469 if (TARGET_ELF)
27470 addis_str = "addis %0,%2,%1@toc@ha";
27471
27472 else if (TARGET_XCOFF)
27473 addis_str = "addis %0,%1@u(%2)";
27474
27475 else
27476 gcc_unreachable ();
27477 }
27478
27479 else if (GET_CODE (value) == PLUS)
27480 {
27481 rtx op0 = XEXP (value, 0);
27482 rtx op1 = XEXP (value, 1);
27483
27484 if (GET_CODE (op0) == UNSPEC
27485 && XINT (op0, 1) == UNSPEC_TOCREL
27486 && CONST_INT_P (op1))
27487 {
27488 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
27489 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
27490 fuse_ops[3] = op1;
27491 if (TARGET_ELF)
27492 addis_str = "addis %0,%2,%1+%3@toc@ha";
27493
27494 else if (TARGET_XCOFF)
27495 addis_str = "addis %0,%1+%3@u(%2)";
27496
27497 else
27498 gcc_unreachable ();
27499 }
27500 }
27501
27502 else if (satisfies_constraint_L (value))
27503 {
27504 fuse_ops[1] = value;
27505 addis_str = "lis %0,%v1";
27506 }
27507
27508 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
27509 {
27510 fuse_ops[1] = value;
27511 addis_str = "lis %0,%1@ha";
27512 }
27513 }
27514
27515 if (!addis_str)
27516 fatal_insn ("Could not generate addis value for fusion", addis_value);
27517
27518 output_asm_insn (addis_str, fuse_ops);
27519 }
27520
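/* For example, the main shapes handled above produce output along the
   lines of (the operands shown are illustrative only):

     lis   9,0x1234               <- plain 16-bit high constant
     addis 9,10,0x1234            <- PLUS of a register and constant
     addis 9,2,sym@toc@ha         <- HIGH of a TOC-relative symbol (ELF)  */
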
27521 /* Emit a D-form load or store instruction that is the second instruction
27522 of a fusion sequence. */
27523
27524 static void
27525 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
27526 {
27527 rtx fuse_ops[10];
27528 char insn_template[80];
27529
27530 fuse_ops[0] = load_reg;
27531 fuse_ops[1] = addis_reg;
27532
27533 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
27534 {
27535 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
27536 fuse_ops[2] = offset;
27537 output_asm_insn (insn_template, fuse_ops);
27538 }
27539
27540 else if (GET_CODE (offset) == UNSPEC
27541 && XINT (offset, 1) == UNSPEC_TOCREL)
27542 {
27543 if (TARGET_ELF)
27544 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
27545
27546 else if (TARGET_XCOFF)
27547 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27548
27549 else
27550 gcc_unreachable ();
27551
27552 fuse_ops[2] = XVECEXP (offset, 0, 0);
27553 output_asm_insn (insn_template, fuse_ops);
27554 }
27555
27556 else if (GET_CODE (offset) == PLUS
27557 && GET_CODE (XEXP (offset, 0)) == UNSPEC
27558 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
27559 && CONST_INT_P (XEXP (offset, 1)))
27560 {
27561 rtx tocrel_unspec = XEXP (offset, 0);
27562 if (TARGET_ELF)
27563 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
27564
27565 else if (TARGET_XCOFF)
27566 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
27567
27568 else
27569 gcc_unreachable ();
27570
27571 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
27572 fuse_ops[3] = XEXP (offset, 1);
27573 output_asm_insn (insn_template, fuse_ops);
27574 }
27575
27576 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
27577 {
27578 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27579
27580 fuse_ops[2] = offset;
27581 output_asm_insn (insn_template, fuse_ops);
27582 }
27583
27584 else
27585 fatal_insn ("Unable to generate load/store offset for fusion", offset);
27586
27587 return;
27588 }
27589
27590 /* Given an address, convert it into the addis and load offset parts. Addresses
27591 created during the peephole2 process look like:
27592 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27593 (unspec [(...)] UNSPEC_TOCREL)) */
27594
27595 static void
27596 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
27597 {
27598 rtx hi, lo;
27599
27600 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
27601 {
27602 hi = XEXP (addr, 0);
27603 lo = XEXP (addr, 1);
27604 }
27605 else
27606 gcc_unreachable ();
27607
27608 *p_hi = hi;
27609 *p_lo = lo;
27610 }
27611
27612 /* Return a string to fuse an addis instruction with a GPR load into the same
27613 register that the addis instruction set. The address that is used
27614 is the logical address that was formed during peephole2:
27615 (lo_sum (high) (low-part))
27616
27617 The code is complicated, so we call output_asm_insn directly, and just
27618 return "". */
27619
27620 const char *
27621 emit_fusion_gpr_load (rtx target, rtx mem)
27622 {
27623 rtx addis_value;
27624 rtx addr;
27625 rtx load_offset;
27626 const char *load_str = NULL;
27627 machine_mode mode;
27628
27629 if (GET_CODE (mem) == ZERO_EXTEND)
27630 mem = XEXP (mem, 0);
27631
27632 gcc_assert (REG_P (target) && MEM_P (mem));
27633
27634 addr = XEXP (mem, 0);
27635 fusion_split_address (addr, &addis_value, &load_offset);
27636
27637 /* Now emit the load instruction to the same register. */
27638 mode = GET_MODE (mem);
27639 switch (mode)
27640 {
27641 case E_QImode:
27642 load_str = "lbz";
27643 break;
27644
27645 case E_HImode:
27646 load_str = "lhz";
27647 break;
27648
27649 case E_SImode:
27650 case E_SFmode:
27651 load_str = "lwz";
27652 break;
27653
27654 case E_DImode:
27655 case E_DFmode:
27656 gcc_assert (TARGET_POWERPC64);
27657 load_str = "ld";
27658 break;
27659
27660 default:
27661 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
27662 }
27663
27664 /* Emit the addis instruction. */
27665 emit_fusion_addis (target, addis_value);
27666
27667 /* Emit the D-form load instruction. */
27668 emit_fusion_load (target, target, load_offset, load_str);
27669
27670 return "";
27671 }
27672 \f
27673
27674 #ifdef RS6000_GLIBC_ATOMIC_FENV
27675 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
27676 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
27677 #endif
27678
27679 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
27680
27681 static void
27682 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27683 {
27684 if (!TARGET_HARD_FLOAT)
27685 {
27686 #ifdef RS6000_GLIBC_ATOMIC_FENV
27687 if (atomic_hold_decl == NULL_TREE)
27688 {
27689 atomic_hold_decl
27690 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27691 get_identifier ("__atomic_feholdexcept"),
27692 build_function_type_list (void_type_node,
27693 double_ptr_type_node,
27694 NULL_TREE));
27695 TREE_PUBLIC (atomic_hold_decl) = 1;
27696 DECL_EXTERNAL (atomic_hold_decl) = 1;
27697 }
27698
27699 if (atomic_clear_decl == NULL_TREE)
27700 {
27701 atomic_clear_decl
27702 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27703 get_identifier ("__atomic_feclearexcept"),
27704 build_function_type_list (void_type_node,
27705 NULL_TREE));
27706 TREE_PUBLIC (atomic_clear_decl) = 1;
27707 DECL_EXTERNAL (atomic_clear_decl) = 1;
27708 }
27709
27710 tree const_double = build_qualified_type (double_type_node,
27711 TYPE_QUAL_CONST);
27712 tree const_double_ptr = build_pointer_type (const_double);
27713 if (atomic_update_decl == NULL_TREE)
27714 {
27715 atomic_update_decl
27716 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27717 get_identifier ("__atomic_feupdateenv"),
27718 build_function_type_list (void_type_node,
27719 const_double_ptr,
27720 NULL_TREE));
27721 TREE_PUBLIC (atomic_update_decl) = 1;
27722 DECL_EXTERNAL (atomic_update_decl) = 1;
27723 }
27724
27725 tree fenv_var = create_tmp_var_raw (double_type_node);
27726 TREE_ADDRESSABLE (fenv_var) = 1;
27727 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
27728 build4 (TARGET_EXPR, double_type_node, fenv_var,
27729 void_node, NULL_TREE, NULL_TREE));
27730
27731 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
27732 *clear = build_call_expr (atomic_clear_decl, 0);
27733 *update = build_call_expr (atomic_update_decl, 1,
27734 fold_convert (const_double_ptr, fenv_addr));
27735 #endif
27736 return;
27737 }
27738
27739 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
27740 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
27741 tree call_mffs = build_call_expr (mffs, 0);
27742
27743 /* Generates the equivalent of feholdexcept (&fenv_var)
27744
27745 *fenv_var = __builtin_mffs ();
27746 double fenv_hold;
27747 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
27748 __builtin_mtfsf (0xff, fenv_hold); */
27749
27750 /* Mask to clear everything except for the rounding modes and non-IEEE
27751 arithmetic flag. */
27752 const unsigned HOST_WIDE_INT hold_exception_mask
27753 = HOST_WIDE_INT_C (0xffffffff00000007);
27754
27755 tree fenv_var = create_tmp_var_raw (double_type_node);
27756
27757 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
27758 NULL_TREE, NULL_TREE);
27759
27760 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
27761 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27762 build_int_cst (uint64_type_node,
27763 hold_exception_mask));
27764
27765 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27766 fenv_llu_and);
27767
27768 tree hold_mtfsf = build_call_expr (mtfsf, 2,
27769 build_int_cst (unsigned_type_node, 0xff),
27770 fenv_hold_mtfsf);
27771
27772 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
27773
27774 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
27775
27776 double fenv_clear = __builtin_mffs ();
27777 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
27778 __builtin_mtfsf (0xff, fenv_clear); */
27779
27780 /* Mask to clear everything in the lower word of the FPSCR image,
27781 including the exception bits. */
27782 const unsigned HOST_WIDE_INT clear_exception_mask
27783 = HOST_WIDE_INT_C (0xffffffff00000000);
27784
27785 tree fenv_clear = create_tmp_var_raw (double_type_node);
27786
27787 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
27788 call_mffs, NULL_TREE, NULL_TREE);
27789
27790 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
27791 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
27792 fenv_clean_llu,
27793 build_int_cst (uint64_type_node,
27794 clear_exception_mask));
27795
27796 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27797 fenv_clear_llu_and);
27798
27799 tree clear_mtfsf = build_call_expr (mtfsf, 2,
27800 build_int_cst (unsigned_type_node, 0xff),
27801 fenv_clear_mtfsf);
27802
27803 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
27804
27805 /* Generates the equivalent of feupdateenv (&fenv_var)
27806
27807 double old_fenv = __builtin_mffs ();
27808 double fenv_update;
27809 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
27810 (*(uint64_t*)fenv_var & 0x1ff80fff);
27811 __builtin_mtfsf (0xff, fenv_update); */
27812
27813 const unsigned HOST_WIDE_INT update_exception_mask
27814 = HOST_WIDE_INT_C (0xffffffff1fffff00);
27815 const unsigned HOST_WIDE_INT new_exception_mask
27816 = HOST_WIDE_INT_C (0x1ff80fff);
27817
27818 tree old_fenv = create_tmp_var_raw (double_type_node);
27819 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
27820 call_mffs, NULL_TREE, NULL_TREE);
27821
27822 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
27823 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
27824 build_int_cst (uint64_type_node,
27825 update_exception_mask));
27826
27827 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27828 build_int_cst (uint64_type_node,
27829 new_exception_mask));
27830
27831 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
27832 old_llu_and, new_llu_and);
27833
27834 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27835 new_llu_mask);
27836
27837 tree update_mtfsf = build_call_expr (mtfsf, 2,
27838 build_int_cst (unsigned_type_node, 0xff),
27839 fenv_update_mtfsf);
27840
27841 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
27842 }
27843
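/* For reference, the middle end uses the HOLD/CLEAR/UPDATE sequences
   built above roughly as follows when expanding an atomic floating point
   compound assignment (simplified pseudo code, not the exact expansion):

     hold;                        // save env, disable exception traps
     retry:
       clear;                     // discard speculative exceptions
       tmp = *addr op value;
       if (!compare_exchange (addr, &expected, tmp)) goto retry;
     update;                      // restore env, raise real exceptions  */
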
27844 void
27845 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
27846 {
27847 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27848
27849 rtx_tmp0 = gen_reg_rtx (V2DFmode);
27850 rtx_tmp1 = gen_reg_rtx (V2DFmode);
27851
27852 /* The destination of the vmrgew instruction layout is:
27853 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
27854 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
27855 vmrgew instruction will be correct. */
27856 if (BYTES_BIG_ENDIAN)
27857 {
27858 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
27859 GEN_INT (0)));
27860 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
27861 GEN_INT (3)));
27862 }
27863 else
27864 {
27865 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
27866 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
27867 }
27868
27869 rtx_tmp2 = gen_reg_rtx (V4SFmode);
27870 rtx_tmp3 = gen_reg_rtx (V4SFmode);
27871
27872 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
27873 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
27874
27875 if (BYTES_BIG_ENDIAN)
27876 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27877 else
27878 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
27879 }
27880
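/* Conceptually, the sequence above computes

     dst = { (float) src1[0], (float) src1[1],
             (float) src2[0], (float) src2[1] };

   with the xxpermdi/xvcvdpsp/vmrgew combination arranging the lanes so
   the result is the same for big and little endian.  */
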
27881 void
27882 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
27883 {
27884 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27885
27886 rtx_tmp0 = gen_reg_rtx (V2DImode);
27887 rtx_tmp1 = gen_reg_rtx (V2DImode);
27888
27889 /* The destination of the vmrgew instruction layout is:
27890 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
27891 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
27892 vmrgew instruction will be correct. */
27893 if (BYTES_BIG_ENDIAN)
27894 {
27895 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
27896 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
27897 }
27898 else
27899 {
27900 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
27901 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
27902 }
27903
27904 rtx_tmp2 = gen_reg_rtx (V4SFmode);
27905 rtx_tmp3 = gen_reg_rtx (V4SFmode);
27906
27907 if (signed_convert)
27908 {
27909 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
27910 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
27911 }
27912 else
27913 {
27914 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
27915 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
27916 }
27917
27918 if (BYTES_BIG_ENDIAN)
27919 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27920 else
27921 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
27922 }
27923
27924 void
27925 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
27926 rtx src2)
27927 {
27928 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27929
27930 rtx_tmp0 = gen_reg_rtx (V2DFmode);
27931 rtx_tmp1 = gen_reg_rtx (V2DFmode);
27932
27933 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
27934 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
27935
27936 rtx_tmp2 = gen_reg_rtx (V4SImode);
27937 rtx_tmp3 = gen_reg_rtx (V4SImode);
27938
27939 if (signed_convert)
27940 {
27941 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
27942 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
27943 }
27944 else
27945 {
27946 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
27947 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
27948 }
27949
27950 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
27951 }
27952
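/* Conceptually, the sequence above computes

     dst = { (int) src1[0], (int) src1[1],
             (int) src2[0], (int) src2[1] };

   using unsigned conversions instead when SIGNED_CONVERT is false.  */
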
27953 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
27954
27955 static bool
27956 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
27957 optimization_type opt_type)
27958 {
27959 switch (op)
27960 {
27961 case rsqrt_optab:
27962 return (opt_type == OPTIMIZE_FOR_SPEED
27963 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
27964
27965 default:
27966 return true;
27967 }
27968 }
27969
27970 /* Implement TARGET_CONSTANT_ALIGNMENT. */
27971
27972 static HOST_WIDE_INT
27973 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
27974 {
27975 if (TREE_CODE (exp) == STRING_CST
27976 && (STRICT_ALIGNMENT || !optimize_size))
27977 return MAX (align, BITS_PER_WORD);
27978 return align;
27979 }
27980
27981 /* Implement TARGET_STARTING_FRAME_OFFSET. */
27982
27983 static HOST_WIDE_INT
27984 rs6000_starting_frame_offset (void)
27985 {
27986 if (FRAME_GROWS_DOWNWARD)
27987 return 0;
27988 return RS6000_STARTING_FRAME_OFFSET;
27989 }
27990 \f
27991
27992 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
27993 function names from <foo>l to <foo>f128 if the default long double type is
27994 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
27995 include file switches the names on systems that support long double as IEEE
27996 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
27997 In the future, glibc will export names like __ieee128_sinf128 and we can
27998 switch to using those instead of using sinf128, which pollutes the user's
27999 namespace.
28000
28001 This will switch the names for Fortran math functions as well (which doesn't
28002 use math.h). However, Fortran needs other changes to the compiler and
28003 library before you can switch the real*16 type at compile time.
28004
28005 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28006 only do this transformation if the __float128 type is enabled. This
28007 prevents us from doing the transformation on older 32-bit ports that might
28008 have enabled using IEEE 128-bit floating point as the default long double
28009 type. */
28010
28011 static tree
28012 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28013 {
28014 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28015 && TREE_CODE (decl) == FUNCTION_DECL
28016 && DECL_IS_UNDECLARED_BUILTIN (decl)
28017 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28018 {
28019 size_t len = IDENTIFIER_LENGTH (id);
28020 const char *name = IDENTIFIER_POINTER (id);
28021 char *newname = NULL;
28022
28023 /* See if it is one of the built-in functions with an unusual name. */
28024 switch (DECL_FUNCTION_CODE (decl))
28025 {
28026 case BUILT_IN_DREML:
28027 newname = xstrdup ("__remainderieee128");
28028 break;
28029
28030 case BUILT_IN_GAMMAL:
28031 newname = xstrdup ("__lgammaieee128");
28032 break;
28033
28034 case BUILT_IN_GAMMAL_R:
28035 case BUILT_IN_LGAMMAL_R:
28036 newname = xstrdup ("__lgammaieee128_r");
28037 break;
28038
28039 case BUILT_IN_NEXTTOWARD:
28040 newname = xstrdup ("__nexttoward_to_ieee128");
28041 break;
28042
28043 case BUILT_IN_NEXTTOWARDF:
28044 newname = xstrdup ("__nexttowardf_to_ieee128");
28045 break;
28046
28047 case BUILT_IN_NEXTTOWARDL:
28048 newname = xstrdup ("__nexttowardieee128");
28049 break;
28050
28051 case BUILT_IN_POW10L:
28052 newname = xstrdup ("__exp10ieee128");
28053 break;
28054
28055 case BUILT_IN_SCALBL:
28056 newname = xstrdup ("__scalbieee128");
28057 break;
28058
28059 case BUILT_IN_SIGNIFICANDL:
28060 newname = xstrdup ("__significandieee128");
28061 break;
28062
28063 case BUILT_IN_SINCOSL:
28064 newname = xstrdup ("__sincosieee128");
28065 break;
28066
28067 default:
28068 break;
28069 }
28070
28071 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28072 if (!newname)
28073 {
28074 size_t printf_len = strlen ("printf");
28075 size_t scanf_len = strlen ("scanf");
28076
28077 if (len >= printf_len
28078 && strcmp (name + len - printf_len, "printf") == 0)
28079 newname = xasprintf ("__%sieee128", name);
28080
28081 else if (len >= scanf_len
28082 && strcmp (name + len - scanf_len, "scanf") == 0)
28083 newname = xasprintf ("__isoc99_%sieee128", name);
28084
28085 else if (name[len - 1] == 'l')
28086 {
28087 bool uses_ieee128_p = false;
28088 tree type = TREE_TYPE (decl);
28089 machine_mode ret_mode = TYPE_MODE (type);
28090
28091 /* See if the function returns an IEEE 128-bit floating point type or
28092 complex type. */
28093 if (ret_mode == TFmode || ret_mode == TCmode)
28094 uses_ieee128_p = true;
28095 else
28096 {
28097 function_args_iterator args_iter;
28098 tree arg;
28099
28100 /* See if the function passes an IEEE 128-bit floating point type
28101 or complex type. */
28102 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28103 {
28104 machine_mode arg_mode = TYPE_MODE (arg);
28105 if (arg_mode == TFmode || arg_mode == TCmode)
28106 {
28107 uses_ieee128_p = true;
28108 break;
28109 }
28110 }
28111 }
28112
28113 /* If we passed or returned an IEEE 128-bit floating point type,
28114 change the name. Use __<name>ieee128, instead of <name>l. */
28115 if (uses_ieee128_p)
28116 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28117 }
28118 }
28119
28120 if (newname)
28121 {
28122 if (TARGET_DEBUG_BUILTIN)
28123 fprintf (stderr, "Map %s => %s\n", name, newname);
28124
28125 id = get_identifier (newname);
28126 free (newname);
28127 }
28128 }
28129
28130 return id;
28131 }
28132
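/* For example, when long double is IEEE 128-bit, the code above produces
   mappings such as (assuming a glibc target):

     __builtin_sinl    -> __sinieee128
     __builtin_printf  -> __printfieee128
     __builtin_scanf   -> __isoc99_scanfieee128  */
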
28133 /* Predict whether the given loop in gimple will be transformed in the RTL
28134 doloop_optimize pass. */
28135
28136 static bool
28137 rs6000_predict_doloop_p (struct loop *loop)
28138 {
28139 gcc_assert (loop);
28140
28141 /* On rs6000, targetm.can_use_doloop_p is actually
28142 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28143 if (loop->inner != NULL)
28144 {
28145 if (dump_file && (dump_flags & TDF_DETAILS))
28146 fprintf (dump_file, "Predict doloop failure due to"
28147 " loop nesting.\n");
28148 return false;
28149 }
28150
28151 return true;
28152 }
28153
28154 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28155
28156 static machine_mode
28157 rs6000_preferred_doloop_mode (machine_mode)
28158 {
28159 return word_mode;
28160 }
28161
28162 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28163
28164 static bool
28165 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28166 {
28167 gcc_assert (MEM_P (mem));
28168
28169 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
28170 type addresses, so don't allow MEMs with those address types to be
28171 substituted as an equivalent expression. See PR93974 for details. */
28172 if (GET_CODE (XEXP (mem, 0)) == AND)
28173 return true;
28174
28175 return false;
28176 }
28177
28178 /* Implement TARGET_INVALID_CONVERSION. */
28179
28180 static const char *
28181 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28182 {
28183 /* Make sure we're working with the canonical types. */
28184 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28185 fromtype = TYPE_CANONICAL (fromtype);
28186 if (TYPE_CANONICAL (totype) != NULL_TREE)
28187 totype = TYPE_CANONICAL (totype);
28188
28189 machine_mode frommode = TYPE_MODE (fromtype);
28190 machine_mode tomode = TYPE_MODE (totype);
28191
28192 if (frommode != tomode)
28193 {
28194 /* Do not allow conversions to/from XOmode and OOmode types. */
28195 if (frommode == XOmode)
28196 return N_("invalid conversion from type %<__vector_quad%>");
28197 if (tomode == XOmode)
28198 return N_("invalid conversion to type %<__vector_quad%>");
28199 if (frommode == OOmode)
28200 return N_("invalid conversion from type %<__vector_pair%>");
28201 if (tomode == OOmode)
28202 return N_("invalid conversion to type %<__vector_pair%>");
28203 }
28204 else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
28205 {
28206 /* We really care about the modes of the base types. */
28207 frommode = TYPE_MODE (TREE_TYPE (fromtype));
28208 tomode = TYPE_MODE (TREE_TYPE (totype));
28209
28210 /* Do not allow conversions to/from XOmode and OOmode pointer
28211 types, except to/from void pointers. */
28212 if (frommode != tomode
28213 && frommode != VOIDmode
28214 && tomode != VOIDmode)
28215 {
28216 if (frommode == XOmode)
28217 return N_("invalid conversion from type %<* __vector_quad%>");
28218 if (tomode == XOmode)
28219 return N_("invalid conversion to type %<* __vector_quad%>");
28220 if (frommode == OOmode)
28221 return N_("invalid conversion from type %<* __vector_pair%>");
28222 if (tomode == OOmode)
28223 return N_("invalid conversion to type %<* __vector_pair%>");
28224 }
28225 }
28226
28227 /* Conversion allowed. */
28228 return NULL;
28229 }
28230
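/* For example, the following conversions would be rejected with the
   diagnostics above (a sketch; vq and vp are hypothetical variables):

     __vector_quad vq;
     __vector_pair vp = (__vector_pair) vq;        // invalid conversion
     __vector_quad *pq = (__vector_quad *) &vp;    // invalid conversion

   whereas converting either pointer type to and from 'void *' is still
   allowed.  */
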
28231 /* Convert a SFmode constant to the integer bit pattern. */
28232
28233 long
28234 rs6000_const_f32_to_i32 (rtx operand)
28235 {
28236 long value;
28237 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28238
28239 gcc_assert (GET_MODE (operand) == SFmode);
28240 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28241 return value;
28242 }
28243
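/* For example, the SFmode constant 1.0f yields the bit pattern
   0x3f800000 (sign 0, biased exponent 127, all-zero mantissa).  */
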
28244 void
28245 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28246 {
28247 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28248 inform (input_location,
28249 "the result for the xxspltidp instruction "
28250 "is undefined for subnormal input values");
28251 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28252 }
28253
28254 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28255
28256 static bool
28257 rs6000_gen_pic_addr_diff_vec (void)
28258 {
28259 return rs6000_relative_jumptables;
28260 }
28261
28262 void
28263 rs6000_output_addr_vec_elt (FILE *file, int value)
28264 {
28265 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28266 char buf[100];
28267
28268 fprintf (file, "%s", directive);
28269 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28270 assemble_name (file, buf);
28271 fprintf (file, "\n");
28272 }
28273
28274 \f
28275 /* Copy an integer constant to the vector constant structure. */
28276
28277 static void
28278 constant_int_to_128bit_vector (rtx op,
28279 machine_mode mode,
28280 size_t byte_num,
28281 vec_const_128bit_type *info)
28282 {
28283 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28284 unsigned bitsize = GET_MODE_BITSIZE (mode);
28285
28286 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28287 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28288 }
28289
28290 /* Copy a floating point constant to the vector constant structure. */
28291
28292 static void
28293 constant_fp_to_128bit_vector (rtx op,
28294 machine_mode mode,
28295 size_t byte_num,
28296 vec_const_128bit_type *info)
28297 {
28298 unsigned bitsize = GET_MODE_BITSIZE (mode);
28299 unsigned num_words = bitsize / 32;
28300 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28301 long real_words[VECTOR_128BIT_WORDS];
28302
28303 /* Make sure we don't overflow the real_words array and that it is
28304 filled completely. */
28305 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28306
28307 real_to_target (real_words, rtype, mode);
28308
28309 /* Iterate over each 32-bit word in the floating point constant. The
28310 real_to_target function puts out words in target endian fashion. We need
28311 to arrange the order so that the bytes are written in big endian order. */
28312 for (unsigned num = 0; num < num_words; num++)
28313 {
28314 unsigned endian_num = (BYTES_BIG_ENDIAN
28315 ? num
28316 : num_words - 1 - num);
28317
28318 unsigned uvalue = real_words[endian_num];
28319 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28320 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28321 }
28322
28323 /* Mark that this constant involves floating point. */
28324 info->fp_constant_p = true;
28325 }
28326
28327 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28328 structure INFO.
28329
28330 Break out the constant out to bytes, half words, words, and double words.
28331 Return true if we have successfully converted the constant.
28332
28333 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28334 constants. Integer and floating point scalar constants are splatted to fill
28335 out the vector. */
28336
28337 bool
28338 vec_const_128bit_to_bytes (rtx op,
28339 machine_mode mode,
28340 vec_const_128bit_type *info)
28341 {
28342 /* Initialize the constant structure. */
28343 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28344
28345 /* Assume CONST_INTs are DImode. */
28346 if (mode == VOIDmode)
28347 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28348
28349 if (mode == VOIDmode)
28350 return false;
28351
28352 unsigned size = GET_MODE_SIZE (mode);
28353 bool splat_p = false;
28354
28355 if (size > VECTOR_128BIT_BYTES)
28356 return false;
28357
28358 /* Set up the bits. */
28359 switch (GET_CODE (op))
28360 {
28361 /* Integer constants, default to double word. */
28362 case CONST_INT:
28363 {
28364 constant_int_to_128bit_vector (op, mode, 0, info);
28365 splat_p = true;
28366 break;
28367 }
28368
28369 /* Floating point constants. */
28370 case CONST_DOUBLE:
28371 {
28372 /* Fail if the floating point constant is the wrong mode. */
28373 if (GET_MODE (op) != mode)
28374 return false;
28375
28376 /* SFmode stored as scalars are stored in DFmode format. */
28377 if (mode == SFmode)
28378 {
28379 mode = DFmode;
28380 size = GET_MODE_SIZE (DFmode);
28381 }
28382
28383 constant_fp_to_128bit_vector (op, mode, 0, info);
28384 splat_p = true;
28385 break;
28386 }
28387
28388 /* Vector constants, iterate over each element. On little endian
28389 systems, we have to reverse the element numbers. */
28390 case CONST_VECTOR:
28391 {
28392 /* Fail if the vector constant is the wrong mode or size. */
28393 if (GET_MODE (op) != mode
28394 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28395 return false;
28396
28397 machine_mode ele_mode = GET_MODE_INNER (mode);
28398 size_t ele_size = GET_MODE_SIZE (ele_mode);
28399 size_t nunits = GET_MODE_NUNITS (mode);
28400
28401 for (size_t num = 0; num < nunits; num++)
28402 {
28403 rtx ele = CONST_VECTOR_ELT (op, num);
28404 size_t byte_num = (BYTES_BIG_ENDIAN
28405 ? num
28406 : nunits - 1 - num) * ele_size;
28407
28408 if (CONST_INT_P (ele))
28409 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28410 else if (CONST_DOUBLE_P (ele))
28411 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28412 else
28413 return false;
28414 }
28415
28416 break;
28417 }
28418
28419 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28420 Since we are duplicating the element, we don't have to worry about
28421 endian issues. */
28422 case VEC_DUPLICATE:
28423 {
28424 /* Fail if the vector duplicate is the wrong mode or size. */
28425 if (GET_MODE (op) != mode
28426 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28427 return false;
28428
28429 machine_mode ele_mode = GET_MODE_INNER (mode);
28430 size_t ele_size = GET_MODE_SIZE (ele_mode);
28431 rtx ele = XEXP (op, 0);
28432 size_t nunits = GET_MODE_NUNITS (mode);
28433
28434 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
28435 return false;
28436
28437 for (size_t num = 0; num < nunits; num++)
28438 {
28439 size_t byte_num = num * ele_size;
28440
28441 if (CONST_INT_P (ele))
28442 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28443 else
28444 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28445 }
28446
28447 break;
28448 }
28449
28450 /* Anything else, just return failure. */
28451 default:
28452 return false;
28453 }
28454
28455 /* Splat the constant to fill 128 bits if desired. */
28456 if (splat_p && size < VECTOR_128BIT_BYTES)
28457 {
28458 if ((VECTOR_128BIT_BYTES % size) != 0)
28459 return false;
28460
28461 for (size_t offset = size;
28462 offset < VECTOR_128BIT_BYTES;
28463 offset += size)
28464 memcpy ((void *) &info->bytes[offset],
28465 (void *) &info->bytes[0],
28466 size);
28467 }
28468
28469 /* Remember original size. */
28470 info->original_size = size;
28471
28472 /* Determine if the bytes are all the same. */
28473 unsigned char first_byte = info->bytes[0];
28474 info->all_bytes_same = true;
28475 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
28476 if (first_byte != info->bytes[i])
28477 {
28478 info->all_bytes_same = false;
28479 break;
28480 }
28481
28482 /* Pack half words together & determine if all of the half words are the
28483 same. */
28484 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
28485 info->half_words[i] = ((info->bytes[i * 2] << 8)
28486 | info->bytes[(i * 2) + 1]);
28487
28488 unsigned short first_hword = info->half_words[0];
28489 info->all_half_words_same = true;
28490 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
28491 if (first_hword != info->half_words[i])
28492 {
28493 info->all_half_words_same = false;
28494 break;
28495 }
28496
28497 /* Pack words together & determine if all of the words are the same. */
28498 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
28499 info->words[i] = ((info->bytes[i * 4] << 24)
28500 | (info->bytes[(i * 4) + 1] << 16)
28501 | (info->bytes[(i * 4) + 2] << 8)
28502 | info->bytes[(i * 4) + 3]);
28503
28504 info->all_words_same
28505 = (info->words[0] == info->words[1]
28507 && info->words[0] == info->words[2]
28508 && info->words[0] == info->words[3]);
28509
28510 /* Pack double words together & determine if all of the double words are the
28511 same. */
28512 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
28513 {
28514 unsigned HOST_WIDE_INT d_word = 0;
28515 for (size_t j = 0; j < 8; j++)
28516 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
28517
28518 info->double_words[i] = d_word;
28519 }
28520
28521 info->all_double_words_same
28522 = (info->double_words[0] == info->double_words[1]);
28523
28524 return true;
28525 }
28526
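/* For example, if the caller passes the SImode constant 0x01020304, the
   value is splatted four times across the 128 bits, so all_words_same
   and all_double_words_same are true, while all_bytes_same and
   all_half_words_same are false.  */
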
28527 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
28528 if the LXVKQ instruction cannot be used. Otherwise return the immediate
28529 value to be used with the LXVKQ instruction. */
28530
28531 unsigned
28532 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
28533 {
28534 /* The instruction is only supported if power10 code generation, IEEE 128-bit
28535 floating point hardware, and VSX registers are all available. */
28536 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
28537 || !TARGET_VSX)
28538 return 0;
28539
28540 /* All of the constants that are generated by LXVKQ have their bottom 3
28541 words equal to 0. */
28542 if (vsx_const->words[1] != 0
28543 || vsx_const->words[2] != 0
28544 || vsx_const->words[3] != 0)
28545 return 0;
28546
28547 /* See if we have a match for the first word. */
28548 switch (vsx_const->words[0])
28549 {
28550 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
28551 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
28552 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
28553 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
28554 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
28555 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
28556 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
28557 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
28558 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
28559 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
28560 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
28561 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
28562 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
28563 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
28564 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
28565 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
28566 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
28567 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
28568
28569 /* anything else cannot be loaded. */
28570 default:
28571 break;
28572 }
28573
28574 return 0;
28575 }

/* Determine if a vector constant can be loaded with XXSPLTIW.  Return zero if
   the XXSPLTIW instruction cannot be used.  Otherwise return the immediate
   value to be used with the XXSPLTIW instruction.  */

unsigned
constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  if (!vsx_const->all_words_same)
    return 0;

  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
  if (vsx_const->all_bytes_same)
    return 0;

  /* See if we can use VSPLTISH instead; the xor/subtract idiom sign-extends
     the unsigned 16-bit half word to a signed value.  */
  if (vsx_const->all_half_words_same)
    {
      unsigned short h_word = vsx_const->half_words[0];
      short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
      if (EASY_VECTOR_15 (sign_h_word))
	return 0;
    }

  /* Likewise, see if we can use VSPLTISW on the sign-extended word value.  */
  unsigned int word = vsx_const->words[0];
  int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
  if (EASY_VECTOR_15 (sign_word))
    return 0;

  return vsx_const->words[0];
}
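
/* A worked example of the xor/subtract sign-extension idiom used above,
   disabled and with a hypothetical name.  It shows why a vector of 0xFFF0
   half words is rejected: the value sign-extends to -16, which VSPLTISH can
   already splat.  */
#if 0
static int
sign_extend_16 (unsigned int h_word)
{
  /* For h_word == 0xFFF0: 0xFFF0 ^ 0x8000 == 0x7FF0 (32752), and
     32752 - 32768 == -16.  -16 satisfies EASY_VECTOR_15, so
     constant_generates_xxspltiw returns 0 for that constant.  */
  int flipped = (int) ((h_word & 0xffff) ^ 0x8000);
  return flipped - 0x8000;
}
#endif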

/* Determine if a vector constant can be loaded with XXSPLTIDP.  Return zero
   if the XXSPLTIDP instruction cannot be used.  Otherwise return the
   immediate value to be used with the XXSPLTIDP instruction.  */

unsigned
constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  /* Reject if the two 64-bit segments are not the same.  */
  if (!vsx_const->all_double_words_same)
    return 0;

  /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
     Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW).  */
  if (vsx_const->all_bytes_same
      || vsx_const->all_half_words_same
      || vsx_const->all_words_same)
    return 0;

  unsigned HOST_WIDE_INT value = vsx_const->double_words[0];

  /* Avoid values that look like DFmode NaNs, except for the standard quiet
     NaN and signaling NaN bit patterns.  Also recognize positive and negative
     infinity.  */

/* Bit representation of DFmode normal quiet NaN.  */
#define RS6000_CONST_DF_NAN	HOST_WIDE_INT_UC (0x7ff8000000000000)

/* Bit representation of DFmode normal signaling NaN.  */
#define RS6000_CONST_DF_NANS	HOST_WIDE_INT_UC (0x7ff4000000000000)

/* Bit representation of DFmode positive infinity.  */
#define RS6000_CONST_DF_INF	HOST_WIDE_INT_UC (0x7ff0000000000000)

/* Bit representation of DFmode negative infinity.  */
#define RS6000_CONST_DF_NEG_INF	HOST_WIDE_INT_UC (0xfff0000000000000)

  if (value != RS6000_CONST_DF_NAN
      && value != RS6000_CONST_DF_NANS
      && value != RS6000_CONST_DF_INF
      && value != RS6000_CONST_DF_NEG_INF)
    {
      /* The IEEE 754 64-bit floating format has 1 bit for the sign, 11 bits
	 for the exponent, and 52 bits for the mantissa (not counting the
	 hidden bit used for normal numbers).  NaN values have the exponent
	 set to all 1 bits, and the mantissa non-zero (mantissa == 0 is
	 infinity).  */

      int df_exponent = (value >> 52) & 0x7ff;
      unsigned HOST_WIDE_INT
	df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);

      if (df_exponent == 0x7ff && df_mantissa != 0)	/* other NaNs.  */
	return 0;

      /* Avoid values that are DFmode subnormal values.  Subnormal numbers
	 have the exponent all 0 bits, and the mantissa non-zero.  If the
	 value is subnormal, then the hidden bit in the mantissa is not
	 set.  */
      if (df_exponent == 0 && df_mantissa != 0)		/* subnormal.  */
	return 0;
    }
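
  /* For example, 0x7FF0000000000001 (a NaN with a non-standard payload) and
     0x0008000000000000 (a DFmode subnormal) are both rejected by the checks
     above.  */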

  /* Change the representation to a DFmode constant.  */
  long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };

  /* real_from_target takes the target words in target order.  */
  if (!BYTES_BIG_ENDIAN)
    std::swap (df_words[0], df_words[1]);

  REAL_VALUE_TYPE rv_type;
  real_from_target (&rv_type, df_words, DFmode);

  const REAL_VALUE_TYPE *rv = &rv_type;

  /* Validate that the number can be stored as an SFmode value.  */
  if (!exact_real_truncate (SFmode, rv))
    return 0;

  /* Validate that the number is not an SFmode subnormal value (exponent is 0,
     mantissa field is non-zero), which is undefined for the XXSPLTIDP
     instruction.  */
  long sf_value;
  real_to_target (&sf_value, rv, SFmode);

  /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
     and 23 bits for the mantissa.  Subnormal numbers have the exponent all
     0 bits, and the mantissa non-zero.  */
  long sf_exponent = (sf_value >> 23) & 0xFF;
  long sf_mantissa = sf_value & 0x7FFFFF;

  if (sf_exponent == 0 && sf_mantissa != 0)
    return 0;

  /* Return the immediate to be used.  */
  return sf_value;
}
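
/* A concrete, disabled example of the encoding checked above: XXSPLTIDP's
   32-bit immediate is the SFmode image of the value, which the hardware
   widens back to DFmode when splatting.  The helper name is hypothetical.  */
#if 0
static void
xxspltidp_example (void)
{
  /* 2.5 as DFmode is 0x4004000000000000.  It truncates exactly to SFmode
     (0x40200000), is neither a NaN nor subnormal, and so the function above
     would return 0x40200000 for a { 2.5, 2.5 } V2DF constant.  */
  float f = 2.5f;
  unsigned int sf_bits;
  memcpy (&sf_bits, &f, sizeof (f));	/* host-endian sketch only.  */
  fprintf (stderr, "immediate = 0x%x\n", sf_bits);
}
#endif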

\f
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"