// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "ira.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
45 #include "flags.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "attribs.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "print-tree.h"
52 #include "varasm.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "output.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "gimple-fold.h"
62 #include "gimple-iterator.h"
63 #include "gimple-walk.h"
64 #include "ssa.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
67 #include "intl.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
70 #include "builtins.h"
71 #include "tree-vector-builder.h"
72 #include "context.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
75 #include "ipa-prop.h"
76 #include "ipa-fnsummary.h"
77 #include "except.h"
78 #if TARGET_XCOFF
79 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
80 #endif
81 #include "case-cfn-macros.h"
82 #include "ppc-auxv.h"
83 #include "rs6000-internal.h"
84 #include "opts.h"
85
86 /* This file should be included last. */
87 #include "target-def.h"
88
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif

/* Track use of r13 in 64-bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name.  */
  unsigned int mask;		/* mask bits to set.  */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
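
/* Worked example (illustrative only, not part of the option machinery):
   -mrecip=divf,rsqrtd combines the entries above into
   (RECIP_SF_DIV | RECIP_V4SF_DIV) | (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT)
   = 0x001 | 0x004 | 0x020 | 0x080 = 0x0a5, i.e. single-precision divide
   estimates plus double-precision reciprocal square root estimates, in
   both scalar and vector form.  */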

/* On PowerPC, we have a limited number of target clones that we care about,
   which means we can use an array to hold the options, rather than having
   more elaborate data structures to identify each possible variation.  Order
   the clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,		/* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask.  */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};
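
/* A sketch of how these clones surface in user code (hypothetical example,
   using the documented target_clones attribute syntax):

     __attribute__ ((target_clones ("cpu=power9", "default")))
     double scale (double x) { return x * 2.0; }

   The compiler emits one body per listed option plus an ifunc resolver;
   the resolver picks the best clone by testing the HWCAP names in
   rs6000_clone_map via __builtin_cpu_supports, from the highest ISA down
   to the default.  */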


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE through FPR_REG_TYPE are ordered so that we can use a simple
   range check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes
   mapping into real registers, and skip the ANY class, which is just an OR
   of the bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
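
/* For instance (purely illustrative), an addr_mask entry of
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) == 0x0d
   describes a mode/register-class pair that allows reg+reg and reg+offset
   addresses but none of the auto-increment forms.  */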

/* Masks of the valid addressing modes, based on register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
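
/* Sketch of how the predicates above are meant to be used (the real callers
   are the address legitimization and reload routines later in this file):

     if (mode_supports_pre_incdec_p (mode))
       ... PRE_INC/PRE_DEC addresses may be generated for MODE ...

   Note that all but mode_supports_vmx_dform consult the RELOAD_REG_ANY
   entry, so they answer for the union of the GPR/FPR/Altivec classes rather
   than for any single class.  */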

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
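
/* Example of the guard above (illustrative): if IN_INSN's PARALLEL contains
   anything other than SETs, CLOBBERs, and USEs, e.g.

     (parallel [(set (mem:DI ...) (reg:DI 3))
                (unspec_volatile ...)])

   this wrapper returns false immediately, whereas the generic
   store_data_bypass_p could trip an assertion on such a pattern.  */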

\f
/* Processor costs (relative to an add).  */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};
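
/* COSTS_N_INSNS (N) (from rtl.h) is simply N scaled by the cost of one fast
   instruction, so the two size tables here deliberately charge every
   operation as a single instruction: when optimizing for size, only the
   instruction count matters, not the latency.  The tables that follow, by
   contrast, approximate the real latencies of each processor.  */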

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER10 processors.  */
static const
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;


\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
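
/* How these attributes look in user code (hypothetical examples; "altivec"
   is normally reached through the "vector" keyword of <altivec.h>):

     typedef int v4si __attribute__ ((altivec (vector__)));
     void far_fn (void) __attribute__ ((longcall));
     struct __attribute__ ((ms_struct)) S { char c; int i; };

   "altivec" is the only one that takes an argument (min_len == max_len == 1
   above), and that argument must be an identifier, hence the
   TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P hook defined below.  */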
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

1514 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1515 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1516
1517 #undef TARGET_PROMOTE_FUNCTION_MODE
1518 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1519
1520 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1521 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1522
1523 #undef TARGET_RETURN_IN_MEMORY
1524 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1525
1526 #undef TARGET_RETURN_IN_MSB
1527 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1528
1529 #undef TARGET_SETUP_INCOMING_VARARGS
1530 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1531
1532 /* Always strict argument naming on rs6000. */
1533 #undef TARGET_STRICT_ARGUMENT_NAMING
1534 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1535 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1536 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1537 #undef TARGET_SPLIT_COMPLEX_ARG
1538 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1539 #undef TARGET_MUST_PASS_IN_STACK
1540 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1541 #undef TARGET_PASS_BY_REFERENCE
1542 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1543 #undef TARGET_ARG_PARTIAL_BYTES
1544 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1545 #undef TARGET_FUNCTION_ARG_ADVANCE
1546 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1547 #undef TARGET_FUNCTION_ARG
1548 #define TARGET_FUNCTION_ARG rs6000_function_arg
1549 #undef TARGET_FUNCTION_ARG_PADDING
1550 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1551 #undef TARGET_FUNCTION_ARG_BOUNDARY
1552 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1553
1554 #undef TARGET_BUILD_BUILTIN_VA_LIST
1555 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1556
1557 #undef TARGET_EXPAND_BUILTIN_VA_START
1558 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1559
1560 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1561 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1562
1563 #undef TARGET_EH_RETURN_FILTER_MODE
1564 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1565
1566 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1567 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1568
1569 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1570 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1571
1572 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1573 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1574 rs6000_libgcc_floating_mode_supported_p
1575
1576 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1577 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1578
1579 #undef TARGET_FLOATN_MODE
1580 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1581
1582 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1583 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1584
1585 #undef TARGET_MD_ASM_ADJUST
1586 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1587
1588 #undef TARGET_OPTION_OVERRIDE
1589 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1590
1591 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1592 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1593 rs6000_builtin_vectorized_function
1594
1595 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1596 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1597 rs6000_builtin_md_vectorized_function
1598
1599 #undef TARGET_STACK_PROTECT_GUARD
1600 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1601
1602 #if !TARGET_MACHO
1603 #undef TARGET_STACK_PROTECT_FAIL
1604 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1605 #endif
1606
1607 #ifdef HAVE_AS_TLS
1608 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1609 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1610 #endif
1611
1612 /* Use a 32-bit anchor range. This leads to sequences like:
1613
1614 addis tmp,anchor,high
1615 add dest,tmp,low
1616
1617 where tmp itself acts as an anchor, and can be shared between
1618 accesses to the same 64k page. */
1619 #undef TARGET_MIN_ANCHOR_OFFSET
1620 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
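/* Written as -0x7fffffff - 1 rather than -0x80000000, since the literal
   0x80000000 does not fit in a 32-bit int and would be unsigned.  */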
1621 #undef TARGET_MAX_ANCHOR_OFFSET
1622 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1623 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1624 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1625 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1626 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1627
1628 #undef TARGET_BUILTIN_RECIPROCAL
1629 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1630
1631 #undef TARGET_SECONDARY_RELOAD
1632 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1633 #undef TARGET_SECONDARY_MEMORY_NEEDED
1634 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1635 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1636 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1637
1638 #undef TARGET_LEGITIMATE_ADDRESS_P
1639 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1640
1641 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1642 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1643
1644 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1645 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1646
1647 #undef TARGET_CAN_ELIMINATE
1648 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1649
1650 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1651 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1652
1653 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1654 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1655
1656 #undef TARGET_TRAMPOLINE_INIT
1657 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1658
1659 #undef TARGET_FUNCTION_VALUE
1660 #define TARGET_FUNCTION_VALUE rs6000_function_value
1661
1662 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1663 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1664
1665 #undef TARGET_OPTION_SAVE
1666 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1667
1668 #undef TARGET_OPTION_RESTORE
1669 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1670
1671 #undef TARGET_OPTION_PRINT
1672 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1673
1674 #undef TARGET_CAN_INLINE_P
1675 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1676
1677 #undef TARGET_SET_CURRENT_FUNCTION
1678 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1679
1680 #undef TARGET_LEGITIMATE_CONSTANT_P
1681 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1682
1683 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1684 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1685
1686 #undef TARGET_CAN_USE_DOLOOP_P
1687 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1688
1689 #undef TARGET_PREDICT_DOLOOP_P
1690 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1691
1692 #undef TARGET_HAVE_COUNT_REG_DECR_P
1693 #define TARGET_HAVE_COUNT_REG_DECR_P true
1694
1695 /* 1000000000 is infinite cost in IVOPTs. */
1696 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1697 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1698
1699 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1700 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1701
1702 #undef TARGET_PREFERRED_DOLOOP_MODE
1703 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1704
1705 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1706 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1707
1708 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1709 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1710 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1711 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1712 #undef TARGET_UNWIND_WORD_MODE
1713 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1714
1715 #undef TARGET_OFFLOAD_OPTIONS
1716 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1717
1718 #undef TARGET_C_MODE_FOR_SUFFIX
1719 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1720
1721 #undef TARGET_INVALID_BINARY_OP
1722 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1723
1724 #undef TARGET_OPTAB_SUPPORTED_P
1725 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1726
1727 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1728 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1729
1730 #undef TARGET_COMPARE_VERSION_PRIORITY
1731 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1732
1733 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1734 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1735 rs6000_generate_version_dispatcher_body
1736
1737 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1738 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1739 rs6000_get_function_versions_dispatcher
1740
1741 #undef TARGET_OPTION_FUNCTION_VERSIONS
1742 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1743
1744 #undef TARGET_HARD_REGNO_NREGS
1745 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1746 #undef TARGET_HARD_REGNO_MODE_OK
1747 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1748
1749 #undef TARGET_MODES_TIEABLE_P
1750 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1751
1752 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1753 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1754 rs6000_hard_regno_call_part_clobbered
1755
1756 #undef TARGET_SLOW_UNALIGNED_ACCESS
1757 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1758
1759 #undef TARGET_CAN_CHANGE_MODE_CLASS
1760 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1761
1762 #undef TARGET_CONSTANT_ALIGNMENT
1763 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1764
1765 #undef TARGET_STARTING_FRAME_OFFSET
1766 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1767
1768 #if TARGET_ELF && RS6000_WEAK
1769 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1770 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1771 #endif
1772
1773 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1774 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1775
1776 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1777 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1778
1779 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1780 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1781 rs6000_cannot_substitute_mem_equiv_p
1782
1783 #undef TARGET_INVALID_CONVERSION
1784 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1785
1786 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1787 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1788
1789 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1790 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1791 \f
1792
1793 /* Processor table. */
1794 struct rs6000_ptt
1795 {
1796 const char *const name; /* Canonical processor name. */
1797 const enum processor_type processor; /* Processor type enum value. */
1798 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1799 };
1800
1801 static struct rs6000_ptt const processor_target_table[] =
1802 {
1803 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1804 #include "rs6000-cpus.def"
1805 #undef RS6000_CPU
1806 };
1807
1808 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1809 name is invalid. */
1810
1811 static int
1812 rs6000_cpu_name_lookup (const char *name)
1813 {
1814 size_t i;
1815
1816 if (name != NULL)
1817 {
1818 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1819 if (! strcmp (name, processor_target_table[i].name))
1820 return (int)i;
1821 }
1822
1823 return -1;
1824 }
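/* For example, rs6000_cpu_name_lookup ("power9") returns the table index of
   the "power9" entry (assuming rs6000-cpus.def defines one, as current
   versions do), while an unknown name returns -1.  */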
1825
1826 \f
1827 /* Return number of consecutive hard regs needed starting at reg REGNO
1828 to hold something of mode MODE.
1829 This is ordinarily the length in words of a value of mode MODE
1830 but can be less for certain modes in special long registers.
1831
1832 POWER and PowerPC GPRs hold 32 bits worth;
1833 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1834
1835 static int
1836 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1837 {
1838 unsigned HOST_WIDE_INT reg_size;
1839
1840 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1841 128-bit floating point that can go in vector registers, which has VSX
1842 memory addressing. */
1843 if (FP_REGNO_P (regno))
1844 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1845 ? UNITS_PER_VSX_WORD
1846 : UNITS_PER_FP_WORD);
1847
1848 else if (ALTIVEC_REGNO_P (regno))
1849 reg_size = UNITS_PER_ALTIVEC_WORD;
1850
1851 else
1852 reg_size = UNITS_PER_WORD;
1853
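/* Round up to a whole number of registers: e.g. a 16-byte vector mode in
   8-byte FP registers needs (16 + 8 - 1) / 8 = 2 registers.  */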
1854 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1855 }
1856
1857 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1858 MODE. */
1859 static int
1860 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1861 {
1862 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1863
1864 if (COMPLEX_MODE_P (mode))
1865 mode = GET_MODE_INNER (mode);
1866
1867 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1868 registers. */
1869 if (mode == OOmode)
1870 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1871
1872 /* MMA accumulator modes need FPR registers divisible by 4. */
1873 if (mode == XOmode)
1874 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1875
1876 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1877 register pairs, and we use PTImode where we need to deal with such quad
1878 word operations. Don't allow quad words in the argument or frame pointer
1879 registers, just registers 0..31. */
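/* (The even/odd pairing matches quad-word insns such as lq/stq, which
   require an even first register, e.g. the r4:r5 pair.)  */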
1880 if (mode == PTImode)
1881 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1882 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1883 && ((regno & 1) == 0));
1884
1885 /* VSX registers that overlap the FPR registers are larger than on non-VSX
1886 implementations. Don't allow an item to be split between an FP register
1887 and an Altivec register. Allow TImode in all VSX registers if the user
1888 asked for it. */
1889 if (TARGET_VSX && VSX_REGNO_P (regno)
1890 && (VECTOR_MEM_VSX_P (mode)
1891 || VECTOR_ALIGNMENT_P (mode)
1892 || reg_addr[mode].scalar_in_vmx_p
1893 || mode == TImode
1894 || (TARGET_VADDUQM && mode == V1TImode)))
1895 {
1896 if (FP_REGNO_P (regno))
1897 return FP_REGNO_P (last_regno);
1898
1899 if (ALTIVEC_REGNO_P (regno))
1900 {
1901 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1902 return 0;
1903
1904 return ALTIVEC_REGNO_P (last_regno);
1905 }
1906 }
1907
1908 /* The GPRs can hold any mode, but values bigger than one register
1909 cannot go past R31. */
1910 if (INT_REGNO_P (regno))
1911 return INT_REGNO_P (last_regno);
1912
1913 /* The float registers (except for VSX vector modes) can only hold floating
1914 modes and DImode. */
1915 if (FP_REGNO_P (regno))
1916 {
1917 if (VECTOR_ALIGNMENT_P (mode))
1918 return false;
1919
1920 if (SCALAR_FLOAT_MODE_P (mode)
1921 && (mode != TDmode || (regno % 2) == 0)
1922 && FP_REGNO_P (last_regno))
1923 return 1;
1924
1925 if (GET_MODE_CLASS (mode) == MODE_INT)
1926 {
1927 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1928 return 1;
1929
1930 if (TARGET_P8_VECTOR && (mode == SImode))
1931 return 1;
1932
1933 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1934 return 1;
1935 }
1936
1937 return 0;
1938 }
1939
1940 /* The CR register can only hold CC modes. */
1941 if (CR_REGNO_P (regno))
1942 return GET_MODE_CLASS (mode) == MODE_CC;
1943
1944 if (CA_REGNO_P (regno))
1945 return mode == Pmode || mode == SImode;
1946
1947 /* AltiVec modes can only go in AltiVec registers. */
1948 if (ALTIVEC_REGNO_P (regno))
1949 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1950 || mode == V1TImode);
1951
1952 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1953 registers, and the value must fit within the register set. */
1954
1955 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1956 }
1957
1958 /* Implement TARGET_HARD_REGNO_NREGS. */
1959
1960 static unsigned int
1961 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1962 {
1963 return rs6000_hard_regno_nregs[mode][regno];
1964 }
1965
1966 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1967
1968 static bool
1969 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1970 {
1971 return rs6000_hard_regno_mode_ok_p[mode][regno];
1972 }
1973
1974 /* Implement TARGET_MODES_TIEABLE_P.
1975
1976 PTImode cannot tie with other modes because PTImode is restricted to even
1977 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1978 57744).
1979
1980 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1981 registers) or XOmode (vector quad, restricted to FPR registers divisible
1982 by 4) to tie with other modes.
1983
1984 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1985 128-bit floating point on VSX systems ties with other vectors. */
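/* For example, V4SImode and V2DFmode tie (both are Altivec/VSX vectors),
   while DFmode and DImode do not (scalar float vs. integer).  */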
1986
1987 static bool
1988 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1989 {
1990 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1991 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1992 return mode1 == mode2;
1993
1994 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1995 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1996 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1997 return false;
1998
1999 if (SCALAR_FLOAT_MODE_P (mode1))
2000 return SCALAR_FLOAT_MODE_P (mode2);
2001 if (SCALAR_FLOAT_MODE_P (mode2))
2002 return false;
2003
2004 if (GET_MODE_CLASS (mode1) == MODE_CC)
2005 return GET_MODE_CLASS (mode2) == MODE_CC;
2006 if (GET_MODE_CLASS (mode2) == MODE_CC)
2007 return false;
2008
2009 return true;
2010 }
2011
2012 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2013
2014 static bool
2015 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
2016 machine_mode mode)
2017 {
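/* With -m32 -mpowerpc64, calls only preserve the low 32 bits of the
   nonvolatile GPRs, so any GPR value wider than 4 bytes is partially
   clobbered.  */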
2018 if (TARGET_32BIT
2019 && TARGET_POWERPC64
2020 && GET_MODE_SIZE (mode) > 4
2021 && INT_REGNO_P (regno))
2022 return true;
2023
2024 if (TARGET_VSX
2025 && FP_REGNO_P (regno)
2026 && GET_MODE_SIZE (mode) > 8
2027 && !FLOAT128_2REG_P (mode))
2028 return true;
2029
2030 return false;
2031 }
2032
2033 /* Print interesting facts about registers. */
2034 static void
2035 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2036 {
2037 int r, m;
2038
2039 for (r = first_regno; r <= last_regno; ++r)
2040 {
2041 const char *comma = "";
2042 int len;
2043
2044 if (first_regno == last_regno)
2045 fprintf (stderr, "%s:\t", reg_name);
2046 else
2047 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2048
2049 len = 8;
2050 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2051 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2052 {
2053 if (len > 70)
2054 {
2055 fprintf (stderr, ",\n\t");
2056 len = 8;
2057 comma = "";
2058 }
2059
2060 if (rs6000_hard_regno_nregs[m][r] > 1)
2061 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2062 rs6000_hard_regno_nregs[m][r]);
2063 else
2064 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2065
2066 comma = ", ";
2067 }
2068
2069 if (call_used_or_fixed_reg_p (r))
2070 {
2071 if (len > 70)
2072 {
2073 fprintf (stderr, ",\n\t");
2074 len = 8;
2075 comma = "";
2076 }
2077
2078 len += fprintf (stderr, "%s%s", comma, "call-used");
2079 comma = ", ";
2080 }
2081
2082 if (fixed_regs[r])
2083 {
2084 if (len > 70)
2085 {
2086 fprintf (stderr, ",\n\t");
2087 len = 8;
2088 comma = "";
2089 }
2090
2091 len += fprintf (stderr, "%s%s", comma, "fixed");
2092 comma = ", ";
2093 }
2094
2095 if (len > 70)
2096 {
2097 fprintf (stderr, ",\n\t");
2098 comma = "";
2099 }
2100
2101 len += fprintf (stderr, "%sreg-class = %s", comma,
2102 reg_class_names[(int)rs6000_regno_regclass[r]]);
2103 comma = ", ";
2104
2105 if (len > 70)
2106 {
2107 fprintf (stderr, ",\n\t");
2108 comma = "";
2109 }
2110
2111 fprintf (stderr, "%sregno = %d\n", comma, r);
2112 }
2113 }
2114
2115 static const char *
2116 rs6000_debug_vector_unit (enum rs6000_vector v)
2117 {
2118 const char *ret;
2119
2120 switch (v)
2121 {
2122 case VECTOR_NONE: ret = "none"; break;
2123 case VECTOR_ALTIVEC: ret = "altivec"; break;
2124 case VECTOR_VSX: ret = "vsx"; break;
2125 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2126 default: ret = "unknown"; break;
2127 }
2128
2129 return ret;
2130 }
2131
2132 /* Inner function printing just the address mask for a particular reload
2133 register class. */
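/* Legend for the string built below: v = valid, m = multiple registers,
   i = indexed (REG+REG), O/o = quad/normal REG+OFFSET, the two '+'
   positions = pre-increment/decrement and pre-modify, & = AND of -16
   (Altivec-style) addressing.  */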
2134 DEBUG_FUNCTION char *
2135 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2136 {
2137 static char ret[8];
2138 char *p = ret;
2139
2140 if ((mask & RELOAD_REG_VALID) != 0)
2141 *p++ = 'v';
2142 else if (keep_spaces)
2143 *p++ = ' ';
2144
2145 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2146 *p++ = 'm';
2147 else if (keep_spaces)
2148 *p++ = ' ';
2149
2150 if ((mask & RELOAD_REG_INDEXED) != 0)
2151 *p++ = 'i';
2152 else if (keep_spaces)
2153 *p++ = ' ';
2154
2155 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2156 *p++ = 'O';
2157 else if ((mask & RELOAD_REG_OFFSET) != 0)
2158 *p++ = 'o';
2159 else if (keep_spaces)
2160 *p++ = ' ';
2161
2162 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2163 *p++ = '+';
2164 else if (keep_spaces)
2165 *p++ = ' ';
2166
2167 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2168 *p++ = '+';
2169 else if (keep_spaces)
2170 *p++ = ' ';
2171
2172 if ((mask & RELOAD_REG_AND_M16) != 0)
2173 *p++ = '&';
2174 else if (keep_spaces)
2175 *p++ = ' ';
2176
2177 *p = '\0';
2178
2179 return ret;
2180 }
2181
2182 /* Print the address masks in a human readable fashion. */
2183 DEBUG_FUNCTION void
2184 rs6000_debug_print_mode (ssize_t m)
2185 {
2186 ssize_t rc;
2187 int spaces = 0;
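/* SPACES accumulates the width of any skipped fields so that later
   columns still line up.  */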
2188
2189 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2190 for (rc = 0; rc < N_RELOAD_REG; rc++)
2191 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2192 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2193
2194 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2195 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2196 {
2197 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2198 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2199 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2200 spaces = 0;
2201 }
2202 else
2203 spaces += strlen (" Reload=sl");
2204
2205 if (reg_addr[m].scalar_in_vmx_p)
2206 {
2207 fprintf (stderr, "%*s Upper=y", spaces, "");
2208 spaces = 0;
2209 }
2210 else
2211 spaces += strlen (" Upper=y");
2212
2213 if (rs6000_vector_unit[m] != VECTOR_NONE
2214 || rs6000_vector_mem[m] != VECTOR_NONE)
2215 {
2216 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2217 spaces, "",
2218 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2219 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2220 }
2221
2222 fputs ("\n", stderr);
2223 }
2224
2225 #define DEBUG_FMT_ID "%-32s= "
2226 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2227 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2228 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
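/* For example, fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2") prints
   "abi" left-justified in 32 columns followed by "= ELFv2".  */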
2229
2230 /* Print various interesting information with -mdebug=reg. */
2231 static void
2232 rs6000_debug_reg_global (void)
2233 {
2234 static const char *const tf[2] = { "false", "true" };
2235 const char *nl = (const char *)0;
2236 int m;
2237 size_t m1, m2, v;
2238 char costly_num[20];
2239 char nop_num[20];
2240 char flags_buffer[40];
2241 const char *costly_str;
2242 const char *nop_str;
2243 const char *trace_str;
2244 const char *abi_str;
2245 const char *cmodel_str;
2246 struct cl_target_option cl_opts;
2247
2248 /* Modes we want tieable information on. */
2249 static const machine_mode print_tieable_modes[] = {
2250 QImode,
2251 HImode,
2252 SImode,
2253 DImode,
2254 TImode,
2255 PTImode,
2256 SFmode,
2257 DFmode,
2258 TFmode,
2259 IFmode,
2260 KFmode,
2261 SDmode,
2262 DDmode,
2263 TDmode,
2264 V2SImode,
2265 V2SFmode,
2266 V16QImode,
2267 V8HImode,
2268 V4SImode,
2269 V2DImode,
2270 V1TImode,
2271 V32QImode,
2272 V16HImode,
2273 V8SImode,
2274 V4DImode,
2275 V2TImode,
2276 V4SFmode,
2277 V2DFmode,
2278 V8SFmode,
2279 V4DFmode,
2280 OOmode,
2281 XOmode,
2282 CCmode,
2283 CCUNSmode,
2284 CCEQmode,
2285 CCFPmode,
2286 };
2287
2288 /* Virtual regs we are interested in. */
2289 static const struct {
2290 int regno; /* register number. */
2291 const char *name; /* register name. */
2292 } virtual_regs[] = {
2293 { STACK_POINTER_REGNUM, "stack pointer:" },
2294 { TOC_REGNUM, "toc: " },
2295 { STATIC_CHAIN_REGNUM, "static chain: " },
2296 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2297 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2298 { ARG_POINTER_REGNUM, "arg pointer: " },
2299 { FRAME_POINTER_REGNUM, "frame pointer:" },
2300 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2301 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2302 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2303 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2304 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2305 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2306 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2307 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2308 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2309 };
2310
2311 fputs ("\nHard register information:\n", stderr);
2312 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2313 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2314 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2315 LAST_ALTIVEC_REGNO,
2316 "vs");
2317 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2318 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2319 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2320 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2321 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2322 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2323
2324 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2325 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2326 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2327
2328 fprintf (stderr,
2329 "\n"
2330 "d reg_class = %s\n"
2331 "f reg_class = %s\n"
2332 "v reg_class = %s\n"
2333 "wa reg_class = %s\n"
2334 "we reg_class = %s\n"
2335 "wr reg_class = %s\n"
2336 "wx reg_class = %s\n"
2337 "wA reg_class = %s\n"
2338 "\n",
2339 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2340 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2341 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2342 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2343 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2344 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2345 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2346 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2347
2348 nl = "\n";
2349 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2350 rs6000_debug_print_mode (m);
2351
2352 fputs ("\n", stderr);
2353
2354 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2355 {
2356 machine_mode mode1 = print_tieable_modes[m1];
2357 bool first_time = true;
2358
2359 nl = (const char *)0;
2360 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2361 {
2362 machine_mode mode2 = print_tieable_modes[m2];
2363 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2364 {
2365 if (first_time)
2366 {
2367 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2368 nl = "\n";
2369 first_time = false;
2370 }
2371
2372 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2373 }
2374 }
2375
2376 if (!first_time)
2377 fputs ("\n", stderr);
2378 }
2379
2380 if (nl)
2381 fputs (nl, stderr);
2382
2383 if (rs6000_recip_control)
2384 {
2385 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2386
2387 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2388 if (rs6000_recip_bits[m])
2389 {
2390 fprintf (stderr,
2391 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2392 GET_MODE_NAME (m),
2393 (RS6000_RECIP_AUTO_RE_P (m)
2394 ? "auto"
2395 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2396 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2397 ? "auto"
2398 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2399 }
2400
2401 fputs ("\n", stderr);
2402 }
2403
2404 if (rs6000_cpu_index >= 0)
2405 {
2406 const char *name = processor_target_table[rs6000_cpu_index].name;
2407 HOST_WIDE_INT flags
2408 = processor_target_table[rs6000_cpu_index].target_enable;
2409
2410 sprintf (flags_buffer, "-mcpu=%s flags", name);
2411 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2412 }
2413 else
2414 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2415
2416 if (rs6000_tune_index >= 0)
2417 {
2418 const char *name = processor_target_table[rs6000_tune_index].name;
2419 HOST_WIDE_INT flags
2420 = processor_target_table[rs6000_tune_index].target_enable;
2421
2422 sprintf (flags_buffer, "-mtune=%s flags", name);
2423 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2424 }
2425 else
2426 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2427
2428 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2429 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2430 rs6000_isa_flags);
2431
2432 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2433 rs6000_isa_flags_explicit);
2434
2435 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2436 rs6000_builtin_mask);
2437
2438 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2439
2440 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2441 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2442
2443 switch (rs6000_sched_costly_dep)
2444 {
2445 case max_dep_latency:
2446 costly_str = "max_dep_latency";
2447 break;
2448
2449 case no_dep_costly:
2450 costly_str = "no_dep_costly";
2451 break;
2452
2453 case all_deps_costly:
2454 costly_str = "all_deps_costly";
2455 break;
2456
2457 case true_store_to_load_dep_costly:
2458 costly_str = "true_store_to_load_dep_costly";
2459 break;
2460
2461 case store_to_load_dep_costly:
2462 costly_str = "store_to_load_dep_costly";
2463 break;
2464
2465 default:
2466 costly_str = costly_num;
2467 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2468 break;
2469 }
2470
2471 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2472
2473 switch (rs6000_sched_insert_nops)
2474 {
2475 case sched_finish_regroup_exact:
2476 nop_str = "sched_finish_regroup_exact";
2477 break;
2478
2479 case sched_finish_pad_groups:
2480 nop_str = "sched_finish_pad_groups";
2481 break;
2482
2483 case sched_finish_none:
2484 nop_str = "sched_finish_none";
2485 break;
2486
2487 default:
2488 nop_str = nop_num;
2489 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2490 break;
2491 }
2492
2493 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2494
2495 switch (rs6000_sdata)
2496 {
2497 default:
2498 case SDATA_NONE:
2499 break;
2500
2501 case SDATA_DATA:
2502 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2503 break;
2504
2505 case SDATA_SYSV:
2506 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2507 break;
2508
2509 case SDATA_EABI:
2510 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2511 break;
2512
2513 }
2514
2515 switch (rs6000_traceback)
2516 {
2517 case traceback_default: trace_str = "default"; break;
2518 case traceback_none: trace_str = "none"; break;
2519 case traceback_part: trace_str = "part"; break;
2520 case traceback_full: trace_str = "full"; break;
2521 default: trace_str = "unknown"; break;
2522 }
2523
2524 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2525
2526 switch (rs6000_current_cmodel)
2527 {
2528 case CMODEL_SMALL: cmodel_str = "small"; break;
2529 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2530 case CMODEL_LARGE: cmodel_str = "large"; break;
2531 default: cmodel_str = "unknown"; break;
2532 }
2533
2534 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2535
2536 switch (rs6000_current_abi)
2537 {
2538 case ABI_NONE: abi_str = "none"; break;
2539 case ABI_AIX: abi_str = "aix"; break;
2540 case ABI_ELFv2: abi_str = "ELFv2"; break;
2541 case ABI_V4: abi_str = "V4"; break;
2542 case ABI_DARWIN: abi_str = "darwin"; break;
2543 default: abi_str = "unknown"; break;
2544 }
2545
2546 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2547
2548 if (rs6000_altivec_abi)
2549 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2550
2551 if (rs6000_aix_extabi)
2552 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2553
2554 if (rs6000_darwin64_abi)
2555 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2556
2557 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2558 (TARGET_SOFT_FLOAT ? "true" : "false"));
2559
2560 if (TARGET_LINK_STACK)
2561 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2562
2563 if (TARGET_P8_FUSION)
2564 {
2565 char options[80];
2566
2567 strcpy (options, "power8");
2568 if (TARGET_P8_FUSION_SIGN)
2569 strcat (options, ", sign");
2570
2571 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2572 }
2573
2574 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2575 TARGET_SECURE_PLT ? "secure" : "bss");
2576 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2577 aix_struct_return ? "aix" : "sysv");
2578 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2579 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2580 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2581 tf[!!rs6000_align_branch_targets]);
2582 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2583 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2584 rs6000_long_double_type_size);
2585 if (rs6000_long_double_type_size > 64)
2586 {
2587 fprintf (stderr, DEBUG_FMT_S, "long double type",
2588 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2589 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2590 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2591 }
2592 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2593 (int)rs6000_sched_restricted_insns_priority);
2594 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2595 (int)END_BUILTINS);
2596
2597 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2598 (int)TARGET_FLOAT128_ENABLE_TYPE);
2599
2600 if (TARGET_VSX)
2601 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2602 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2603
2604 if (TARGET_DIRECT_MOVE_128)
2605 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2606 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2607 }
2608
2609 \f
2610 /* Update the addr mask bits in reg_addr to help the secondary reload and
2611 GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing
2612 to use. */
2613
2614 static void
2615 rs6000_setup_reg_addr_masks (void)
2616 {
2617 ssize_t rc, reg, m, nregs;
2618 addr_mask_type any_addr_mask, addr_mask;
2619
2620 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2621 {
2622 machine_mode m2 = (machine_mode) m;
2623 bool complex_p = false;
2624 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2625 size_t msize;
2626
2627 if (COMPLEX_MODE_P (m2))
2628 {
2629 complex_p = true;
2630 m2 = GET_MODE_INNER (m2);
2631 }
2632
2633 msize = GET_MODE_SIZE (m2);
2634
2635 /* SDmode is special in that we want to access it only via REG+REG
2636 addressing on power7 and above, since we want to use the LFIWZX and
2637 STFIWX instructions to load and store it. */
2638 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2639
2640 any_addr_mask = 0;
2641 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2642 {
2643 addr_mask = 0;
2644 reg = reload_reg_map[rc].reg;
2645
2646 /* Can mode values go in the GPR/FPR/Altivec registers? */
2647 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2648 {
2649 bool small_int_vsx_p = (small_int_p
2650 && (rc == RELOAD_REG_FPR
2651 || rc == RELOAD_REG_VMX));
2652
2653 nregs = rs6000_hard_regno_nregs[m][reg];
2654 addr_mask |= RELOAD_REG_VALID;
2655
2656 /* Indicate if the mode takes more than 1 physical register. If
2657 it takes a single register, indicate it can do REG+REG
2658 addressing. Small integers in VSX registers can only do
2659 REG+REG addressing. */
2660 if (small_int_vsx_p)
2661 addr_mask |= RELOAD_REG_INDEXED;
2662 else if (nregs > 1 || m == BLKmode || complex_p)
2663 addr_mask |= RELOAD_REG_MULTIPLE;
2664 else
2665 addr_mask |= RELOAD_REG_INDEXED;
2666
2667 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2668 addressing. If we allow scalars into Altivec registers,
2669 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2670
2671 For VSX systems, we don't allow update addressing for
2672 DFmode/SFmode if those registers can go in both the
2673 traditional floating point registers and Altivec registers.
2674 The load/store instructions for the Altivec registers do not
2675 have update forms. If we allowed update addressing, it seems
2676 to break IV-OPT code using floating point if the index type is
2677 int instead of long (PR target/81550 and target/84042). */
2678
2679 if (TARGET_UPDATE
2680 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2681 && msize <= 8
2682 && !VECTOR_MODE_P (m2)
2683 && !VECTOR_ALIGNMENT_P (m2)
2684 && !complex_p
2685 && (m != E_DFmode || !TARGET_VSX)
2686 && (m != E_SFmode || !TARGET_P8_VECTOR)
2687 && !small_int_vsx_p)
2688 {
2689 addr_mask |= RELOAD_REG_PRE_INCDEC;
2690
2691 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2692 we don't allow PRE_MODIFY for some multi-register
2693 operations. */
2694 switch (m)
2695 {
2696 default:
2697 addr_mask |= RELOAD_REG_PRE_MODIFY;
2698 break;
2699
2700 case E_DImode:
2701 if (TARGET_POWERPC64)
2702 addr_mask |= RELOAD_REG_PRE_MODIFY;
2703 break;
2704
2705 case E_DFmode:
2706 case E_DDmode:
2707 if (TARGET_HARD_FLOAT)
2708 addr_mask |= RELOAD_REG_PRE_MODIFY;
2709 break;
2710 }
2711 }
2712 }
2713
2714 /* GPR and FPR registers can do REG+OFFSET addressing, except
2715 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2716 for 64-bit scalars and 32-bit SFmode to Altivec registers. */
2717 if ((addr_mask != 0) && !indexed_only_p
2718 && msize <= 8
2719 && (rc == RELOAD_REG_GPR
2720 || ((msize == 8 || m2 == SFmode)
2721 && (rc == RELOAD_REG_FPR
2722 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2723 addr_mask |= RELOAD_REG_OFFSET;
2724
2725 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2726 instructions are enabled. The offset field for 128-bit VSX registers is
2727 only 12 bits. While GPRs can handle the full offset range, VSX
2728 registers can only handle the restricted range. */
2729 else if ((addr_mask != 0) && !indexed_only_p
2730 && msize == 16 && TARGET_P9_VECTOR
2731 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2732 || (m2 == TImode && TARGET_VSX)))
2733 {
2734 addr_mask |= RELOAD_REG_OFFSET;
2735 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2736 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2737 }
2738
2739 /* Vector pairs can do both indexed and offset loads if the
2740 instructions are enabled, otherwise they can only do offset loads
2741 since it will be broken into two vector moves. Vector quads can
2742 only do offset loads. */
2743 else if ((addr_mask != 0) && TARGET_MMA
2744 && (m2 == OOmode || m2 == XOmode))
2745 {
2746 addr_mask |= RELOAD_REG_OFFSET;
2747 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2748 {
2749 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2750 if (m2 == OOmode)
2751 addr_mask |= RELOAD_REG_INDEXED;
2752 }
2753 }
2754
2755 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2756 addressing on 128-bit types. */
2757 if (rc == RELOAD_REG_VMX && msize == 16
2758 && (addr_mask & RELOAD_REG_VALID) != 0)
2759 addr_mask |= RELOAD_REG_AND_M16;
2760
2761 reg_addr[m].addr_mask[rc] = addr_mask;
2762 any_addr_mask |= addr_mask;
2763 }
2764
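/* RELOAD_REG_ANY is the union of the capabilities of all the reload
   register classes for this mode.  */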
2765 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2766 }
2767 }
2768
2769 \f
2770 /* Initialize the various global tables that are based on register size. */
2771 static void
2772 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2773 {
2774 ssize_t r, m, c;
2775 int align64;
2776 int align32;
2777
2778 /* Precalculate REGNO_REG_CLASS. */
2779 rs6000_regno_regclass[0] = GENERAL_REGS;
2780 for (r = 1; r < 32; ++r)
2781 rs6000_regno_regclass[r] = BASE_REGS;
2782
2783 for (r = 32; r < 64; ++r)
2784 rs6000_regno_regclass[r] = FLOAT_REGS;
2785
2786 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2787 rs6000_regno_regclass[r] = NO_REGS;
2788
2789 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2790 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2791
2792 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2793 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2794 rs6000_regno_regclass[r] = CR_REGS;
2795
2796 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2797 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2798 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2799 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2800 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2801 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2802 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2803
2804 /* Precalculate register class to simpler reload register class. We don't
2805 need all of the register classes that are combinations of different
2806 classes, just the simple ones that have constraint letters. */
2807 for (c = 0; c < N_REG_CLASSES; c++)
2808 reg_class_to_reg_type[c] = NO_REG_TYPE;
2809
2810 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2811 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2812 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2813 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2814 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2815 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2816 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2817 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2818 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2819 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2820
2821 if (TARGET_VSX)
2822 {
2823 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2824 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2825 }
2826 else
2827 {
2828 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2829 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2830 }
2831
2832 /* Precalculate the valid memory formats as well as the vector information,
2833 this must be set up before the rs6000_hard_regno_nregs_internal calls
2834 below. */
2835 gcc_assert ((int)VECTOR_NONE == 0);
2836 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2837 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2838
2839 gcc_assert ((int)CODE_FOR_nothing == 0);
2840 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2841
2842 gcc_assert ((int)NO_REGS == 0);
2843 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2844
2845 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2846 controls whether the compiler may use it or must keep 128-bit alignment. */
2847 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2848 {
2849 align64 = 64;
2850 align32 = 32;
2851 }
2852 else
2853 {
2854 align64 = 128;
2855 align32 = 128;
2856 }
2857
2858 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2859 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2860 if (TARGET_FLOAT128_TYPE)
2861 {
2862 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2863 rs6000_vector_align[KFmode] = 128;
2864
2865 if (FLOAT128_IEEE_P (TFmode))
2866 {
2867 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2868 rs6000_vector_align[TFmode] = 128;
2869 }
2870 }
2871
2872 /* V2DF mode, VSX only. */
2873 if (TARGET_VSX)
2874 {
2875 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2876 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2877 rs6000_vector_align[V2DFmode] = align64;
2878 }
2879
2880 /* V4SF mode, either VSX or Altivec. */
2881 if (TARGET_VSX)
2882 {
2883 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2884 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2885 rs6000_vector_align[V4SFmode] = align32;
2886 }
2887 else if (TARGET_ALTIVEC)
2888 {
2889 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2890 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2891 rs6000_vector_align[V4SFmode] = align32;
2892 }
2893
2894 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2895 and stores. */
2896 if (TARGET_ALTIVEC)
2897 {
2898 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2899 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2900 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2901 rs6000_vector_align[V4SImode] = align32;
2902 rs6000_vector_align[V8HImode] = align32;
2903 rs6000_vector_align[V16QImode] = align32;
2904
2905 if (TARGET_VSX)
2906 {
2907 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2908 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2909 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2910 }
2911 else
2912 {
2913 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2914 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2915 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2916 }
2917 }
2918
2919 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2920 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2921 if (TARGET_VSX)
2922 {
2923 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2924 rs6000_vector_unit[V2DImode]
2925 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2926 rs6000_vector_align[V2DImode] = align64;
2927
2928 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2929 rs6000_vector_unit[V1TImode]
2930 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2931 rs6000_vector_align[V1TImode] = 128;
2932 }
2933
2934 /* DFmode, see if we want to use the VSX unit. Memory is handled
2935 differently, so don't set rs6000_vector_mem. */
2936 if (TARGET_VSX)
2937 {
2938 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2939 rs6000_vector_align[DFmode] = 64;
2940 }
2941
2942 /* SFmode, see if we want to use the VSX unit. */
2943 if (TARGET_P8_VECTOR)
2944 {
2945 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2946 rs6000_vector_align[SFmode] = 32;
2947 }
2948
2949 /* Allow TImode in VSX register and set the VSX memory macros. */
2950 if (TARGET_VSX)
2951 {
2952 rs6000_vector_mem[TImode] = VECTOR_VSX;
2953 rs6000_vector_align[TImode] = align64;
2954 }
2955
2956 /* Add support for vector pairs and vector quad registers. */
2957 if (TARGET_MMA)
2958 {
2959 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2960 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2961 rs6000_vector_align[OOmode] = 256;
2962
2963 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2964 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2965 rs6000_vector_align[XOmode] = 512;
2966 }
2967
2968 /* Register class constraints for the constraints that depend on compile
2969 switches. When the VSX code was added, different constraints were added
2970 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2971 of the VSX registers are used. The register classes for scalar floating
2972 point types is set, based on whether we allow that type into the upper
2973 (Altivec) registers. GCC has register classes to target the Altivec
2974 registers for load/store operations, to select using a VSX memory
2975 operation instead of the traditional floating point operation. The
2976 constraints are:
2977
2978 d - Register class to use with traditional DFmode instructions.
2979 f - Register class to use with traditional SFmode instructions.
2980 v - Altivec register.
2981 wa - Any VSX register.
2982 wc - Reserved to represent individual CR bits (used in LLVM).
2983 wn - always NO_REGS.
2984 wr - GPR if 64-bit mode is permitted.
2985 wx - Float register if we can do 32-bit int stores. */
2986
2987 if (TARGET_HARD_FLOAT)
2988 {
2989 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2990 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2991 }
2992
2993 if (TARGET_VSX)
2994 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2995
2996 /* Add conditional constraints based on various options, to allow us to
2997 collapse multiple insn patterns. */
2998 if (TARGET_ALTIVEC)
2999 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3000
3001 if (TARGET_POWERPC64)
3002 {
3003 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3004 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3005 }
3006
3007 if (TARGET_STFIWX)
3008 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3009
3010 /* Support for new direct moves (ISA 3.0 + 64-bit). */
3011 if (TARGET_DIRECT_MOVE_128)
3012 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3013
3014 /* Set up the reload helper and direct move functions. */
3015 if (TARGET_VSX || TARGET_ALTIVEC)
3016 {
3017 if (TARGET_64BIT)
3018 {
3019 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3020 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3021 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3022 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3023 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3024 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3025 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3026 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3027 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3028 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3029 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3030 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3031 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3032 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3033 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3034 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3035 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3036 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3037 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3038 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3039
3040 if (FLOAT128_VECTOR_P (KFmode))
3041 {
3042 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3043 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3044 }
3045
3046 if (FLOAT128_VECTOR_P (TFmode))
3047 {
3048 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3049 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3050 }
3051
3052 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3053 available. */
3054 if (TARGET_NO_SDMODE_STACK)
3055 {
3056 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3057 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3058 }
3059
3060 if (TARGET_VSX)
3061 {
3062 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3063 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3064 }
3065
3066 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3067 {
3068 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3069 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3070 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3071 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3072 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3073 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3074 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3075 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3076 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3077
3078 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3079 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3080 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3081 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3082 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3083 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3084 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3085 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3086 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3087
3088 if (FLOAT128_VECTOR_P (KFmode))
3089 {
3090 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3091 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3092 }
3093
3094 if (FLOAT128_VECTOR_P (TFmode))
3095 {
3096 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3097 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3098 }
3099
3100 if (TARGET_MMA)
3101 {
3102 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3103 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3104 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3105 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3106 }
3107 }
3108 }
3109 else
3110 {
3111 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3112 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3113 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3114 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3115 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3116 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3117 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3118 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3119 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3120 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3121 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3122 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3123 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3124 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3125 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3126 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3127 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3128 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3129 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3130 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3131
3132 if (FLOAT128_VECTOR_P (KFmode))
3133 {
3134 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3135 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3136 }
3137
3138 if (FLOAT128_IEEE_P (TFmode))
3139 {
3140 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3141 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3142 }
3143
3144 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3145 available. */
3146 if (TARGET_NO_SDMODE_STACK)
3147 {
3148 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3149 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3150 }
3151
3152 if (TARGET_VSX)
3153 {
3154 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3155 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3156 }
3157
3158 if (TARGET_DIRECT_MOVE)
3159 {
3160 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3161 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3162 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3163 }
3164 }
3165
3166 reg_addr[DFmode].scalar_in_vmx_p = true;
3167 reg_addr[DImode].scalar_in_vmx_p = true;
3168
3169 if (TARGET_P8_VECTOR)
3170 {
3171 reg_addr[SFmode].scalar_in_vmx_p = true;
3172 reg_addr[SImode].scalar_in_vmx_p = true;
3173
3174 if (TARGET_P9_VECTOR)
3175 {
3176 reg_addr[HImode].scalar_in_vmx_p = true;
3177 reg_addr[QImode].scalar_in_vmx_p = true;
3178 }
3179 }
3180 }
3181
3182 /* Precalculate HARD_REGNO_NREGS. */
3183 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3184 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3185 rs6000_hard_regno_nregs[m][r]
3186 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3187
3188 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3189 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3190 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3191 rs6000_hard_regno_mode_ok_p[m][r]
3192 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3193
3194 /* Precalculate CLASS_MAX_NREGS sizes. */
3195 for (c = 0; c < LIM_REG_CLASSES; ++c)
3196 {
3197 int reg_size;
3198
3199 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3200 reg_size = UNITS_PER_VSX_WORD;
3201
3202 else if (c == ALTIVEC_REGS)
3203 reg_size = UNITS_PER_ALTIVEC_WORD;
3204
3205 else if (c == FLOAT_REGS)
3206 reg_size = UNITS_PER_FP_WORD;
3207
3208 else
3209 reg_size = UNITS_PER_WORD;
3210
3211 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3212 {
3213 machine_mode m2 = (machine_mode)m;
3214 int reg_size2 = reg_size;
3215
3216 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3217 in VSX. */
3218 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3219 reg_size2 = UNITS_PER_FP_WORD;
3220
3221 rs6000_class_max_nregs[m][c]
3222 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3223 }
3224 }
3225
3226 /* Calculate the modes for which to automatically generate code using the
3227 reciprocal divide and square root instructions. In the future, possibly
3228 automatically generate the instructions even if the user did not specify
3229 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3230 not accurate enough. */
3231 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3232 if (TARGET_FRES)
3233 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3234 if (TARGET_FRE)
3235 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3236 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3237 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3238 if (VECTOR_UNIT_VSX_P (V2DFmode))
3239 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3240
3241 if (TARGET_FRSQRTES)
3242 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3243 if (TARGET_FRSQRTE)
3244 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3245 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3246 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3247 if (VECTOR_UNIT_VSX_P (V2DFmode))
3248 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3249
3250 if (rs6000_recip_control)
3251 {
3252 if (!flag_finite_math_only)
3253 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3254 "-ffast-math");
3255 if (flag_trapping_math)
3256 warning (0, "%qs requires %qs or %qs", "-mrecip",
3257 "-fno-trapping-math", "-ffast-math");
3258 if (!flag_reciprocal_math)
3259 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3260 "-ffast-math");
3261 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3262 {
3263 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3264 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3265 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3266
3267 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3268 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3269 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3270
3271 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3272 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3273 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3274
3275 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3276 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3277 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3278
3279 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3280 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3281 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3282
3283 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3284 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3285 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3286
3287 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3288 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3289 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3290
3291 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3292 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3293 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3294 }
3295 }
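/* Illustrative flag interplay (no new semantics): compiling with
   -mrecip -ffast-math satisfies all three checks above, because
   -ffast-math implies -ffinite-math-only, -fno-trapping-math and
   -freciprocal-math; the AUTO_RE/AUTO_RSQRTE bits are then set for each
   mode whose HAVE_* bit is on and whose RECIP_* control bit is
   enabled.  */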
3296
3297 /* Update the addr mask bits in reg_addr to help secondary reload and the
3298 legitimate address support figure out the appropriate addressing to
3299 use. */
3300 rs6000_setup_reg_addr_masks ();
3301
3302 if (global_init_p || TARGET_DEBUG_TARGET)
3303 {
3304 if (TARGET_DEBUG_REG)
3305 rs6000_debug_reg_global ();
3306
3307 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3308 fprintf (stderr,
3309 "SImode variable mult cost = %d\n"
3310 "SImode constant mult cost = %d\n"
3311 "SImode short constant mult cost = %d\n"
3312 "DImode multipliciation cost = %d\n"
3313 "SImode division cost = %d\n"
3314 "DImode division cost = %d\n"
3315 "Simple fp operation cost = %d\n"
3316 "DFmode multiplication cost = %d\n"
3317 "SFmode division cost = %d\n"
3318 "DFmode division cost = %d\n"
3319 "cache line size = %d\n"
3320 "l1 cache size = %d\n"
3321 "l2 cache size = %d\n"
3322 "simultaneous prefetches = %d\n"
3323 "\n",
3324 rs6000_cost->mulsi,
3325 rs6000_cost->mulsi_const,
3326 rs6000_cost->mulsi_const9,
3327 rs6000_cost->muldi,
3328 rs6000_cost->divsi,
3329 rs6000_cost->divdi,
3330 rs6000_cost->fp,
3331 rs6000_cost->dmul,
3332 rs6000_cost->sdiv,
3333 rs6000_cost->ddiv,
3334 rs6000_cost->cache_line_size,
3335 rs6000_cost->l1_cache_size,
3336 rs6000_cost->l2_cache_size,
3337 rs6000_cost->simultaneous_prefetches);
3338 }
3339 }
3340
3341 #if TARGET_MACHO
3342 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3343
3344 static void
3345 darwin_rs6000_override_options (void)
3346 {
3347 /* The Darwin ABI always includes AltiVec; it cannot be (validly)
3348 turned off. */
3349 rs6000_altivec_abi = 1;
3350 TARGET_ALTIVEC_VRSAVE = 1;
3351 rs6000_current_abi = ABI_DARWIN;
3352
3353 if (DEFAULT_ABI == ABI_DARWIN
3354 && TARGET_64BIT)
3355 darwin_one_byte_bool = 1;
3356
3357 if (TARGET_64BIT && ! TARGET_POWERPC64)
3358 {
3359 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3360 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3361 }
3362
3363 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3364 optimisation, and will not work with the most generic case (where the
3365 symbol is undefined external, but there is no symbol stub). */
3366 if (TARGET_64BIT)
3367 rs6000_default_long_calls = 0;
3368
3369 /* ld_classic is (so far) still used for kernel (static) code, and supports
3370 the JBSR longcall / branch islands. */
3371 if (flag_mkernel)
3372 {
3373 rs6000_default_long_calls = 1;
3374
3375 /* Allow a kext author to do -mkernel -mhard-float. */
3376 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3377 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3378 }
3379
3380 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3381 Altivec. */
3382 if (!flag_mkernel && !flag_apple_kext
3383 && TARGET_64BIT
3384 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3385 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3386
3387 /* Unless the user (not the configurer) has explicitly overridden
3388 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3389 G4 unless targeting the kernel. */
3390 if (!flag_mkernel
3391 && !flag_apple_kext
3392 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3393 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3394 && ! OPTION_SET_P (rs6000_cpu_index))
3395 {
3396 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3397 }
3398 }
3399 #endif
3400
3401 /* If not otherwise specified by a target, make 'long double' equivalent to
3402 'double'. */
3403
3404 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3405 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3406 #endif
3407
3408 /* Return the builtin mask derived from the various options that affect
3409 which builtins are available. In the past we used target_flags, but we've
3410 run out of bits, and some options are no longer in target_flags. */
3411
3412 HOST_WIDE_INT
3413 rs6000_builtin_mask_calculate (void)
3414 {
3415 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3416 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3417 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3418 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3419 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3420 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3421 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3422 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3423 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3424 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3425 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3426 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3427 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3428 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3429 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3430 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3431 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3432 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3433 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3434 | ((TARGET_LONG_DOUBLE_128
3435 && TARGET_HARD_FLOAT
3436 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3437 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3438 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
3439 | ((TARGET_MMA) ? RS6000_BTM_MMA : 0)
3440 | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0));
3441 }
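/* Minimal usage sketch (hedged; enable_altivec_builtins is a
   hypothetical helper, while the RS6000_BTM_* bits are the real ones
   declared in rs6000.h):

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     if ((mask & RS6000_BTM_ALTIVEC) != 0)
       enable_altivec_builtins ();  */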
3442
3443 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3444 to clobber the XER[CA] bit because clobbering that bit without telling
3445 the compiler worked just fine with versions of GCC before GCC 5, and
3446 breaking a lot of older code in ways that are hard to track down is
3447 not such a great idea. */
3448
3449 static rtx_insn *
3450 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3451 vec<machine_mode> & /*input_modes*/,
3452 vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
3453 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3454 {
3455 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3456 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3457 return NULL;
3458 }
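/* For example (illustrative, pre-GCC 5 style), an asm such as

     asm ("addic %0,%1,-1\n\tsubfe %0,%0,%1" : "=r" (t) : "r" (x));

   modifies XER[CA] without declaring it; the unconditional clobber
   pushed above keeps such code correct.  */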
3459
3460 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3461 but is called when the optimize level is changed via an attribute or
3462 pragma or when it is reset at the end of the code affected by the
3463 attribute or pragma. It is not called at the beginning of compilation
3464 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3465 actions then, you should have TARGET_OPTION_OVERRIDE call
3466 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3467
3468 static void
3469 rs6000_override_options_after_change (void)
3470 {
3471 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3472 turns -frename-registers on. */
3473 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3474 || (OPTION_SET_P (flag_unroll_all_loops)
3475 && flag_unroll_all_loops))
3476 {
3477 if (!OPTION_SET_P (unroll_only_small_loops))
3478 unroll_only_small_loops = 0;
3479 if (!OPTION_SET_P (flag_rename_registers))
3480 flag_rename_registers = 1;
3481 if (!OPTION_SET_P (flag_cunroll_grow_size))
3482 flag_cunroll_grow_size = 1;
3483 }
3484 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3485 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3486
3487 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3488 if (rs6000_rop_protect)
3489 flag_shrink_wrap = 0;
3490 }
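/* Illustrative effect of the logic above: plain -O2 -funroll-loops
   leaves unroll_only_small_loops = 0, flag_rename_registers = 1 and
   flag_cunroll_grow_size = 1 unless the user set any of them
   explicitly, and adding -mrop-protect additionally forces
   flag_shrink_wrap = 0.  */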
3491
3492 #ifdef TARGET_USES_LINUX64_OPT
3493 static void
3494 rs6000_linux64_override_options ()
3495 {
3496 if (!OPTION_SET_P (rs6000_alignment_flags))
3497 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3498 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3499 {
3500 if (DEFAULT_ABI != ABI_AIX)
3501 {
3502 rs6000_current_abi = ABI_AIX;
3503 error (INVALID_64BIT, "call");
3504 }
3505 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3506 if (ELFv2_ABI_CHECK)
3507 {
3508 rs6000_current_abi = ABI_ELFv2;
3509 if (dot_symbols)
3510 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3511 }
3512 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3513 {
3514 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3515 error (INVALID_64BIT, "relocatable");
3516 }
3517 if (rs6000_isa_flags & OPTION_MASK_EABI)
3518 {
3519 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3520 error (INVALID_64BIT, "eabi");
3521 }
3522 if (TARGET_PROTOTYPE)
3523 {
3524 target_prototype = 0;
3525 error (INVALID_64BIT, "prototype");
3526 }
3527 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3528 {
3529 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3530 error ("%<-m64%> requires a PowerPC64 cpu");
3531 }
3532 if (!OPTION_SET_P (rs6000_current_cmodel))
3533 SET_CMODEL (CMODEL_MEDIUM);
3534 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3535 {
3536 if (OPTION_SET_P (rs6000_current_cmodel)
3537 && rs6000_current_cmodel != CMODEL_SMALL)
3538 error ("%<-mcmodel%> incompatible with other toc options");
3539 if (TARGET_MINIMAL_TOC)
3540 SET_CMODEL (CMODEL_SMALL);
3541 else if (TARGET_PCREL
3542 || (PCREL_SUPPORTED_BY_OS
3543 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3544 /* Ignore -mno-minimal-toc. */
3545 ;
3546 else
3547 SET_CMODEL (CMODEL_SMALL);
3548 }
3549 if (rs6000_current_cmodel != CMODEL_SMALL)
3550 {
3551 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3552 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3553 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3554 TARGET_NO_SUM_IN_TOC = 0;
3555 }
3556 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3557 {
3558 if (OPTION_SET_P (rs6000_pltseq))
3559 warning (0, "%qs unsupported for this ABI",
3560 "-mpltseq");
3561 rs6000_pltseq = false;
3562 }
3563 }
3564 else if (TARGET_64BIT)
3565 error (INVALID_32BIT, "32");
3566 else
3567 {
3568 if (TARGET_PROFILE_KERNEL)
3569 {
3570 profile_kernel = 0;
3571 error (INVALID_32BIT, "profile-kernel");
3572 }
3573 if (OPTION_SET_P (rs6000_current_cmodel))
3574 {
3575 SET_CMODEL (CMODEL_SMALL);
3576 error (INVALID_32BIT, "cmodel");
3577 }
3578 }
3579 }
3580 #endif
3581
3582 /* Return true if we are using GLIBC and it supports IEEE 128-bit long double.
3583 This support is only in little-endian GLIBC 2.32 or newer. */
3584 static bool
3585 glibc_supports_ieee_128bit (void)
3586 {
3587 #ifdef OPTION_GLIBC
3588 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3589 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3590 return true;
3591 #endif /* OPTION_GLIBC. */
3592
3593 return false;
3594 }
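/* Worked example of the version arithmetic above: glibc 2.32 encodes
   as 2 * 1000 + 32 = 2032, exactly the threshold, while glibc 2.31
   encodes as 2031 and is rejected.  */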
3595
3596 /* Override command line options.
3597
3598 Combine build-specific configuration information with options
3599 specified on the command line to set various state variables which
3600 influence code generation, optimization, and expansion of built-in
3601 functions. Assure that command-line configuration preferences are
3602 compatible with each other and with the build configuration; issue
3603 warnings while adjusting configuration or error messages while
3604 rejecting configuration.
3605
3606 Upon entry to this function:
3607
3608 This function is called once at the beginning of
3609 compilation, and then again at the start and end of compiling
3610 each section of code that has a different configuration, as
3611 indicated, for example, by adding the
3612
3613 __attribute__((__target__("cpu=power9")))
3614
3615 qualifier to a function definition or, for example, by bracketing
3616 code between
3617
3618 #pragma GCC target("altivec")
3619
3620 and
3621
3622 #pragma GCC reset_options
3623
3624 directives. Parameter global_init_p is true for the initial
3625 invocation, which initializes global variables, and false for all
3626 subsequent invocations.
3627
3628
3629 Various global state information is assumed to be valid. This
3630 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3631 default CPU specified at build configure time, TARGET_DEFAULT,
3632 representing the default set of option flags for the default
3633 target, and OPTION_SET_P (rs6000_isa_flags), representing
3634 which options were requested on the command line.
3635
3636 Upon return from this function:
3637
3638 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3639 was set by name on the command line. Additionally, if certain
3640 attributes are automatically enabled or disabled by this function
3641 in order to assure compatibility between options and
3642 configuration, the flags associated with those attributes are
3643 also set. By setting these "explicit bits", we avoid the risk
3644 that other code might accidentally overwrite these particular
3645 attributes with "default values".
3646
3647 The various bits of rs6000_isa_flags are set to indicate the
3648 target options that have been selected for the most current
3649 compilation efforts. This has the effect of also turning on the
3650 associated TARGET_XXX values since these are macros which are
3651 generally defined to test the corresponding bit of the
3652 rs6000_isa_flags variable.
3653
3654 The variable rs6000_builtin_mask is set to represent the target
3655 options for the most current compilation efforts, consistent with
3656 the current contents of rs6000_isa_flags. This variable controls
3657 expansion of built-in functions.
3658
3659 Various other global variables and fields of global structures
3660 (over 50 in all) are initialized to reflect the desired options
3661 for the most current compilation efforts. */
3662
3663 static bool
3664 rs6000_option_override_internal (bool global_init_p)
3665 {
3666 bool ret = true;
3667
3668 HOST_WIDE_INT set_masks;
3669 HOST_WIDE_INT ignore_masks;
3670 int cpu_index = -1;
3671 int tune_index;
3672 struct cl_target_option *main_target_opt
3673 = ((global_init_p || target_option_default_node == NULL)
3674 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3675
3676 /* Print defaults. */
3677 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3678 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3679
3680 /* Remember the explicit arguments. */
3681 if (global_init_p)
3682 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3683
3684 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3685 library functions, so warn about it. The flag may be useful for
3686 performance studies from time to time though, so don't disable it
3687 entirely. */
3688 if (OPTION_SET_P (rs6000_alignment_flags)
3689 && rs6000_alignment_flags == MASK_ALIGN_POWER
3690 && DEFAULT_ABI == ABI_DARWIN
3691 && TARGET_64BIT)
3692 warning (0, "%qs is not supported for 64-bit Darwin;"
3693 " it is incompatible with the installed C and C++ libraries",
3694 "-malign-power");
3695
3696 /* Numerous experiments show that IRA-based loop pressure
3697 calculation works better for RTL loop invariant motion on targets
3698 with enough (>= 32) registers. It is an expensive optimization,
3699 so it is enabled only when optimizing for peak performance. */
3700 if (optimize >= 3 && global_init_p
3701 && !OPTION_SET_P (flag_ira_loop_pressure))
3702 flag_ira_loop_pressure = 1;
3703
3704 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3705 for tracebacks to be complete, but not if the user already specified an
3706 -fasynchronous-unwind-tables setting. */
3707 if (flag_sanitize & SANITIZE_USER_ADDRESS
3708 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3709 flag_asynchronous_unwind_tables = 1;
3710
3711 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3712 loop unroller is active. It is only checked during unrolling, so
3713 we can just set it on by default. */
3714 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3715 flag_variable_expansion_in_unroller = 1;
3716
3717 /* Set the pointer size. */
3718 if (TARGET_64BIT)
3719 {
3720 rs6000_pmode = DImode;
3721 rs6000_pointer_size = 64;
3722 }
3723 else
3724 {
3725 rs6000_pmode = SImode;
3726 rs6000_pointer_size = 32;
3727 }
3728
3729 /* Some OSs don't support saving the high part of 64-bit registers on context
3730 switch. Other OSs don't support saving Altivec registers. On those OSs,
3731 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3732 if the user wants either, the user must explicitly specify them and we
3733 won't interfere with the user's specification. */
3734
3735 set_masks = POWERPC_MASKS;
3736 #ifdef OS_MISSING_POWERPC64
3737 if (OS_MISSING_POWERPC64)
3738 set_masks &= ~OPTION_MASK_POWERPC64;
3739 #endif
3740 #ifdef OS_MISSING_ALTIVEC
3741 if (OS_MISSING_ALTIVEC)
3742 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3743 | OTHER_VSX_VECTOR_MASKS);
3744 #endif
3745
3746 /* Don't override by the processor default if given explicitly. */
3747 set_masks &= ~rs6000_isa_flags_explicit;
3748
3749 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3750 the cpu in a target attribute or pragma, but did not specify a tuning
3751 option, use the cpu for the tuning option rather than the option specified
3752 with -mtune on the command line. Process a '--with-cpu' configuration
3753 request as an implicit --cpu. */
3754 if (rs6000_cpu_index >= 0)
3755 cpu_index = rs6000_cpu_index;
3756 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3757 cpu_index = main_target_opt->x_rs6000_cpu_index;
3758 else if (OPTION_TARGET_CPU_DEFAULT)
3759 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3760
3761 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3762 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3763 with those from the cpu, except for options that were explicitly set. If
3764 we don't have a cpu, do not override the target bits set in
3765 TARGET_DEFAULT. */
3766 if (cpu_index >= 0)
3767 {
3768 rs6000_cpu_index = cpu_index;
3769 rs6000_isa_flags &= ~set_masks;
3770 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3771 & set_masks);
3772 }
3773 else
3774 {
3775 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3776 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3777 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3778 to using rs6000_isa_flags, we need to do the initialization here.
3779
3780 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3781 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3782 HOST_WIDE_INT flags;
3783 if (TARGET_DEFAULT)
3784 flags = TARGET_DEFAULT;
3785 else
3786 {
3787 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3788 const char *default_cpu = (!TARGET_POWERPC64
3789 ? "powerpc"
3790 : (BYTES_BIG_ENDIAN
3791 ? "powerpc64"
3792 : "powerpc64le"));
3793 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3794 flags = processor_target_table[default_cpu_index].target_enable;
3795 }
3796 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3797 }
3798
3799 if (rs6000_tune_index >= 0)
3800 tune_index = rs6000_tune_index;
3801 else if (cpu_index >= 0)
3802 rs6000_tune_index = tune_index = cpu_index;
3803 else
3804 {
3805 size_t i;
3806 enum processor_type tune_proc
3807 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3808
3809 tune_index = -1;
3810 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3811 if (processor_target_table[i].processor == tune_proc)
3812 {
3813 tune_index = i;
3814 break;
3815 }
3816 }
3817
3818 if (cpu_index >= 0)
3819 rs6000_cpu = processor_target_table[cpu_index].processor;
3820 else
3821 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3822
3823 gcc_assert (tune_index >= 0);
3824 rs6000_tune = processor_target_table[tune_index].processor;
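/* Illustrative selection: -mcpu=power9 -mtune=power10 uses the power9
   entry of processor_target_table for the ISA bits but the power10
   entry for scheduling, whereas with only -mcpu=power9 given,
   tune_index above is simply copied from cpu_index.  */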
3825
3826 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3827 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3828 || rs6000_cpu == PROCESSOR_PPCE5500)
3829 {
3830 if (TARGET_ALTIVEC)
3831 error ("AltiVec not supported in this target");
3832 }
3833
3834 /* If we are optimizing big endian systems for space, use the load/store
3835 multiple instructions. */
3836 if (BYTES_BIG_ENDIAN && optimize_size)
3837 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3838
3839 /* Don't allow -mmultiple on little-endian systems unless the cpu is a
3840 750, because on other processors the hardware doesn't support these
3841 instructions in little-endian mode and they cause an alignment trap.
3842 The 750 does not trap (except when the target address is unaligned). */
3843
3844 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3845 {
3846 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3847 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3848 warning (0, "%qs is not supported on little endian systems",
3849 "-mmultiple");
3850 }
3851
3852 /* If little-endian, default to -mstrict-align on older processors.
3853 Testing for direct_move matches power8 and later. */
3854 if (!BYTES_BIG_ENDIAN
3855 && !(processor_target_table[tune_index].target_enable
3856 & OPTION_MASK_DIRECT_MOVE))
3857 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3858
3859 if (!rs6000_fold_gimple)
3860 fprintf (stderr,
3861 "gimple folding of rs6000 builtins has been disabled.\n");
3862
3863 /* Add some warnings for VSX. */
3864 if (TARGET_VSX)
3865 {
3866 const char *msg = NULL;
3867 if (!TARGET_HARD_FLOAT)
3868 {
3869 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3870 msg = N_("%<-mvsx%> requires hardware floating point");
3871 else
3872 {
3873 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3874 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3875 }
3876 }
3877 else if (TARGET_AVOID_XFORM > 0)
3878 msg = N_("%<-mvsx%> needs indexed addressing");
3879 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3880 & OPTION_MASK_ALTIVEC))
3881 {
3882 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3883 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3884 else
3885 msg = N_("%<-mno-altivec%> disables vsx");
3886 }
3887
3888 if (msg)
3889 {
3890 warning (0, msg);
3891 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3892 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3893 }
3894 }
3895
3896 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3897 the -mcpu setting to enable options that conflict. */
3898 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3899 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3900 | OPTION_MASK_ALTIVEC
3901 | OPTION_MASK_VSX)) != 0)
3902 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3903 | OPTION_MASK_DIRECT_MOVE)
3904 & ~rs6000_isa_flags_explicit);
3905
3906 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3907 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3908
3909 #ifdef XCOFF_DEBUGGING_INFO
3910 /* For AIX default to 64-bit DWARF. */
3911 if (!OPTION_SET_P (dwarf_offset_size))
3912 dwarf_offset_size = POINTER_SIZE_UNITS;
3913 #endif
3914
3915 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3916 off all of the options that depend on those flags. */
3917 ignore_masks = rs6000_disable_incompatible_switches ();
3918
3919 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3920 unless the user explicitly used the -mno-<option> to disable the code. */
3921 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3922 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3923 else if (TARGET_P9_MINMAX)
3924 {
3925 if (cpu_index >= 0)
3926 {
3927 if (cpu_index == PROCESSOR_POWER9)
3928 {
3929 /* Legacy behavior: allow -mcpu=power9 with certain
3930 capabilities explicitly disabled. */
3931 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3932 }
3933 else
3934 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3935 "for <xxx> less than power9", "-mcpu");
3936 }
3937 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3938 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3939 & rs6000_isa_flags_explicit))
3940 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3941 were explicitly cleared. */
3942 error ("%qs incompatible with explicitly disabled options",
3943 "-mpower9-minmax");
3944 else
3945 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3946 }
3947 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3948 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3949 else if (TARGET_VSX)
3950 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3951 else if (TARGET_POPCNTD)
3952 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3953 else if (TARGET_DFP)
3954 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3955 else if (TARGET_CMPB)
3956 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3957 else if (TARGET_FPRND)
3958 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3959 else if (TARGET_POPCNTB)
3960 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3961 else if (TARGET_ALTIVEC)
3962 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3963
3964 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3965 {
3966 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3967 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3968 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3969 }
3970
3971 if (!TARGET_FPRND && TARGET_VSX)
3972 {
3973 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3974 /* TARGET_VSX = 1 implies Power 7 and newer. */
3975 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3976 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3977 }
3978
3979 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3980 {
3981 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3982 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3983 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3984 }
3985
3986 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3987 {
3988 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3989 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3990 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3991 }
3992
3993 if (TARGET_P8_VECTOR && !TARGET_VSX)
3994 {
3995 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3996 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3997 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3998 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3999 {
4000 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4001 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4002 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4003 }
4004 else
4005 {
4006 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4007 not explicit. */
4008 rs6000_isa_flags |= OPTION_MASK_VSX;
4009 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4010 }
4011 }
4012
4013 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4014 {
4015 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4016 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4017 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4018 }
4019
4020 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4021 silently turn off quad memory mode. */
4022 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4023 {
4024 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4025 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
4026
4027 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4028 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
4029
4030 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4031 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4032 }
4033
4034 /* Non-atomic quad memory load/store are disabled for little endian, since
4035 the words are reversed, but atomic operations can still be done by
4036 swapping the words. */
4037 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4038 {
4039 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4040 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4041 "mode"));
4042
4043 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4044 }
4045
4046 /* Assume that if the user asked for normal quad memory instructions, they
4047 want the atomic versions as well, unless they explicitly told us not to
4048 use quad word atomic instructions. */
4049 if (TARGET_QUAD_MEMORY
4050 && !TARGET_QUAD_MEMORY_ATOMIC
4051 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4052 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4053
4054 /* If we can shrink-wrap the TOC register save separately, then use
4055 -msave-toc-indirect unless explicitly disabled. */
4056 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4057 && flag_shrink_wrap_separate
4058 && optimize_function_for_speed_p (cfun))
4059 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4060
4061 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4062 generating power8 instructions. Power9 does not optimize power8 fusion
4063 cases. */
4064 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4065 {
4066 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4067 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4068 else
4069 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4070 }
4071
4072 /* Setting additional fusion flags turns on base fusion. */
4073 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4074 {
4075 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4076 {
4077 if (TARGET_P8_FUSION_SIGN)
4078 error ("%qs requires %qs", "-mpower8-fusion-sign",
4079 "-mpower8-fusion");
4080
4081 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4082 }
4083 else
4084 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4085 }
4086
4087 /* Power8 does not fuse sign extended loads with the addis. If we are
4088 optimizing at high levels for speed, convert a sign extended load into a
4089 zero extending load, and an explicit sign extension. */
4090 if (TARGET_P8_FUSION
4091 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4092 && optimize_function_for_speed_p (cfun)
4093 && optimize >= 3)
4094 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
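/* Illustrative transformation enabled by this flag: instead of a
   sign-extending lha, which power8 cannot fuse with the preceding
   addis, emit a zero-extending lhz (which can fuse) followed by an
   explicit extsh to redo the sign extension.  */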
4095
4096 /* ISA 3.0 vector instructions include ISA 2.07. */
4097 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4098 {
4099 /* We prefer not to mention undocumented options in
4100 error messages. However, if users have managed to select
4101 power9-vector without selecting power8-vector, they
4102 already know about undocumented flags. */
4103 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4104 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4105 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4106 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4107 {
4108 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4109 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4110 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4111 }
4112 else
4113 {
4114 /* OPTION_MASK_P9_VECTOR is explicit and
4115 OPTION_MASK_P8_VECTOR is not explicit. */
4116 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4117 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4118 }
4119 }
4120
4121 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4122 support. If we only have ISA 2.06 support, and the user did not specify
4123 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4124 but we don't enable the full vectorization support. */
4125 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4126 TARGET_ALLOW_MOVMISALIGN = 1;
4127
4128 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4129 {
4130 if (TARGET_ALLOW_MOVMISALIGN > 0
4131 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4132 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4133
4134 TARGET_ALLOW_MOVMISALIGN = 0;
4135 }
4136
4137 /* Determine when unaligned vector accesses are permitted, and when
4138 they are preferred over masked Altivec loads. Note that if
4139 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4140 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4141 not true. */
4142 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4143 {
4144 if (!TARGET_VSX)
4145 {
4146 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4147 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4148
4149 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4150 }
4151
4152 else if (!TARGET_ALLOW_MOVMISALIGN)
4153 {
4154 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4155 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4156 "-mallow-movmisalign");
4157
4158 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4159 }
4160 }
4161
4162 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4163 {
4164 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4165 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4166 else
4167 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4168 }
4169
4170 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
4171 {
4172 if (TARGET_MMA && TARGET_EFFICIENT_UNALIGNED_VSX)
4173 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4174 else
4175 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4176 }
4177
4178 /* Use long double size to select the appropriate long double. We use
4179 TYPE_PRECISION to differentiate the 3 different long double types. We map
4180 128 into the precision used for TFmode. */
4181 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4182 ? 64
4183 : FLOAT_PRECISION_TFmode);
4184
4185 /* Set long double size before the IEEE 128-bit tests. */
4186 if (!OPTION_SET_P (rs6000_long_double_type_size))
4187 {
4188 if (main_target_opt != NULL
4189 && (main_target_opt->x_rs6000_long_double_type_size
4190 != default_long_double_size))
4191 error ("target attribute or pragma changes %<long double%> size");
4192 else
4193 rs6000_long_double_type_size = default_long_double_size;
4194 }
4195 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4196 ; /* The option value can be seen when cl_target_option_restore is called. */
4197 else if (rs6000_long_double_type_size == 128)
4198 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4199 else if (OPTION_SET_P (rs6000_ieeequad))
4200 {
4201 if (global_options.x_rs6000_ieeequad)
4202 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4203 else
4204 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4205 }
4206
4207 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4208 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4209 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4210 those systems will not pick up this default. Warn if the user changes the
4211 default unless -Wno-psabi. */
4212 if (!OPTION_SET_P (rs6000_ieeequad))
4213 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4214
4215 else
4216 {
4217 if (global_options.x_rs6000_ieeequad
4218 && (!TARGET_POPCNTD || !TARGET_VSX))
4219 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4220
4221 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4222 {
4223 /* Determine if the user can change the default long double type at
4224 compilation time. You need GLIBC 2.32 or newer to be able to
4225 change the long double type. Only issue one warning. */
4226 static bool warned_change_long_double;
4227
4228 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4229 {
4230 warned_change_long_double = true;
4231 if (TARGET_IEEEQUAD)
4232 warning (OPT_Wpsabi, "using IEEE extended precision "
4233 "%<long double%>");
4234 else
4235 warning (OPT_Wpsabi, "using IBM extended precision "
4236 "%<long double%>");
4237 }
4238 }
4239 }
4240
4241 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4242 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4243 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4244 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4245 the keyword and the type. */
4246 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4247
4248 /* IEEE 128-bit floating point requires VSX support. */
4249 if (TARGET_FLOAT128_KEYWORD)
4250 {
4251 if (!TARGET_VSX)
4252 {
4253 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4254 error ("%qs requires VSX support", "-mfloat128");
4255
4256 TARGET_FLOAT128_TYPE = 0;
4257 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4258 | OPTION_MASK_FLOAT128_HW);
4259 }
4260 else if (!TARGET_FLOAT128_TYPE)
4261 {
4262 TARGET_FLOAT128_TYPE = 1;
4263 warning (0, "the %<-mfloat128%> option may not be fully supported");
4264 }
4265 }
4266
4267 /* Enable the __float128 keyword under Linux by default. */
4268 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4269 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4270 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4271
4272 /* If we are supporting the float128 type and have full ISA 3.0 support,
4273 enable -mfloat128-hardware by default. However, don't enable the
4274 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4275 because sometimes the compiler wants to put things in an integer
4276 container, and if we don't have __int128 support, it is impossible. */
4277 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4278 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4279 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4280 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4281
4282 if (TARGET_FLOAT128_HW
4283 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4284 {
4285 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4286 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4287
4288 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4289 }
4290
4291 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4292 {
4293 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4294 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4295
4296 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4297 }
4298
4299 /* Enable -mprefixed by default on power10 systems. */
4300 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4301 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4302
4303 /* -mprefixed requires -mcpu=power10 (or later). */
4304 else if (TARGET_PREFIXED && !TARGET_POWER10)
4305 {
4306 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4307 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4308
4309 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4310 }
4311
4312 /* -mpcrel requires prefixed load/store addressing. */
4313 if (TARGET_PCREL && !TARGET_PREFIXED)
4314 {
4315 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4316 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4317
4318 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4319 }
4320
4321 /* Print the options after updating the defaults. */
4322 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4323 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4324
4325 /* E500mc does "better" if we inline more aggressively. Respect the
4326 user's opinion, though. */
4327 if (rs6000_block_move_inline_limit == 0
4328 && (rs6000_tune == PROCESSOR_PPCE500MC
4329 || rs6000_tune == PROCESSOR_PPCE500MC64
4330 || rs6000_tune == PROCESSOR_PPCE5500
4331 || rs6000_tune == PROCESSOR_PPCE6500))
4332 rs6000_block_move_inline_limit = 128;
4333
4334 /* store_one_arg depends on expand_block_move to handle at least the
4335 size of reg_parm_stack_space. */
4336 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4337 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4338
4339 if (global_init_p)
4340 {
4341 /* If the appropriate debug option is enabled, replace the target hooks
4342 with debug versions that call the real version and then print
4343 debugging information. */
4344 if (TARGET_DEBUG_COST)
4345 {
4346 targetm.rtx_costs = rs6000_debug_rtx_costs;
4347 targetm.address_cost = rs6000_debug_address_cost;
4348 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4349 }
4350
4351 if (TARGET_DEBUG_ADDR)
4352 {
4353 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4354 targetm.legitimize_address = rs6000_debug_legitimize_address;
4355 rs6000_secondary_reload_class_ptr
4356 = rs6000_debug_secondary_reload_class;
4357 targetm.secondary_memory_needed
4358 = rs6000_debug_secondary_memory_needed;
4359 targetm.can_change_mode_class
4360 = rs6000_debug_can_change_mode_class;
4361 rs6000_preferred_reload_class_ptr
4362 = rs6000_debug_preferred_reload_class;
4363 rs6000_mode_dependent_address_ptr
4364 = rs6000_debug_mode_dependent_address;
4365 }
4366
4367 if (rs6000_veclibabi_name)
4368 {
4369 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4370 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4371 else
4372 {
4373 error ("unknown vectorization library ABI type (%qs) for "
4374 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4375 ret = false;
4376 }
4377 }
4378 }
4379
4380 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4381 target attribute or pragma which automatically enables both options,
4382 unless the altivec ABI was set. This is set by default for 64-bit, but
4383 not for 32-bit. */
4384 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4385 {
4386 TARGET_FLOAT128_TYPE = 0;
4387 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4388 | OPTION_MASK_FLOAT128_KEYWORD)
4389 & ~rs6000_isa_flags_explicit);
4390 }
4391
4392 /* Enable Altivec ABI for AIX -maltivec. */
4393 if (TARGET_XCOFF
4394 && (TARGET_ALTIVEC || TARGET_VSX)
4395 && !OPTION_SET_P (rs6000_altivec_abi))
4396 {
4397 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4398 error ("target attribute or pragma changes AltiVec ABI");
4399 else
4400 rs6000_altivec_abi = 1;
4401 }
4402
4403 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4404 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4405 be explicitly overridden in either case. */
4406 if (TARGET_ELF)
4407 {
4408 if (!OPTION_SET_P (rs6000_altivec_abi)
4409 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4410 {
4411 if (main_target_opt != NULL
4412 && !main_target_opt->x_rs6000_altivec_abi)
4413 error ("target attribute or pragma changes AltiVec ABI");
4414 else
4415 rs6000_altivec_abi = 1;
4416 }
4417 }
4418
4419 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4420 So far, the only darwin64 targets are also Mach-O. */
4421 if (TARGET_MACHO
4422 && DEFAULT_ABI == ABI_DARWIN
4423 && TARGET_64BIT)
4424 {
4425 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4426 error ("target attribute or pragma changes darwin64 ABI");
4427 else
4428 {
4429 rs6000_darwin64_abi = 1;
4430 /* Default to natural alignment, for better performance. */
4431 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4432 }
4433 }
4434
4435 /* Place FP constants in the constant pool instead of the TOC
4436 if section anchors are enabled. */
4437 if (flag_section_anchors
4438 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4439 TARGET_NO_FP_IN_TOC = 1;
4440
4441 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4442 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4443
4444 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4445 SUBTARGET_OVERRIDE_OPTIONS;
4446 #endif
4447 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4448 SUBSUBTARGET_OVERRIDE_OPTIONS;
4449 #endif
4450 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4451 SUB3TARGET_OVERRIDE_OPTIONS;
4452 #endif
4453
4454 /* If the ABI has support for PC-relative relocations, enable it by default.
4455 This test depends on the sub-target tests above setting the code model to
4456 medium for ELF v2 systems. */
4457 if (PCREL_SUPPORTED_BY_OS
4458 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4459 rs6000_isa_flags |= OPTION_MASK_PCREL;
4460
4461 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4462 after the subtarget override options are done. */
4463 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4464 {
4465 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4466 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4467
4468 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4469 }
4470
4471 /* Enable -mmma by default on power10 systems. */
4472 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4473 rs6000_isa_flags |= OPTION_MASK_MMA;
4474
4475 if (TARGET_POWER10
4476 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
4477 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4478
4479 if (TARGET_POWER10 &&
4480 (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
4481 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
4482
4483 if (TARGET_POWER10
4484 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
4485 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;
4486
4487 if (TARGET_POWER10
4488 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LOGADD) == 0)
4489 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LOGADD;
4490
4491 if (TARGET_POWER10
4492 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ADDLOG) == 0)
4493 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ADDLOG;
4494
4495 if (TARGET_POWER10
4496 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
4497 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
4498
4499 if (TARGET_POWER10
4500 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
4501 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
4502
4503 /* Turn off vector pair/mma options on non-power10 systems. */
4504 else if (!TARGET_POWER10 && TARGET_MMA)
4505 {
4506 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4507 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4508
4509 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4510 }
4511
4512 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4513 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4514
4515 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4516 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4517
4518 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4519 && rs6000_tune != PROCESSOR_POWER5
4520 && rs6000_tune != PROCESSOR_POWER6
4521 && rs6000_tune != PROCESSOR_POWER7
4522 && rs6000_tune != PROCESSOR_POWER8
4523 && rs6000_tune != PROCESSOR_POWER9
4524 && rs6000_tune != PROCESSOR_POWER10
4525 && rs6000_tune != PROCESSOR_PPCA2
4526 && rs6000_tune != PROCESSOR_CELL
4527 && rs6000_tune != PROCESSOR_PPC476);
4528 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4529 || rs6000_tune == PROCESSOR_POWER5
4530 || rs6000_tune == PROCESSOR_POWER7
4531 || rs6000_tune == PROCESSOR_POWER8);
4532 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4533 || rs6000_tune == PROCESSOR_POWER5
4534 || rs6000_tune == PROCESSOR_POWER6
4535 || rs6000_tune == PROCESSOR_POWER7
4536 || rs6000_tune == PROCESSOR_POWER8
4537 || rs6000_tune == PROCESSOR_POWER9
4538 || rs6000_tune == PROCESSOR_POWER10
4539 || rs6000_tune == PROCESSOR_PPCE500MC
4540 || rs6000_tune == PROCESSOR_PPCE500MC64
4541 || rs6000_tune == PROCESSOR_PPCE5500
4542 || rs6000_tune == PROCESSOR_PPCE6500);
4543
4544 /* Allow debug switches to override the above settings. These are set to -1
4545 in rs6000.opt to indicate the user hasn't directly set the switch. */
4546 if (TARGET_ALWAYS_HINT >= 0)
4547 rs6000_always_hint = TARGET_ALWAYS_HINT;
4548
4549 if (TARGET_SCHED_GROUPS >= 0)
4550 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4551
4552 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4553 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4554
4555 rs6000_sched_restricted_insns_priority
4556 = (rs6000_sched_groups ? 1 : 0);
4557
4558 /* Handle -msched-costly-dep option. */
4559 rs6000_sched_costly_dep
4560 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4561
4562 if (rs6000_sched_costly_dep_str)
4563 {
4564 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4565 rs6000_sched_costly_dep = no_dep_costly;
4566 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4567 rs6000_sched_costly_dep = all_deps_costly;
4568 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4569 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4570 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4571 rs6000_sched_costly_dep = store_to_load_dep_costly;
4572 else
4573 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4574 atoi (rs6000_sched_costly_dep_str));
4575 }
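/* Usage examples for the parsing above (the numeric value is
   illustrative): -msched-costly-dep=store_to_load treats every
   store-to-load dependence as costly, while a bare number such as
   -msched-costly-dep=20 is stored via the atoi fallback and acts as a
   cost threshold.  */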
4576
4577 /* Handle -minsert-sched-nops option. */
4578 rs6000_sched_insert_nops
4579 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4580
4581 if (rs6000_sched_insert_nops_str)
4582 {
4583 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4584 rs6000_sched_insert_nops = sched_finish_none;
4585 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4586 rs6000_sched_insert_nops = sched_finish_pad_groups;
4587 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4588 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4589 else
4590 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4591 atoi (rs6000_sched_insert_nops_str));
4592 }
4593
4594 /* Handle the stack protector options. */
4595 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4596 #ifdef TARGET_THREAD_SSP_OFFSET
4597 rs6000_stack_protector_guard = SSP_TLS;
4598 #else
4599 rs6000_stack_protector_guard = SSP_GLOBAL;
4600 #endif
4601
4602 #ifdef TARGET_THREAD_SSP_OFFSET
4603 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4604 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4605 #endif
4606
4607 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4608 {
4609 char *endp;
4610 const char *str = rs6000_stack_protector_guard_offset_str;
4611
4612 errno = 0;
4613 long offset = strtol (str, &endp, 0);
4614 if (!*str || *endp || errno)
4615 error ("%qs is not a valid number in %qs", str,
4616 "-mstack-protector-guard-offset=");
4617
4618 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4619 || (TARGET_64BIT && (offset & 3)))
4620 error ("%qs is not a valid offset in %qs", str,
4621 "-mstack-protector-guard-offset=");
4622
4623 rs6000_stack_protector_guard_offset = offset;
4624 }
4625
4626 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4627 {
4628 const char *str = rs6000_stack_protector_guard_reg_str;
4629 int reg = decode_reg_name (str);
4630
4631 if (!IN_RANGE (reg, 1, 31))
4632 error ("%qs is not a valid base register in %qs", str,
4633 "-mstack-protector-guard-reg=");
4634
4635 rs6000_stack_protector_guard_reg = reg;
4636 }
4637
4638 if (rs6000_stack_protector_guard == SSP_TLS
4639 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4640 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
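/* An illustrative glibc-style invocation (values hypothetical):

     gcc -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \
         -mstack-protector-guard-offset=0x7010 foo.c

   selects a TLS guard loaded from offset 0x7010 off r13, passing both
   the IN_RANGE and the 64-bit alignment checks above.  */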
4641
4642 if (global_init_p)
4643 {
4644 #ifdef TARGET_REGNAMES
4645 /* If the user desires alternate register names, copy in the
4646 alternate names now. */
4647 if (TARGET_REGNAMES)
4648 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4649 #endif
4650
4651 /* Set aix_struct_return last, after the ABI is determined.
4652 If -maix-struct-return or -msvr4-struct-return was explicitly
4653 used, don't override with the ABI default. */
4654 if (!OPTION_SET_P (aix_struct_return))
4655 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4656
4657 #if 0
4658 /* IBM XL compiler defaults to unsigned bitfields. */
4659 if (TARGET_XL_COMPAT)
4660 flag_signed_bitfields = 0;
4661 #endif
4662
4663 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4664 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4665
4666 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4667
4668 /* We can only guarantee the availability of DI pseudo-ops when
4669 assembling for 64-bit targets. */
4670 if (!TARGET_64BIT)
4671 {
4672 targetm.asm_out.aligned_op.di = NULL;
4673 targetm.asm_out.unaligned_op.di = NULL;
4674 }
4675
4676
4677 /* Set branch target alignment, if not optimizing for size. */
4678 if (!optimize_size)
4679 {
4680 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4681 8-byte aligned to avoid misprediction by the branch predictor. */
4682 if (rs6000_tune == PROCESSOR_TITAN
4683 || rs6000_tune == PROCESSOR_CELL)
4684 {
4685 if (flag_align_functions && !str_align_functions)
4686 str_align_functions = "8";
4687 if (flag_align_jumps && !str_align_jumps)
4688 str_align_jumps = "8";
4689 if (flag_align_loops && !str_align_loops)
4690 str_align_loops = "8";
4691 }
4692 if (rs6000_align_branch_targets)
4693 {
4694 if (flag_align_functions && !str_align_functions)
4695 str_align_functions = "16";
4696 if (flag_align_jumps && !str_align_jumps)
4697 str_align_jumps = "16";
4698 if (flag_align_loops && !str_align_loops)
4699 {
4700 can_override_loop_align = 1;
4701 str_align_loops = "16";
4702 }
4703 }
4704 }
4705
4706 /* Arrange to save and restore machine status around nested functions. */
4707 init_machine_status = rs6000_init_machine_status;
4708
4709 /* We should always be splitting complex arguments, but we can't break
4710 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4711 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4712 targetm.calls.split_complex_arg = NULL;
4713
4714 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4715 if (DEFAULT_ABI == ABI_AIX)
4716 targetm.calls.custom_function_descriptors = 0;
4717 }
4718
4719 /* Initialize rs6000_cost with the appropriate target costs. */
4720 if (optimize_size)
4721 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4722 else
4723 switch (rs6000_tune)
4724 {
4725 case PROCESSOR_RS64A:
4726 rs6000_cost = &rs64a_cost;
4727 break;
4728
4729 case PROCESSOR_MPCCORE:
4730 rs6000_cost = &mpccore_cost;
4731 break;
4732
4733 case PROCESSOR_PPC403:
4734 rs6000_cost = &ppc403_cost;
4735 break;
4736
4737 case PROCESSOR_PPC405:
4738 rs6000_cost = &ppc405_cost;
4739 break;
4740
4741 case PROCESSOR_PPC440:
4742 rs6000_cost = &ppc440_cost;
4743 break;
4744
4745 case PROCESSOR_PPC476:
4746 rs6000_cost = &ppc476_cost;
4747 break;
4748
4749 case PROCESSOR_PPC601:
4750 rs6000_cost = &ppc601_cost;
4751 break;
4752
4753 case PROCESSOR_PPC603:
4754 rs6000_cost = &ppc603_cost;
4755 break;
4756
4757 case PROCESSOR_PPC604:
4758 rs6000_cost = &ppc604_cost;
4759 break;
4760
4761 case PROCESSOR_PPC604e:
4762 rs6000_cost = &ppc604e_cost;
4763 break;
4764
4765 case PROCESSOR_PPC620:
4766 rs6000_cost = &ppc620_cost;
4767 break;
4768
4769 case PROCESSOR_PPC630:
4770 rs6000_cost = &ppc630_cost;
4771 break;
4772
4773 case PROCESSOR_CELL:
4774 rs6000_cost = &ppccell_cost;
4775 break;
4776
4777 case PROCESSOR_PPC750:
4778 case PROCESSOR_PPC7400:
4779 rs6000_cost = &ppc750_cost;
4780 break;
4781
4782 case PROCESSOR_PPC7450:
4783 rs6000_cost = &ppc7450_cost;
4784 break;
4785
4786 case PROCESSOR_PPC8540:
4787 case PROCESSOR_PPC8548:
4788 rs6000_cost = &ppc8540_cost;
4789 break;
4790
4791 case PROCESSOR_PPCE300C2:
4792 case PROCESSOR_PPCE300C3:
4793 rs6000_cost = &ppce300c2c3_cost;
4794 break;
4795
4796 case PROCESSOR_PPCE500MC:
4797 rs6000_cost = &ppce500mc_cost;
4798 break;
4799
4800 case PROCESSOR_PPCE500MC64:
4801 rs6000_cost = &ppce500mc64_cost;
4802 break;
4803
4804 case PROCESSOR_PPCE5500:
4805 rs6000_cost = &ppce5500_cost;
4806 break;
4807
4808 case PROCESSOR_PPCE6500:
4809 rs6000_cost = &ppce6500_cost;
4810 break;
4811
4812 case PROCESSOR_TITAN:
4813 rs6000_cost = &titan_cost;
4814 break;
4815
4816 case PROCESSOR_POWER4:
4817 case PROCESSOR_POWER5:
4818 rs6000_cost = &power4_cost;
4819 break;
4820
4821 case PROCESSOR_POWER6:
4822 rs6000_cost = &power6_cost;
4823 break;
4824
4825 case PROCESSOR_POWER7:
4826 rs6000_cost = &power7_cost;
4827 break;
4828
4829 case PROCESSOR_POWER8:
4830 rs6000_cost = &power8_cost;
4831 break;
4832
4833 case PROCESSOR_POWER9:
4834 rs6000_cost = &power9_cost;
4835 break;
4836
4837 case PROCESSOR_POWER10:
4838 rs6000_cost = &power10_cost;
4839 break;
4840
4841 case PROCESSOR_PPCA2:
4842 rs6000_cost = &ppca2_cost;
4843 break;
4844
4845 default:
4846 gcc_unreachable ();
4847 }
4848
4849 if (global_init_p)
4850 {
4851 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4852 param_simultaneous_prefetches,
4853 rs6000_cost->simultaneous_prefetches);
4854 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4855 param_l1_cache_size,
4856 rs6000_cost->l1_cache_size);
4857 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4858 param_l1_cache_line_size,
4859 rs6000_cost->cache_line_size);
4860 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4861 param_l2_cache_size,
4862 rs6000_cost->l2_cache_size);
4863
4864 /* Increase loop peeling limits based on performance analysis. */
4865 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4866 param_max_peeled_insns, 400);
4867 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4868 param_max_completely_peeled_insns, 400);
4869
4870 /* The lxvl/stxvl instructions don't perform well before Power10. */
4871 if (TARGET_POWER10)
4872 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4873 param_vect_partial_vector_usage, 1);
4874 else
4875 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4876 param_vect_partial_vector_usage, 0);
4877
4878 /* Use the 'model' -fsched-pressure algorithm by default. */
4879 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4880 param_sched_pressure_algorithm,
4881 SCHED_PRESSURE_MODEL);
4882
4883 /* If using typedef char *va_list, signal that
4884 __builtin_va_start (&ap, 0) can be optimized to
4885 ap = __builtin_next_arg (0). */
4886 if (DEFAULT_ABI != ABI_V4)
4887 targetm.expand_builtin_va_start = NULL;
4888 }
4889
4890 rs6000_override_options_after_change ();
4891
4892 /* If not explicitly specified via option, decide whether to generate indexed
4893 load/store instructions. A value of -1 indicates that the
4894 initial value of this variable has not been overwritten. During
4895 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4896 if (TARGET_AVOID_XFORM == -1)
4897 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4898 DERAT mispredict penalty. However, the LVE and STVE altivec instructions
4899 need indexed accesses and the type used is the scalar type of the element
4900 being loaded or stored. */
4901 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4902 && !TARGET_ALTIVEC);
4903
4904 /* Set the -mrecip options. */
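/* The option value is a comma-separated list of reciprocal estimate names,
each optionally prefixed with '!' to disable it. Per the GCC manual, e.g.
-mrecip=all,!rsqrtd enables all of the estimates except the
double-precision reciprocal square root ones. */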
4905 if (rs6000_recip_name)
4906 {
4907 char *p = ASTRDUP (rs6000_recip_name);
4908 char *q;
4909 unsigned int mask, i;
4910 bool invert;
4911
4912 while ((q = strtok (p, ",")) != NULL)
4913 {
4914 p = NULL;
4915 if (*q == '!')
4916 {
4917 invert = true;
4918 q++;
4919 }
4920 else
4921 invert = false;
4922
4923 if (!strcmp (q, "default"))
4924 mask = ((TARGET_RECIP_PRECISION)
4925 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4926 else
4927 {
4928 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4929 if (!strcmp (q, recip_options[i].string))
4930 {
4931 mask = recip_options[i].mask;
4932 break;
4933 }
4934
4935 if (i == ARRAY_SIZE (recip_options))
4936 {
4937 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4938 invert = false;
4939 mask = 0;
4940 ret = false;
4941 }
4942 }
4943
4944 if (invert)
4945 rs6000_recip_control &= ~mask;
4946 else
4947 rs6000_recip_control |= mask;
4948 }
4949 }
4950
4951 /* Set the builtin mask of the various options used that could affect which
4952 builtins were used. In the past we used target_flags, but we've run out
4953 of bits, and some options are no longer in target_flags. */
4954 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4955 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4956 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4957 rs6000_builtin_mask);
4958
4959 /* Initialize all of the registers. */
4960 rs6000_init_hard_regno_mode_ok (global_init_p);
4961
4962 /* Save the initial options in case the user uses function-specific options. */
4963 if (global_init_p)
4964 target_option_default_node = target_option_current_node
4965 = build_target_option_node (&global_options, &global_options_set);
4966
4967 /* If not explicitly specified via option, decide whether to generate the
4968 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4969 if (TARGET_LINK_STACK == -1)
4970 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4971
4972 /* Deprecate use of -mno-speculate-indirect-jumps. */
4973 if (!rs6000_speculate_indirect_jumps)
4974 warning (0, "%qs is deprecated and not recommended in any circumstances",
4975 "-mno-speculate-indirect-jumps");
4976
4977 return ret;
4978 }
4979
4980 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4981 define the target cpu type. */
4982
4983 static void
4984 rs6000_option_override (void)
4985 {
4986 (void) rs6000_option_override_internal (true);
4987 }
4988
4989 \f
4990 /* Implement targetm.vectorize.builtin_mask_for_load. */
4991 static tree
4992 rs6000_builtin_mask_for_load (void)
4993 {
4994 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4995 if ((TARGET_ALTIVEC && !TARGET_VSX)
4996 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4997 return altivec_builtin_mask_for_load;
4998 else
4999 return 0;
5000 }
5001
5002 /* Implement LOOP_ALIGN. */
5003 align_flags
5004 rs6000_loop_align (rtx label)
5005 {
5006 basic_block bb;
5007 int ninsns;
5008
5009 /* Don't override loop alignment if -falign-loops was specified. */
5010 if (!can_override_loop_align)
5011 return align_loops;
5012
5013 bb = BLOCK_FOR_INSN (label);
5014 ninsns = num_loop_insns (bb->loop_father);
5015
5016 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
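/* align_flags takes a log2 value, so align_flags (5) below requests
2**5 = 32-byte alignment. */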
5017 if (ninsns > 4 && ninsns <= 8
5018 && (rs6000_tune == PROCESSOR_POWER4
5019 || rs6000_tune == PROCESSOR_POWER5
5020 || rs6000_tune == PROCESSOR_POWER6
5021 || rs6000_tune == PROCESSOR_POWER7
5022 || rs6000_tune == PROCESSOR_POWER8))
5023 return align_flags (5);
5024 else
5025 return align_loops;
5026 }
5027
5028 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5029 after applying N iterations. This routine does not determine how many
5030 iterations are required to reach the desired alignment. */
5031
5032 static bool
5033 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5034 {
5035 if (is_packed)
5036 return false;
5037
5038 if (TARGET_32BIT)
5039 {
5040 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5041 return true;
5042
5043 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5044 return true;
5045
5046 return false;
5047 }
5048 else
5049 {
5050 if (TARGET_MACHO)
5051 return false;
5052
5053 /* Assume that all other types are naturally aligned. CHECKME! */
5054 return true;
5055 }
5056 }
5057
5058 /* Return true if the vector misalignment factor is supported by the
5059 target. */
5060 static bool
5061 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5062 const_tree type,
5063 int misalignment,
5064 bool is_packed)
5065 {
5066 if (TARGET_VSX)
5067 {
5068 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5069 return true;
5070
5071 /* Return false if the movmisalign pattern is not supported for this mode. */
5072 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5073 return false;
5074
5075 if (misalignment == -1)
5076 {
5077 /* Misalignment factor is unknown at compile time but we know
5078 it's word aligned. */
5079 if (rs6000_vector_alignment_reachable (type, is_packed))
5080 {
5081 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5082
5083 if (element_size == 64 || element_size == 32)
5084 return true;
5085 }
5086
5087 return false;
5088 }
5089
5090 /* VSX supports word-aligned vectors. */
5091 if (misalignment % 4 == 0)
5092 return true;
5093 }
5094 return false;
5095 }
5096
5097 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5098 static int
5099 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5100 tree vectype, int misalign)
5101 {
5102 unsigned elements;
5103 tree elem_type;
5104
5105 switch (type_of_cost)
5106 {
5107 case scalar_stmt:
5108 case scalar_store:
5109 case vector_stmt:
5110 case vector_store:
5111 case vec_to_scalar:
5112 case scalar_to_vec:
5113 case cond_branch_not_taken:
5114 return 1;
5115 case scalar_load:
5116 case vector_load:
5117 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5118 return 2;
5119
5120 case vec_perm:
5121 /* Power7 has only one permute unit, make it a bit expensive. */
5122 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5123 return 3;
5124 else
5125 return 1;
5126
5127 case vec_promote_demote:
5128 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5129 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5130 return 4;
5131 else
5132 return 1;
5133
5134 case cond_branch_taken:
5135 return 3;
5136
5137 case unaligned_load:
5138 case vector_gather_load:
5139 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5140 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5141 return 2;
5142
5143 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5144 {
5145 elements = TYPE_VECTOR_SUBPARTS (vectype);
5146 /* See PR102767; consider V1TI for consistency. */
5147 if (elements == 2 || elements == 1)
5148 /* Double word aligned. */
5149 return 4;
5150
5151 if (elements == 4)
5152 {
5153 switch (misalign)
5154 {
5155 case 8:
5156 /* Double word aligned. */
5157 return 4;
5158
5159 case -1:
5160 /* Unknown misalignment. */
5161 case 4:
5162 case 12:
5163 /* Word aligned. */
5164 return 33;
5165
5166 default:
5167 gcc_unreachable ();
5168 }
5169 }
5170 }
5171
5172 if (TARGET_ALTIVEC)
5173 /* Misaligned loads are not supported. */
5174 gcc_unreachable ();
5175
5176 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5177 return 4;
5178
5179 case unaligned_store:
5180 case vector_scatter_store:
5181 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5182 return 1;
5183
5184 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5185 {
5186 elements = TYPE_VECTOR_SUBPARTS (vectype);
5187 /* See PR102767; consider V1TI for consistency. */
5188 if (elements == 2 || elements == 1)
5189 /* Double word aligned. */
5190 return 2;
5191
5192 if (elements == 4)
5193 {
5194 switch (misalign)
5195 {
5196 case 8:
5197 /* Double word aligned. */
5198 return 2;
5199
5200 case -1:
5201 /* Unknown misalignment. */
5202 case 4:
5203 case 12:
5204 /* Word aligned. */
5205 return 23;
5206
5207 default:
5208 gcc_unreachable ();
5209 }
5210 }
5211 }
5212
5213 if (TARGET_ALTIVEC)
5214 /* Misaligned stores are not supported. */
5215 gcc_unreachable ();
5216
5217 return 2;
5218
5219 case vec_construct:
5220 /* This is a rough approximation assuming non-constant elements are
5221 constructed into a vector via element insertion. FIXME:
5222 vec_construct is not granular enough for uniformly good
5223 decisions. If the initialization is a splat, this is
5224 cheaper than we estimate. Improve this someday. */
5225 elem_type = TREE_TYPE (vectype);
5226 /* 32-bit floats loaded into registers are stored as double
5227 precision, so we need 2 permutes, 2 converts, and 1 merge
5228 to construct a vector of short floats from them. */
5229 if (SCALAR_FLOAT_TYPE_P (elem_type)
5230 && TYPE_PRECISION (elem_type) == 32)
5231 return 5;
5232 /* On POWER9, integer vector types are built up in GPRs and then
5233 use a direct move (2 cycles). For POWER8 this is even worse,
5234 as we need two direct moves and a merge, and the direct moves
5235 are five cycles. */
5236 else if (INTEGRAL_TYPE_P (elem_type))
5237 {
5238 if (TARGET_P9_VECTOR)
5239 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5240 else
5241 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5242 }
5243 else
5244 /* V2DFmode doesn't need a direct move. */
5245 return 2;
5246
5247 default:
5248 gcc_unreachable ();
5249 }
5250 }
5251
5252 /* Implement targetm.vectorize.preferred_simd_mode. */
5253
5254 static machine_mode
5255 rs6000_preferred_simd_mode (scalar_mode mode)
5256 {
5257 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5258
5259 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5260 return vmode.require ();
5261
5262 return word_mode;
5263 }
5264
5265 class rs6000_cost_data : public vector_costs
5266 {
5267 public:
5268 using vector_costs::vector_costs;
5269
5270 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5271 stmt_vec_info stmt_info, tree vectype,
5272 int misalign,
5273 vect_cost_model_location where) override;
5274 void finish_cost (const vector_costs *) override;
5275
5276 protected:
5277 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5278 vect_cost_model_location, unsigned int);
5279 void density_test (loop_vec_info);
5280 void adjust_vect_cost_per_loop (loop_vec_info);
5281
5282 /* Total number of vectorized stmts (loop only). */
5283 unsigned m_nstmts = 0;
5284 /* Total number of loads (loop only). */
5285 unsigned m_nloads = 0;
5286 /* Possible extra penalized cost on vector construction (loop only). */
5287 unsigned m_extra_ctor_cost = 0;
5288 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5289 instruction is needed by the vectorization. */
5290 bool m_vect_nonmem = false;
5291 };
5292
5293 /* Test for likely overcommitment of vector hardware resources. If a
5294 loop iteration is relatively large, and too large a percentage of
5295 instructions in the loop are vectorized, the cost model may not
5296 adequately reflect delays from unavailable vector resources.
5297 Penalize the loop body cost for this case. */
5298
5299 void
5300 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5301 {
5302 /* This density test only cares about the cost of the vector version of
5303 the loop, so return immediately if we are costing for the scalar
5304 version (namely computing the single scalar iteration cost). */
5305 if (m_costing_for_scalar)
5306 return;
5307
5308 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5309 basic_block *bbs = get_loop_body (loop);
5310 int nbbs = loop->num_nodes;
5311 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5312
5313 for (int i = 0; i < nbbs; i++)
5314 {
5315 basic_block bb = bbs[i];
5316 gimple_stmt_iterator gsi;
5317
5318 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5319 {
5320 gimple *stmt = gsi_stmt (gsi);
5321 if (is_gimple_debug (stmt))
5322 continue;
5323
5324 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5325
5326 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5327 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5328 not_vec_cost++;
5329 }
5330 }
5331
5332 free (bbs);
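/* E.g., with vec_cost 90 and not_vec_cost 10 the density is 90%; when it
exceeds rs6000_density_pct_threshold and the combined cost exceeds
rs6000_density_size_threshold, the body cost is penalized below. */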
5333 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5334
5335 if (density_pct > rs6000_density_pct_threshold
5336 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5337 {
5338 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5339 if (dump_enabled_p ())
5340 dump_printf_loc (MSG_NOTE, vect_location,
5341 "density %d%%, cost %d exceeds threshold, penalizing "
5342 "loop body cost by %u%%\n", density_pct,
5343 vec_cost + not_vec_cost, rs6000_density_penalty);
5344 }
5345
5346 /* Check whether we need to penalize the body cost to account
5347 for excess strided or elementwise loads. */
5348 if (m_extra_ctor_cost > 0)
5349 {
5350 gcc_assert (m_nloads <= m_nstmts);
5351 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5352
5353 /* The loop is likely to be bound by the latency and execution
5354 resources of the many scalar strided or elementwise loads that
5355 feed vector construction if both conditions below hold:
5356 1. there are many loads, so it is easy to end up with a long
5357 wait on the load units;
5358 2. the loads make up a big proportion of all vectorized
5359 statements, so it is hard to schedule other statements to
5360 overlap with them.
5361 One typical case is the innermost loop of the hotspot of SPEC2017
5362 503.bwaves_r without loop interchange. */
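/* E.g., a body with 50 loads out of 70 vectorized statements has a load
percentage of 71%, which is compared against the two tunable
thresholds below. */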
5363 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5364 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5365 {
5366 m_costs[vect_body] += m_extra_ctor_cost;
5367 if (dump_enabled_p ())
5368 dump_printf_loc (MSG_NOTE, vect_location,
5369 "Found %u loads and "
5370 "load pct. %u%% exceed "
5371 "the threshold, "
5372 "penalizing loop body "
5373 "cost by extra cost %u "
5374 "for ctor.\n",
5375 m_nloads, load_pct,
5376 m_extra_ctor_cost);
5377 }
5378 }
5379 }
5380
5381 /* Implement targetm.vectorize.create_costs. */
5382
5383 static vector_costs *
5384 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5385 {
5386 return new rs6000_cost_data (vinfo, costing_for_scalar);
5387 }
5388
5389 /* Adjust the vectorization cost after calling rs6000_builtin_vectorization_cost.
5390 For some statements, we would like to further fine-tune the cost on top of
5391 the rs6000_builtin_vectorization_cost handling, which has no information on
5392 statement operation codes etc. One typical case here is COND_EXPR: it costs
5393 the same as a simple FXU instruction when evaluating the scalar cost, but it
5394 should be priced higher whether it is transformed into compare + branch or
5395 compare + isel instructions. */
5396
5397 static unsigned
5398 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5399 struct _stmt_vec_info *stmt_info)
5400 {
5401 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5402 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5403 {
5404 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5405 if (subcode == COND_EXPR)
5406 return 2;
5407 }
5408
5409 return 0;
5410 }
5411
5412 /* Helper function for add_stmt_cost. Check each statement cost
5413 entry, gather information and update the target_cost fields
5414 accordingly. */
5415 void
5416 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5417 stmt_vec_info stmt_info,
5418 vect_cost_model_location where,
5419 unsigned int orig_count)
5420 {
5421
5422 /* Check whether we're doing something other than just a copy loop.
5423 Not all such loops may be profitably vectorized; see
5424 rs6000_finish_cost. */
5425 if (kind == vec_to_scalar
5426 || kind == vec_perm
5427 || kind == vec_promote_demote
5428 || kind == vec_construct
5429 || kind == scalar_to_vec
5430 || (where == vect_body && kind == vector_stmt))
5431 m_vect_nonmem = true;
5432
5433 /* Gather some information when we are costing the vectorized instruction
5434 for the statements located in a loop body. */
5435 if (!m_costing_for_scalar
5436 && is_a<loop_vec_info> (m_vinfo)
5437 && where == vect_body)
5438 {
5439 m_nstmts += orig_count;
5440
5441 if (kind == scalar_load || kind == vector_load
5442 || kind == unaligned_load || kind == vector_gather_load)
5443 m_nloads += orig_count;
5444
5445 /* Power processors do not currently have instructions for strided
5446 and elementwise loads, and instead we must generate multiple
5447 scalar loads. This leads to undercounting of the cost. We
5448 account for this by scaling the construction cost by the number
5449 of elements involved, and saving this as extra cost that we may
5450 or may not need to apply. When finalizing the cost of the loop,
5451 the extra penalty is applied when the load density heuristics
5452 are satisfied. */
5453 if (kind == vec_construct && stmt_info
5454 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5455 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5456 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5457 {
5458 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5459 unsigned int nunits = vect_nunits_for_cost (vectype);
5460 /* We don't expect strided/elementwise loads for just 1 nunit. */
5461 gcc_assert (nunits > 1);
5462 /* The i386 port uses nunits * stmt_cost as the penalized cost
5463 for this kind of penalization. We used to follow it, but
5464 found it could result in an unreliable body cost, especially
5465 for V16QI/V8HI modes. To improve on this, we use a new
5466 heuristic: for each scalar load, use 2 as the penalized cost
5467 for the case with 2 nunits and 1 for the other cases. This
5468 is without much supporting theory, mainly concluded from broad
5469 performance evaluations on Power8, Power9 and Power10. One
5470 possibly related point is that vector construction for more
5471 units uses more insns, so there are more chances to schedule
5472 them better (even run them in parallel when enough units are
5473 available at that time), which makes it reasonable not to
5474 penalize them as much. */
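/* E.g., a two-element construction (nunits 2) adds 2 * 2 = 4 of extra
cost, while a V16QI construction (nunits 16) adds 16 * 1 = 16. */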
5475 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5476 unsigned int extra_cost = nunits * adjusted_cost;
5477 m_extra_ctor_cost += extra_cost;
5478 }
5479 }
5480 }
5481
5482 unsigned
5483 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5484 stmt_vec_info stmt_info, tree vectype,
5485 int misalign, vect_cost_model_location where)
5486 {
5487 unsigned retval = 0;
5488
5489 if (flag_vect_cost_model)
5490 {
5491 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5492 misalign);
5493 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5494 /* Statements in an inner loop relative to the loop being
5495 vectorized are weighted more heavily. The value here is
5496 arbitrary and could potentially be improved with analysis. */
5497 unsigned int orig_count = count;
5498 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5499 m_costs[where] += retval;
5500
5501 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5502 }
5503
5504 return retval;
5505 }
5506
5507 /* For target-specific vectorization costs that can't be handled per
5508 stmt, check the requisite conditions and adjust the vectorization cost
5509 accordingly if they are satisfied. One typical example is modeling the
5510 shift cost for vectors with length by counting the number of required
5511 lengths when LOOP_VINFO_FULLY_WITH_LENGTH_P holds. */
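/* E.g., two rgroups with NUM_VECTORS_M1 of 0 and 1 need 1 + 2 = 3
shifts, costed below as three scalar_stmts in the loop body. */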
5512
5513 void
5514 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5515 {
5516 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5517 {
5518 rgroup_controls *rgc;
5519 unsigned int num_vectors_m1;
5520 unsigned int shift_cnt = 0;
5521 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5522 if (rgc->type)
5523 /* Each length needs one shift to fill into bits 0-7. */
5524 shift_cnt += num_vectors_m1 + 1;
5525
5526 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL_TREE, 0, vect_body);
5527 }
5528 }
5529
5530 void
5531 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5532 {
5533 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5534 {
5535 adjust_vect_cost_per_loop (loop_vinfo);
5536 density_test (loop_vinfo);
5537
5538 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5539 that require versioning for any reason. The vectorization is at
5540 best a wash inside the loop, and the versioning checks make
5541 profitability highly unlikely and potentially quite harmful. */
5542 if (!m_vect_nonmem
5543 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5544 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5545 m_costs[vect_body] += 10000;
5546 }
5547
5548 vector_costs::finish_cost (scalar_costs);
5549 }
5550
5551 /* Implement targetm.loop_unroll_adjust. */
5552
5553 static unsigned
5554 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5555 {
5556 if (unroll_only_small_loops)
5557 {
5558 /* TODO: These are hardcoded values right now. We probably should use
5559 a PARAM here. */
5560 if (loop->ninsns <= 6)
5561 return MIN (4, nunroll);
5562 if (loop->ninsns <= 10)
5563 return MIN (2, nunroll);
5564
5565 return 0;
5566 }
5567
5568 return nunroll;
5569 }
5570
5571 /* Returns a function decl for a vectorized version of the builtin function
5572 with builtin function code FN and the result vector type TYPE_OUT, or NULL_TREE
5573 if it is not available.
5574
5575 Implement targetm.vectorize.builtin_vectorized_function. */
5576
5577 static tree
5578 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5579 tree type_in)
5580 {
5581 machine_mode in_mode, out_mode;
5582 int in_n, out_n;
5583
5584 if (TARGET_DEBUG_BUILTIN)
5585 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5586 combined_fn_name (combined_fn (fn)),
5587 GET_MODE_NAME (TYPE_MODE (type_out)),
5588 GET_MODE_NAME (TYPE_MODE (type_in)));
5589
5590 /* TODO: Should this be gcc_assert? */
5591 if (TREE_CODE (type_out) != VECTOR_TYPE
5592 || TREE_CODE (type_in) != VECTOR_TYPE)
5593 return NULL_TREE;
5594
5595 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5596 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5597 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5598 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5599
5600 switch (fn)
5601 {
5602 CASE_CFN_COPYSIGN:
5603 if (VECTOR_UNIT_VSX_P (V2DFmode)
5604 && out_mode == DFmode && out_n == 2
5605 && in_mode == DFmode && in_n == 2)
5606 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5607 if (VECTOR_UNIT_VSX_P (V4SFmode)
5608 && out_mode == SFmode && out_n == 4
5609 && in_mode == SFmode && in_n == 4)
5610 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5611 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5612 && out_mode == SFmode && out_n == 4
5613 && in_mode == SFmode && in_n == 4)
5614 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5615 break;
5616 CASE_CFN_CEIL:
5617 if (VECTOR_UNIT_VSX_P (V2DFmode)
5618 && out_mode == DFmode && out_n == 2
5619 && in_mode == DFmode && in_n == 2)
5620 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5621 if (VECTOR_UNIT_VSX_P (V4SFmode)
5622 && out_mode == SFmode && out_n == 4
5623 && in_mode == SFmode && in_n == 4)
5624 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5625 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5626 && out_mode == SFmode && out_n == 4
5627 && in_mode == SFmode && in_n == 4)
5628 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5629 break;
5630 CASE_CFN_FLOOR:
5631 if (VECTOR_UNIT_VSX_P (V2DFmode)
5632 && out_mode == DFmode && out_n == 2
5633 && in_mode == DFmode && in_n == 2)
5634 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5635 if (VECTOR_UNIT_VSX_P (V4SFmode)
5636 && out_mode == SFmode && out_n == 4
5637 && in_mode == SFmode && in_n == 4)
5638 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5639 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5640 && out_mode == SFmode && out_n == 4
5641 && in_mode == SFmode && in_n == 4)
5642 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5643 break;
5644 CASE_CFN_FMA:
5645 if (VECTOR_UNIT_VSX_P (V2DFmode)
5646 && out_mode == DFmode && out_n == 2
5647 && in_mode == DFmode && in_n == 2)
5648 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5649 if (VECTOR_UNIT_VSX_P (V4SFmode)
5650 && out_mode == SFmode && out_n == 4
5651 && in_mode == SFmode && in_n == 4)
5652 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5653 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5654 && out_mode == SFmode && out_n == 4
5655 && in_mode == SFmode && in_n == 4)
5656 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5657 break;
5658 CASE_CFN_TRUNC:
5659 if (VECTOR_UNIT_VSX_P (V2DFmode)
5660 && out_mode == DFmode && out_n == 2
5661 && in_mode == DFmode && in_n == 2)
5662 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5663 if (VECTOR_UNIT_VSX_P (V4SFmode)
5664 && out_mode == SFmode && out_n == 4
5665 && in_mode == SFmode && in_n == 4)
5666 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5667 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5668 && out_mode == SFmode && out_n == 4
5669 && in_mode == SFmode && in_n == 4)
5670 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5671 break;
5672 CASE_CFN_NEARBYINT:
5673 if (VECTOR_UNIT_VSX_P (V2DFmode)
5674 && flag_unsafe_math_optimizations
5675 && out_mode == DFmode && out_n == 2
5676 && in_mode == DFmode && in_n == 2)
5677 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5678 if (VECTOR_UNIT_VSX_P (V4SFmode)
5679 && flag_unsafe_math_optimizations
5680 && out_mode == SFmode && out_n == 4
5681 && in_mode == SFmode && in_n == 4)
5682 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5683 break;
5684 CASE_CFN_RINT:
5685 if (VECTOR_UNIT_VSX_P (V2DFmode)
5686 && !flag_trapping_math
5687 && out_mode == DFmode && out_n == 2
5688 && in_mode == DFmode && in_n == 2)
5689 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5690 if (VECTOR_UNIT_VSX_P (V4SFmode)
5691 && !flag_trapping_math
5692 && out_mode == SFmode && out_n == 4
5693 && in_mode == SFmode && in_n == 4)
5694 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5695 break;
5696 default:
5697 break;
5698 }
5699
5700 /* Generate calls to libmass if appropriate. */
5701 if (rs6000_veclib_handler)
5702 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5703
5704 return NULL_TREE;
5705 }
5706
5707 /* Implement targetm.vectorize.builtin_md_vectorized_function. */
5708
5709 static tree
5710 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5711 tree type_in)
5712 {
5713 machine_mode in_mode, out_mode;
5714 int in_n, out_n;
5715
5716 if (TARGET_DEBUG_BUILTIN)
5717 fprintf (stderr,
5718 "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5719 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5720 GET_MODE_NAME (TYPE_MODE (type_out)),
5721 GET_MODE_NAME (TYPE_MODE (type_in)));
5722
5723 /* TODO: Should this be gcc_assert? */
5724 if (TREE_CODE (type_out) != VECTOR_TYPE
5725 || TREE_CODE (type_in) != VECTOR_TYPE)
5726 return NULL_TREE;
5727
5728 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5729 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5730 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5731 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5732
5733 enum rs6000_gen_builtins fn
5734 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5735 switch (fn)
5736 {
5737 case RS6000_BIF_RSQRTF:
5738 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5739 && out_mode == SFmode && out_n == 4
5740 && in_mode == SFmode && in_n == 4)
5741 return rs6000_builtin_decls[RS6000_BIF_VRSQRTFP];
5742 break;
5743 case RS6000_BIF_RSQRT:
5744 if (VECTOR_UNIT_VSX_P (V2DFmode)
5745 && out_mode == DFmode && out_n == 2
5746 && in_mode == DFmode && in_n == 2)
5747 return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF];
5748 break;
5749 case RS6000_BIF_RECIPF:
5750 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5751 && out_mode == SFmode && out_n == 4
5752 && in_mode == SFmode && in_n == 4)
5753 return rs6000_builtin_decls[RS6000_BIF_VRECIPFP];
5754 break;
5755 case RS6000_BIF_RECIP:
5756 if (VECTOR_UNIT_VSX_P (V2DFmode)
5757 && out_mode == DFmode && out_n == 2
5758 && in_mode == DFmode && in_n == 2)
5759 return rs6000_builtin_decls[RS6000_BIF_RECIP_V2DF];
5760 break;
5761 default:
5762 break;
5763 }
5764
5765 machine_mode in_vmode = TYPE_MODE (type_in);
5766 machine_mode out_vmode = TYPE_MODE (type_out);
5767
5768 /* Vectorized built-in functions supported on Power10. */
5769 if (TARGET_POWER10
5770 && in_vmode == out_vmode
5771 && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
5772 {
5773 machine_mode exp_mode = DImode;
5774 machine_mode exp_vmode = V2DImode;
5775 enum rs6000_gen_builtins bif;
5776 switch (fn)
5777 {
5778 case RS6000_BIF_DIVWE:
5779 case RS6000_BIF_DIVWEU:
5780 exp_mode = SImode;
5781 exp_vmode = V4SImode;
5782 if (fn == RS6000_BIF_DIVWE)
5783 bif = RS6000_BIF_VDIVESW;
5784 else
5785 bif = RS6000_BIF_VDIVEUW;
5786 break;
5787 case RS6000_BIF_DIVDE:
5788 case RS6000_BIF_DIVDEU:
5789 if (fn == RS6000_BIF_DIVDE)
5790 bif = RS6000_BIF_VDIVESD;
5791 else
5792 bif = RS6000_BIF_VDIVEUD;
5793 break;
5794 case RS6000_BIF_CFUGED:
5795 bif = RS6000_BIF_VCFUGED;
5796 break;
5797 case RS6000_BIF_CNTLZDM:
5798 bif = RS6000_BIF_VCLZDM;
5799 break;
5800 case RS6000_BIF_CNTTZDM:
5801 bif = RS6000_BIF_VCTZDM;
5802 break;
5803 case RS6000_BIF_PDEPD:
5804 bif = RS6000_BIF_VPDEPD;
5805 break;
5806 case RS6000_BIF_PEXTD:
5807 bif = RS6000_BIF_VPEXTD;
5808 break;
5809 default:
5810 return NULL_TREE;
5811 }
5812
5813 if (in_mode == exp_mode && in_vmode == exp_vmode)
5814 return rs6000_builtin_decls[bif];
5815 }
5816
5817 return NULL_TREE;
5818 }
5819
5820 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5821 library with vectorized intrinsics. */
5822
5823 static tree
5824 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5825 tree type_in)
5826 {
5827 char name[32];
5828 const char *suffix = NULL;
5829 tree fntype, new_fndecl, bdecl = NULL_TREE;
5830 int n_args = 1;
5831 const char *bname;
5832 machine_mode el_mode, in_mode;
5833 int n, in_n;
5834
5835 /* Libmass is suitable only for unsafe math, as it does not correctly support
5836 parts of IEEE with the required precision, such as denormals. Only support
5837 it if we have VSX to use the simd d2 or f4 functions.
5838 XXX: Add variable length support. */
5839 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5840 return NULL_TREE;
5841
5842 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5843 n = TYPE_VECTOR_SUBPARTS (type_out);
5844 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5845 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5846 if (el_mode != in_mode
5847 || n != in_n)
5848 return NULL_TREE;
5849
5850 switch (fn)
5851 {
5852 CASE_CFN_ATAN2:
5853 CASE_CFN_HYPOT:
5854 CASE_CFN_POW:
5855 n_args = 2;
5856 gcc_fallthrough ();
5857
5858 CASE_CFN_ACOS:
5859 CASE_CFN_ACOSH:
5860 CASE_CFN_ASIN:
5861 CASE_CFN_ASINH:
5862 CASE_CFN_ATAN:
5863 CASE_CFN_ATANH:
5864 CASE_CFN_CBRT:
5865 CASE_CFN_COS:
5866 CASE_CFN_COSH:
5867 CASE_CFN_ERF:
5868 CASE_CFN_ERFC:
5869 CASE_CFN_EXP2:
5870 CASE_CFN_EXP:
5871 CASE_CFN_EXPM1:
5872 CASE_CFN_LGAMMA:
5873 CASE_CFN_LOG10:
5874 CASE_CFN_LOG1P:
5875 CASE_CFN_LOG2:
5876 CASE_CFN_LOG:
5877 CASE_CFN_SIN:
5878 CASE_CFN_SINH:
5879 CASE_CFN_SQRT:
5880 CASE_CFN_TAN:
5881 CASE_CFN_TANH:
5882 if (el_mode == DFmode && n == 2)
5883 {
5884 bdecl = mathfn_built_in (double_type_node, fn);
5885 suffix = "d2"; /* pow -> powd2 */
5886 }
5887 else if (el_mode == SFmode && n == 4)
5888 {
5889 bdecl = mathfn_built_in (float_type_node, fn);
5890 suffix = "4"; /* powf -> powf4 */
5891 }
5892 else
5893 return NULL_TREE;
5894 if (!bdecl)
5895 return NULL_TREE;
5896 break;
5897
5898 default:
5899 return NULL_TREE;
5900 }
5901
5902 gcc_assert (suffix != NULL);
5903 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5904 if (!bname)
5905 return NULL_TREE;
5906
5907 strcpy (name, bname + strlen ("__builtin_"));
5908 strcat (name, suffix);
5909
5910 if (n_args == 1)
5911 fntype = build_function_type_list (type_out, type_in, NULL);
5912 else if (n_args == 2)
5913 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5914 else
5915 gcc_unreachable ();
5916
5917 /* Build a function declaration for the vectorized function. */
5918 new_fndecl = build_decl (BUILTINS_LOCATION,
5919 FUNCTION_DECL, get_identifier (name), fntype);
5920 TREE_PUBLIC (new_fndecl) = 1;
5921 DECL_EXTERNAL (new_fndecl) = 1;
5922 DECL_IS_NOVOPS (new_fndecl) = 1;
5923 TREE_READONLY (new_fndecl) = 1;
5924
5925 return new_fndecl;
5926 }
5927
5928 \f
5929 /* Default CPU string for rs6000*_file_start functions. */
5930 static const char *rs6000_default_cpu;
5931
5932 #ifdef USING_ELFOS_H
5933 const char *rs6000_machine;
5934
5935 const char *
5936 rs6000_machine_from_flags (void)
5937 {
5938 /* For some CPUs, the machine cannot be determined by ISA flags. We have to
5939 check those CPUs first. */
5940 switch (rs6000_cpu)
5941 {
5942 case PROCESSOR_PPC8540:
5943 case PROCESSOR_PPC8548:
5944 return "e500";
5945
5946 case PROCESSOR_PPCE300C2:
5947 case PROCESSOR_PPCE300C3:
5948 return "e300";
5949
5950 case PROCESSOR_PPCE500MC:
5951 return "e500mc";
5952
5953 case PROCESSOR_PPCE500MC64:
5954 return "e500mc64";
5955
5956 case PROCESSOR_PPCE5500:
5957 return "e5500";
5958
5959 case PROCESSOR_PPCE6500:
5960 return "e6500";
5961
5962 default:
5963 break;
5964 }
5965
5966 HOST_WIDE_INT flags = rs6000_isa_flags;
5967
5968 /* Disable the flags that should never influence the .machine selection. */
5969 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5970
5971 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5972 return "power10";
5973 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5974 return "power9";
5975 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5976 return "power8";
5977 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5978 return "power7";
5979 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5980 return "power6";
5981 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5982 return "power5";
5983 if ((flags & ISA_2_1_MASKS) != 0)
5984 return "power4";
5985 if ((flags & OPTION_MASK_POWERPC64) != 0)
5986 return "ppc64";
5987 return "ppc";
5988 }
5989
5990 void
5991 emit_asm_machine (void)
5992 {
5993 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5994 }
5995 #endif
5996
5997 /* Do anything needed at the start of the asm file. */
5998
5999 static void
6000 rs6000_file_start (void)
6001 {
6002 char buffer[80];
6003 const char *start = buffer;
6004 FILE *file = asm_out_file;
6005
6006 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6007
6008 default_file_start ();
6009
6010 if (flag_verbose_asm)
6011 {
6012 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6013
6014 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6015 {
6016 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6017 start = "";
6018 }
6019
6020 if (OPTION_SET_P (rs6000_cpu_index))
6021 {
6022 fprintf (file, "%s -mcpu=%s", start,
6023 processor_target_table[rs6000_cpu_index].name);
6024 start = "";
6025 }
6026
6027 if (OPTION_SET_P (rs6000_tune_index))
6028 {
6029 fprintf (file, "%s -mtune=%s", start,
6030 processor_target_table[rs6000_tune_index].name);
6031 start = "";
6032 }
6033
6034 if (PPC405_ERRATUM77)
6035 {
6036 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6037 start = "";
6038 }
6039
6040 #ifdef USING_ELFOS_H
6041 switch (rs6000_sdata)
6042 {
6043 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6044 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6045 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6046 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6047 }
6048
6049 if (rs6000_sdata && g_switch_value)
6050 {
6051 fprintf (file, "%s -G %d", start,
6052 g_switch_value);
6053 start = "";
6054 }
6055 #endif
6056
6057 if (*start == '\0')
6058 putc ('\n', file);
6059 }
6060
6061 #ifdef USING_ELFOS_H
6062 rs6000_machine = rs6000_machine_from_flags ();
6063 emit_asm_machine ();
6064 #endif
6065
6066 if (DEFAULT_ABI == ABI_ELFv2)
6067 fprintf (file, "\t.abiversion 2\n");
6068 }
6069
6070 \f
6071 /* Return nonzero if this function is known to have a null epilogue. */
6072
6073 int
6074 direct_return (void)
6075 {
6076 if (reload_completed)
6077 {
6078 rs6000_stack_t *info = rs6000_stack_info ();
6079
6080 if (info->first_gp_reg_save == 32
6081 && info->first_fp_reg_save == 64
6082 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6083 && ! info->lr_save_p
6084 && ! info->cr_save_p
6085 && info->vrsave_size == 0
6086 && ! info->push_p)
6087 return 1;
6088 }
6089
6090 return 0;
6091 }
6092
6093 /* Helper for num_insns_constant. Calculate the number of instructions
6094 needed to load VALUE into a single gpr using combinations of addi,
6095 addis, ori, oris, sldi and rldimi instructions. */
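/* For example, absent prefixed instructions (where a single paddi would
do), the 32-bit constant 0x12345678 takes two instructions: addis to
form 0x12340000 followed by ori to merge in the low 16 bits. */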
6096
6097 static int
6098 num_insns_constant_gpr (HOST_WIDE_INT value)
6099 {
6100 /* signed constant loadable with addi */
6101 if (SIGNED_INTEGER_16BIT_P (value))
6102 return 1;
6103
6104 /* constant loadable with addis */
6105 else if ((value & 0xffff) == 0
6106 && (value >> 31 == -1 || value >> 31 == 0))
6107 return 1;
6108
6109 /* PADDI can support up to 34-bit signed integers. */
6110 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
6111 return 1;
6112
6113 else if (TARGET_POWERPC64)
6114 {
6115 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6116 HOST_WIDE_INT high = value >> 31;
6117
6118 if (high == 0 || high == -1)
6119 return 2;
6120
6121 high >>= 1;
6122
6123 if (low == 0 || low == high)
6124 return num_insns_constant_gpr (high) + 1;
6125 else if (high == 0)
6126 return num_insns_constant_gpr (low) + 1;
6127 else
6128 return (num_insns_constant_gpr (high)
6129 + num_insns_constant_gpr (low) + 1);
6130 }
6131
6132 else
6133 return 2;
6134 }
6135
6136 /* Helper for num_insns_constant. Allow constants formed by the
6137 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6138 and handle modes that require multiple gprs. */
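/* E.g., a TImode constant on a 64-bit target is split into two 64-bit
chunks, each costed separately by num_insns_constant_gpr. */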
6139
6140 static int
6141 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
6142 {
6143 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6144 int total = 0;
6145 while (nregs-- > 0)
6146 {
6147 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6148 int insns = num_insns_constant_gpr (low);
6149 if (insns > 2
6150 /* We won't get more than 2 from num_insns_constant_gpr
6151 except when TARGET_POWERPC64 and mode is DImode or
6152 wider, so the register mode must be DImode. */
6153 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6154 insns = 2;
6155 total += insns;
6156 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6157 it all at once would be UB. */
6158 value >>= (BITS_PER_WORD - 1);
6159 value >>= 1;
6160 }
6161 return total;
6162 }
6163
6164 /* Return the number of instructions it takes to form a constant in as
6165 many gprs as are needed for MODE. */
6166
6167 int
6168 num_insns_constant (rtx op, machine_mode mode)
6169 {
6170 HOST_WIDE_INT val;
6171
6172 switch (GET_CODE (op))
6173 {
6174 case CONST_INT:
6175 val = INTVAL (op);
6176 break;
6177
6178 case CONST_WIDE_INT:
6179 {
6180 int insns = 0;
6181 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6182 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6183 DImode);
6184 return insns;
6185 }
6186
6187 case CONST_DOUBLE:
6188 {
6189 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6190
6191 if (mode == SFmode || mode == SDmode)
6192 {
6193 long l;
6194
6195 if (mode == SDmode)
6196 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6197 else
6198 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6199 /* See the first define_split in rs6000.md handling a
6200 const_double_operand. */
6201 val = l;
6202 mode = SImode;
6203 }
6204 else if (mode == DFmode || mode == DDmode)
6205 {
6206 long l[2];
6207
6208 if (mode == DDmode)
6209 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6210 else
6211 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6212
6213 /* See the second (32-bit) and third (64-bit) define_split
6214 in rs6000.md handling a const_double_operand. */
6215 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6216 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6217 mode = DImode;
6218 }
6219 else if (mode == TFmode || mode == TDmode
6220 || mode == KFmode || mode == IFmode)
6221 {
6222 long l[4];
6223 int insns;
6224
6225 if (mode == TDmode)
6226 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6227 else
6228 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6229
6230 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6231 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6232 insns = num_insns_constant_multi (val, DImode);
6233 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6234 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6235 insns += num_insns_constant_multi (val, DImode);
6236 return insns;
6237 }
6238 else
6239 gcc_unreachable ();
6240 }
6241 break;
6242
6243 default:
6244 gcc_unreachable ();
6245 }
6246
6247 return num_insns_constant_multi (val, mode);
6248 }
6249
6250 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6251 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6252 corresponding element of the vector, but for V4SFmode, the
6253 corresponding "float" is interpreted as an SImode integer. */
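/* E.g., a V4SFmode element holding 1.0f is returned as its IEEE
single-precision bit pattern 0x3f800000. */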
6254
6255 HOST_WIDE_INT
6256 const_vector_elt_as_int (rtx op, unsigned int elt)
6257 {
6258 rtx tmp;
6259
6260 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6261 gcc_assert (GET_MODE (op) != V2DImode
6262 && GET_MODE (op) != V2DFmode);
6263
6264 tmp = CONST_VECTOR_ELT (op, elt);
6265 if (GET_MODE (op) == V4SFmode)
6266 tmp = gen_lowpart (SImode, tmp);
6267 return INTVAL (tmp);
6268 }
6269
6270 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6271 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6272 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6273 all elements are set to the same value and contain COPIES replicas of the
6274 vsplt's operand; if STEP > 1, every STEP-th element is set to the vsplt's
6275 operand and the others are set to the value of the operand's msb. */
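/* For example, the V4SImode constant { 0x00050005, 0x00050005,
0x00050005, 0x00050005 } matches with STEP 1 and COPIES 2: it is a
vspltish of 5 viewed as words. */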
6276
6277 static bool
6278 vspltis_constant (rtx op, unsigned step, unsigned copies)
6279 {
6280 machine_mode mode = GET_MODE (op);
6281 machine_mode inner = GET_MODE_INNER (mode);
6282
6283 unsigned i;
6284 unsigned nunits;
6285 unsigned bitsize;
6286 unsigned mask;
6287
6288 HOST_WIDE_INT val;
6289 HOST_WIDE_INT splat_val;
6290 HOST_WIDE_INT msb_val;
6291
6292 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6293 return false;
6294
6295 nunits = GET_MODE_NUNITS (mode);
6296 bitsize = GET_MODE_BITSIZE (inner);
6297 mask = GET_MODE_MASK (inner);
6298
6299 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6300 splat_val = val;
6301 msb_val = val >= 0 ? 0 : -1;
6302
6303 if (val == 0 && step > 1)
6304 {
6305 /* Special case for loading the most significant bit with step > 1.
6306 In that case, match 0 in all elements except every STEP-th one,
6307 which must match EASY_VECTOR_MSB. */
6308 for (i = 1; i < nunits; ++i)
6309 {
6310 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6311 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6312 if ((i & (step - 1)) == step - 1)
6313 {
6314 if (!EASY_VECTOR_MSB (elt_val, inner))
6315 break;
6316 }
6317 else if (elt_val)
6318 break;
6319 }
6320 if (i == nunits)
6321 return true;
6322 }
6323
6324 /* Construct the value to be splatted, if possible. If not, return false. */
6325 for (i = 2; i <= copies; i *= 2)
6326 {
6327 HOST_WIDE_INT small_val;
6328 bitsize /= 2;
6329 small_val = splat_val >> bitsize;
6330 mask >>= bitsize;
6331 if (splat_val != ((HOST_WIDE_INT)
6332 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6333 | (small_val & mask)))
6334 return false;
6335 splat_val = small_val;
6336 inner = smallest_int_mode_for_size (bitsize);
6337 }
6338
6339 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6340 if (EASY_VECTOR_15 (splat_val))
6341 ;
6342
6343 /* Also check if we can splat, and then add the result to itself. Do so if
6344 the value is positive, or if the splat instruction is using OP's mode;
6345 for splat_val < 0, the splat and the add should use the same mode. */
6346 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6347 && (splat_val >= 0 || (step == 1 && copies == 1)))
6348 ;
6349
6350 /* Also check if we are loading up the most significant bit, which can be
6351 done by loading up -1 and shifting the value left by -1. Only do this
6352 for step 1 here; for larger steps it is done earlier. */
6353 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6354 ;
6355
6356 else
6357 return false;
6358
6359 /* Check if VAL is present in every STEP-th element, and the
6360 other elements are filled with its most significant bit. */
6361 for (i = 1; i < nunits; ++i)
6362 {
6363 HOST_WIDE_INT desired_val;
6364 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6365 if ((i & (step - 1)) == 0)
6366 desired_val = val;
6367 else
6368 desired_val = msb_val;
6369
6370 if (desired_val != const_vector_elt_as_int (op, elt))
6371 return false;
6372 }
6373
6374 return true;
6375 }
6376
6377 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6378 instruction, filling in the bottom elements with 0 or -1.
6379
6380 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6381 for the number of zeroes to shift in, or negative for the number of 0xff
6382 bytes to shift in.
6383
6384 OP is a CONST_VECTOR. */
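/* For example, the big-endian V16QImode constant { 5, 0, 0, ..., 0 } is
a vspltisb of 5 followed by a VSLDOI shifting in 15 zero bytes, so 15
is returned. */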
6385
6386 int
6387 vspltis_shifted (rtx op)
6388 {
6389 machine_mode mode = GET_MODE (op);
6390 machine_mode inner = GET_MODE_INNER (mode);
6391
6392 unsigned i, j;
6393 unsigned nunits;
6394 unsigned mask;
6395
6396 HOST_WIDE_INT val;
6397
6398 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6399 return 0;
6400
6401 /* We need to create pseudo registers to do the shift, so don't recognize
6402 shift vector constants after reload. */
6403 if (!can_create_pseudo_p ())
6404 return 0;
6405
6406 nunits = GET_MODE_NUNITS (mode);
6407 mask = GET_MODE_MASK (inner);
6408
6409 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6410
6411 /* Check if the value can really be the operand of a vspltis[bhw]. */
6412 if (EASY_VECTOR_15 (val))
6413 ;
6414
6415 /* Also check if we are loading up the most significant bit which can be done
6416 by loading up -1 and shifting the value left by -1. */
6417 else if (EASY_VECTOR_MSB (val, inner))
6418 ;
6419
6420 else
6421 return 0;
6422
6423 /* Check if VAL is present in every STEP-th element until we find elements
6424 that are 0 or all 1 bits. */
6425 for (i = 1; i < nunits; ++i)
6426 {
6427 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6428 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6429
6430 /* If the value isn't the splat value, check for the remaining elements
6431 being 0/-1. */
6432 if (val != elt_val)
6433 {
6434 if (elt_val == 0)
6435 {
6436 for (j = i+1; j < nunits; ++j)
6437 {
6438 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6439 if (const_vector_elt_as_int (op, elt2) != 0)
6440 return 0;
6441 }
6442
6443 return (nunits - i) * GET_MODE_SIZE (inner);
6444 }
6445
6446 else if ((elt_val & mask) == mask)
6447 {
6448 for (j = i+1; j < nunits; ++j)
6449 {
6450 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6451 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6452 return 0;
6453 }
6454
6455 return -((nunits - i) * GET_MODE_SIZE (inner));
6456 }
6457
6458 else
6459 return 0;
6460 }
6461 }
6462
6463 /* If all elements are equal, we don't need to do VSLDOI. */
6464 return 0;
6465 }
6466
6467
6468 /* Return non-zero (element mode byte size) if OP is of the given MODE
6469 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6470
6471 int
6472 easy_altivec_constant (rtx op, machine_mode mode)
6473 {
6474 unsigned step, copies;
6475
6476 if (mode == VOIDmode)
6477 mode = GET_MODE (op);
6478 else if (mode != GET_MODE (op))
6479 return 0;
6480
6481 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6482 constants. */
6483 if (mode == V2DFmode)
6484 return zero_constant (op, mode) ? 8 : 0;
6485
6486 else if (mode == V2DImode)
6487 {
6488 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6489 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6490 return 0;
6491
6492 if (zero_constant (op, mode))
6493 return 8;
6494
6495 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6496 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6497 return 8;
6498
6499 return 0;
6500 }
6501
6502 /* V1TImode is a special container for TImode. Ignore for now. */
6503 else if (mode == V1TImode)
6504 return 0;
6505
6506 /* Start with a vspltisw. */
6507 step = GET_MODE_NUNITS (mode) / 4;
6508 copies = 1;
6509
6510 if (vspltis_constant (op, step, copies))
6511 return 4;
6512
6513 /* Then try with a vspltish. */
6514 if (step == 1)
6515 copies <<= 1;
6516 else
6517 step >>= 1;
6518
6519 if (vspltis_constant (op, step, copies))
6520 return 2;
6521
6522 /* And finally a vspltisb. */
6523 if (step == 1)
6524 copies <<= 1;
6525 else
6526 step >>= 1;
6527
6528 if (vspltis_constant (op, step, copies))
6529 return 1;
6530
6531 if (vspltis_shifted (op) != 0)
6532 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6533
6534 return 0;
6535 }
6536
6537 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6538 result is OP. Abort if it is not possible. */
6539
6540 rtx
6541 gen_easy_altivec_constant (rtx op)
6542 {
6543 machine_mode mode = GET_MODE (op);
6544 int nunits = GET_MODE_NUNITS (mode);
6545 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6546 unsigned step = nunits / 4;
6547 unsigned copies = 1;
6548
6549 /* Start with a vspltisw. */
6550 if (vspltis_constant (op, step, copies))
6551 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6552
6553 /* Then try with a vspltish. */
6554 if (step == 1)
6555 copies <<= 1;
6556 else
6557 step >>= 1;
6558
6559 if (vspltis_constant (op, step, copies))
6560 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6561
6562 /* And finally a vspltisb. */
6563 if (step == 1)
6564 copies <<= 1;
6565 else
6566 step >>= 1;
6567
6568 if (vspltis_constant (op, step, copies))
6569 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6570
6571 gcc_unreachable ();
6572 }
6573
6574 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6575 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6576
6577 Return the number of instructions needed (1 or 2) through the address
6578 pointed to by NUM_INSNS_PTR.
6579
6580 Return the constant that is being splatted via CONSTANT_PTR. */
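/* For example, a V4SImode vector of 100s cannot use vspltisw (100 > 15),
so absent a single prefixed-constant alternative (xxspltiw) it takes
xxspltib 100 plus a vextsb2w sign extension, i.e. *NUM_INSNS_PTR is 2
and *CONSTANT_PTR is 100. */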
6581
6582 bool
6583 xxspltib_constant_p (rtx op,
6584 machine_mode mode,
6585 int *num_insns_ptr,
6586 int *constant_ptr)
6587 {
6588 size_t nunits = GET_MODE_NUNITS (mode);
6589 size_t i;
6590 HOST_WIDE_INT value;
6591 rtx element;
6592
6593 /* Set the returned values to out-of-bounds values. */
6594 *num_insns_ptr = -1;
6595 *constant_ptr = 256;
6596
6597 if (!TARGET_P9_VECTOR)
6598 return false;
6599
6600 if (mode == VOIDmode)
6601 mode = GET_MODE (op);
6602
6603 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6604 return false;
6605
6606 /* Handle (vec_duplicate <constant>). */
6607 if (GET_CODE (op) == VEC_DUPLICATE)
6608 {
6609 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6610 && mode != V2DImode)
6611 return false;
6612
6613 element = XEXP (op, 0);
6614 if (!CONST_INT_P (element))
6615 return false;
6616
6617 value = INTVAL (element);
6618 if (!IN_RANGE (value, -128, 127))
6619 return false;
6620 }
6621
6622 /* Handle (const_vector [...]). */
6623 else if (GET_CODE (op) == CONST_VECTOR)
6624 {
6625 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6626 && mode != V2DImode)
6627 return false;
6628
6629 element = CONST_VECTOR_ELT (op, 0);
6630 if (!CONST_INT_P (element))
6631 return false;
6632
6633 value = INTVAL (element);
6634 if (!IN_RANGE (value, -128, 127))
6635 return false;
6636
6637 for (i = 1; i < nunits; i++)
6638 {
6639 element = CONST_VECTOR_ELT (op, i);
6640 if (!CONST_INT_P (element))
6641 return false;
6642
6643 if (value != INTVAL (element))
6644 return false;
6645 }
6646 }
6647
6648 /* Handle integer constants being loaded into the upper part of the VSX
6649 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6650 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6651 else if (CONST_INT_P (op))
6652 {
6653 if (!SCALAR_INT_MODE_P (mode))
6654 return false;
6655
6656 value = INTVAL (op);
6657 if (!IN_RANGE (value, -128, 127))
6658 return false;
6659
6660 if (!IN_RANGE (value, -1, 0))
6661 {
6662 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6663 return false;
6664
6665 if (EASY_VECTOR_15 (value))
6666 return false;
6667 }
6668 }
6669
6670 else
6671 return false;
6672
6673 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6674 sign extend. Special case 0/-1 to allow getting any VSX register instead
6675 of an Altivec register. */
6676 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6677 && EASY_VECTOR_15 (value))
6678 return false;
6679
6680 /* Return # of instructions and the constant byte for XXSPLTIB. */
6681 if (mode == V16QImode)
6682 *num_insns_ptr = 1;
6683
6684 else if (IN_RANGE (value, -1, 0))
6685 *num_insns_ptr = 1;
6686
6687 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6688 single XXSPLTIW or XXSPLTIDP instruction. */
6689 else if (vsx_prefixed_constant (op, mode))
6690 return false;
6691
6692 /* Return XXSPLTIB followed by a sign-extend operation to convert the
6693 constant to V8HImode or V4SImode. */
6694 else
6695 *num_insns_ptr = 2;
6696
6697 *constant_ptr = (int) value;
6698 return true;
6699 }
6700
6701 const char *
6702 output_vec_const_move (rtx *operands)
6703 {
6704 int shift;
6705 machine_mode mode;
6706 rtx dest, vec;
6707
6708 dest = operands[0];
6709 vec = operands[1];
6710 mode = GET_MODE (dest);
6711
6712 if (TARGET_VSX)
6713 {
6714 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6715 int xxspltib_value = 256;
6716 int num_insns = -1;
6717
6718 if (zero_constant (vec, mode))
6719 {
6720 if (TARGET_P9_VECTOR)
6721 return "xxspltib %x0,0";
6722
6723 else if (dest_vmx_p)
6724 return "vspltisw %0,0";
6725
6726 else
6727 return "xxlxor %x0,%x0,%x0";
6728 }
6729
6730 if (all_ones_constant (vec, mode))
6731 {
6732 if (TARGET_P9_VECTOR)
6733 return "xxspltib %x0,255";
6734
6735 else if (dest_vmx_p)
6736 return "vspltisw %0,-1";
6737
6738 else if (TARGET_P8_VECTOR)
6739 return "xxlorc %x0,%x0,%x0";
6740
6741 else
6742 gcc_unreachable ();
6743 }
6744
6745 vec_const_128bit_type vsx_const;
6746 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6747 {
6748 unsigned imm = constant_generates_lxvkq (&vsx_const);
6749 if (imm)
6750 {
6751 operands[2] = GEN_INT (imm);
6752 return "lxvkq %x0,%2";
6753 }
6754
6755 imm = constant_generates_xxspltiw (&vsx_const);
6756 if (imm)
6757 {
6758 operands[2] = GEN_INT (imm);
6759 return "xxspltiw %x0,%2";
6760 }
6761
6762 imm = constant_generates_xxspltidp (&vsx_const);
6763 if (imm)
6764 {
6765 operands[2] = GEN_INT (imm);
6766 return "xxspltidp %x0,%2";
6767 }
6768 }
6769
6770 if (TARGET_P9_VECTOR
6771 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6772 {
6773 if (num_insns == 1)
6774 {
6775 operands[2] = GEN_INT (xxspltib_value & 0xff);
6776 return "xxspltib %x0,%2";
6777 }
6778
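/* Two instructions are needed (XXSPLTIB plus a sign extend), so output
   "#" and let the insn splitter expand the sequence.  */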
6779 return "#";
6780 }
6781 }
6782
6783 if (TARGET_ALTIVEC)
6784 {
6785 rtx splat_vec;
6786
6787 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6788 if (zero_constant (vec, mode))
6789 return "vspltisw %0,0";
6790
6791 if (all_ones_constant (vec, mode))
6792 return "vspltisw %0,-1";
6793
6794 /* Do we need to construct a value using VSLDOI? */
6795 shift = vspltis_shifted (vec);
6796 if (shift != 0)
6797 return "#";
6798
6799 splat_vec = gen_easy_altivec_constant (vec);
6800 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6801 operands[1] = XEXP (splat_vec, 0);
6802 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6803 return "#";
6804
6805 switch (GET_MODE (splat_vec))
6806 {
6807 case E_V4SImode:
6808 return "vspltisw %0,%1";
6809
6810 case E_V8HImode:
6811 return "vspltish %0,%1";
6812
6813 case E_V16QImode:
6814 return "vspltisb %0,%1";
6815
6816 default:
6817 gcc_unreachable ();
6818 }
6819 }
6820
6821 gcc_unreachable ();
6822 }
6823
6824 /* Initialize vector TARGET to VALS. */
6825
6826 void
6827 rs6000_expand_vector_init (rtx target, rtx vals)
6828 {
6829 machine_mode mode = GET_MODE (target);
6830 machine_mode inner_mode = GET_MODE_INNER (mode);
6831 unsigned int n_elts = GET_MODE_NUNITS (mode);
6832 int n_var = 0, one_var = -1;
6833 bool all_same = true, all_const_zero = true;
6834 rtx x, mem;
6835 unsigned int i;
6836
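/* Scan the initializer: count the variable elements (remembering the
   position of the last one), and track whether all elements are identical
   and whether all are constant zero.  */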
6837 for (i = 0; i < n_elts; ++i)
6838 {
6839 x = XVECEXP (vals, 0, i);
6840 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6841 ++n_var, one_var = i;
6842 else if (x != CONST0_RTX (inner_mode))
6843 all_const_zero = false;
6844
6845 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6846 all_same = false;
6847 }
6848
6849 if (n_var == 0)
6850 {
6851 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6852 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6853 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6854 {
6855 /* Zero register. */
6856 emit_move_insn (target, CONST0_RTX (mode));
6857 return;
6858 }
6859 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6860 {
6861 /* Splat immediate. */
6862 emit_insn (gen_rtx_SET (target, const_vec));
6863 return;
6864 }
6865 else
6866 {
6867 /* Load from constant pool. */
6868 emit_move_insn (target, const_vec);
6869 return;
6870 }
6871 }
6872
6873 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6874 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6875 {
6876 rtx op[2];
6877 size_t i;
6878 size_t num_elements = all_same ? 1 : 2;
6879 for (i = 0; i < num_elements; i++)
6880 {
6881 op[i] = XVECEXP (vals, 0, i);
6882 /* Just in case there is a SUBREG with a smaller mode, do a
6883 conversion. */
6884 if (GET_MODE (op[i]) != inner_mode)
6885 {
6886 rtx tmp = gen_reg_rtx (inner_mode);
6887 convert_move (tmp, op[i], 0);
6888 op[i] = tmp;
6889 }
6890 /* Allow load with splat double word. */
6891 else if (MEM_P (op[i]))
6892 {
6893 if (!all_same)
6894 op[i] = force_reg (inner_mode, op[i]);
6895 }
6896 else if (!REG_P (op[i]))
6897 op[i] = force_reg (inner_mode, op[i]);
6898 }
6899
6900 if (all_same)
6901 {
6902 if (mode == V2DFmode)
6903 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6904 else
6905 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6906 }
6907 else
6908 {
6909 if (mode == V2DFmode)
6910 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6911 else
6912 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6913 }
6914 return;
6915 }
6916
6917 /* Special-case initializing a vector of ints if we are on a 64-bit system
6918 with direct move or we have the ISA 3.0 instructions. */
6919 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6920 && TARGET_DIRECT_MOVE_64BIT)
6921 {
6922 if (all_same)
6923 {
6924 rtx element0 = XVECEXP (vals, 0, 0);
6925 if (MEM_P (element0))
6926 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6927 else
6928 element0 = force_reg (SImode, element0);
6929
6930 if (TARGET_P9_VECTOR)
6931 emit_insn (gen_vsx_splat_v4si (target, element0));
6932 else
6933 {
6934 rtx tmp = gen_reg_rtx (DImode);
6935 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6936 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6937 }
6938 return;
6939 }
6940 else
6941 {
6942 rtx elements[4];
6943 size_t i;
6944
6945 for (i = 0; i < 4; i++)
6946 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6947
6948 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6949 elements[2], elements[3]));
6950 return;
6951 }
6952 }
6953
6954 /* With single precision floating point on VSX, note that internally single
6955 precision is actually represented as a double. Either make 2 V2DF
6956 vectors and convert those vectors to single precision, or do one
6957 conversion and splat the result to the other elements. */
6958 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6959 {
6960 if (all_same)
6961 {
6962 rtx element0 = XVECEXP (vals, 0, 0);
6963
6964 if (TARGET_P9_VECTOR)
6965 {
6966 if (MEM_P (element0))
6967 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6968
6969 emit_insn (gen_vsx_splat_v4sf (target, element0));
6970 }
6971
6972 else
6973 {
6974 rtx freg = gen_reg_rtx (V4SFmode);
6975 rtx sreg = force_reg (SFmode, element0);
6976 rtx cvt = (TARGET_XSCVDPSPN
6977 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6978 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6979
6980 emit_insn (cvt);
6981 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6982 const0_rtx));
6983 }
6984 }
6985 else
6986 {
6987 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6988 {
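/* Move the four SFmode values into GPRs as 32-bit patterns, merge them
   pairwise into two doublewords, concatenate those into a V2DI, and
   reinterpret the result as V4SF.  */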
6989 rtx tmp_sf[4];
6990 rtx tmp_si[4];
6991 rtx tmp_di[4];
6992 rtx mrg_di[4];
6993 for (i = 0; i < 4; i++)
6994 {
6995 tmp_si[i] = gen_reg_rtx (SImode);
6996 tmp_di[i] = gen_reg_rtx (DImode);
6997 mrg_di[i] = gen_reg_rtx (DImode);
6998 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6999 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
7000 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
7001 }
7002
7003 if (!BYTES_BIG_ENDIAN)
7004 {
7005 std::swap (tmp_di[0], tmp_di[1]);
7006 std::swap (tmp_di[2], tmp_di[3]);
7007 }
7008
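/* mrg_di[1] = (tmp_di[0] << 32) | tmp_di[1], and likewise mrg_di[3]
   from tmp_di[2] and tmp_di[3].  */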
7009 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
7010 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
7011 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
7012 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
7013
7014 rtx tmp_v2di = gen_reg_rtx (V2DImode);
7015 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
7016 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
7017 }
7018 else
7019 {
7020 rtx dbl_even = gen_reg_rtx (V2DFmode);
7021 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7022 rtx flt_even = gen_reg_rtx (V4SFmode);
7023 rtx flt_odd = gen_reg_rtx (V4SFmode);
7024 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7025 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7026 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7027 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7028
7029 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7030 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7031 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7032 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7033 rs6000_expand_extract_even (target, flt_even, flt_odd);
7034 }
7035 }
7036 return;
7037 }
7038
7039 /* Special-case initializing vector short/char splats if we are on a
7040 64-bit system with direct move. */
7041 if (all_same && TARGET_DIRECT_MOVE_64BIT
7042 && (mode == V16QImode || mode == V8HImode))
7043 {
7044 rtx op0 = XVECEXP (vals, 0, 0);
7045 rtx di_tmp = gen_reg_rtx (DImode);
7046
7047 if (!REG_P (op0))
7048 op0 = force_reg (GET_MODE_INNER (mode), op0);
7049
7050 if (mode == V16QImode)
7051 {
7052 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7053 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7054 return;
7055 }
7056
7057 if (mode == V8HImode)
7058 {
7059 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7060 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7061 return;
7062 }
7063 }
7064
7065 /* Store value to stack temp. Load vector element. Splat. However, splat
7066 of 64-bit items is not supported on Altivec. */
7067 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7068 {
7069 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7070 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7071 XVECEXP (vals, 0, 0));
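/* The PARALLEL of a vector SET plus UNSPEC_LVE models the element load;
   the VEC_SELECT/VEC_DUPLICATE below then splats the loaded element
   across all lanes of TARGET.  */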
7072 x = gen_rtx_UNSPEC (VOIDmode,
7073 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7074 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7075 gen_rtvec (2,
7076 gen_rtx_SET (target, mem),
7077 x)));
7078 x = gen_rtx_VEC_SELECT (inner_mode, target,
7079 gen_rtx_PARALLEL (VOIDmode,
7080 gen_rtvec (1, const0_rtx)));
7081 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7082 return;
7083 }
7084
7085 /* One field is non-constant. Load constant then overwrite
7086 varying field. */
7087 if (n_var == 1)
7088 {
7089 rtx copy = copy_rtx (vals);
7090
7091 /* Load constant part of vector, substitute neighboring value for
7092 varying element. */
7093 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7094 rs6000_expand_vector_init (target, copy);
7095
7096 /* Insert variable. */
7097 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
7098 GEN_INT (one_var));
7099 return;
7100 }
7101
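/* V16QI/V8HI with more than one variable element: move each element
   into a VSX register, then merge and pack the pieces into the final
   vector.  */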
7102 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
7103 {
7104 rtx op[16];
7105 /* Force the values into word_mode registers. */
7106 for (i = 0; i < n_elts; i++)
7107 {
7108 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
7109 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
7110 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
7111 }
7112
7113 /* Take unsigned char on 64-bit big-endian as an example for the
7114 construction below; the input values are: A, B, C, D, ..., O, P. */
7115
7116 if (TARGET_DIRECT_MOVE_128)
7117 {
7118 /* Move to VSX register with vec_concat, each has 2 values.
7119 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7120 vr1[1] = { xxxxxxxC, xxxxxxxD };
7121 ...
7122 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7123 rtx vr1[8];
7124 for (i = 0; i < n_elts / 2; i++)
7125 {
7126 vr1[i] = gen_reg_rtx (V2DImode);
7127 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
7128 op[i * 2 + 1]));
7129 }
7130
7131 /* Pack vectors with 2 values into vectors with 4 values.
7132 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7133 vr2[1] = { xxxExxxF, xxxGxxxH };
7134 vr2[2] = { xxxIxxxJ, xxxKxxxL };
7135 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7136 rtx vr2[4];
7137 for (i = 0; i < n_elts / 4; i++)
7138 {
7139 vr2[i] = gen_reg_rtx (V4SImode);
7140 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7141 vr1[i * 2 + 1]));
7142 }
7143
7144 /* Pack vectors with 4 values into vectors with 8 values.
7145 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7146 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7147 rtx vr3[2];
7148 for (i = 0; i < n_elts / 8; i++)
7149 {
7150 vr3[i] = gen_reg_rtx (V8HImode);
7151 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7152 vr2[i * 2 + 1]));
7153 }
7154
7155 /* If it's V8HImode, the result is complete; move it into TARGET and return. */
7156 if (mode == V8HImode)
7157 {
7158 emit_insn (gen_rtx_SET (target, vr3[0]));
7159 return;
7160 }
7161
7162 /* Pack vectors with 8 values into 16 values. */
7163 rtx res = gen_reg_rtx (V16QImode);
7164 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7165 emit_insn (gen_rtx_SET (target, res));
7166 }
7167 else
7168 {
7169 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7170 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7171 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7172 rtx perm_idx;
7173
7174 /* Set up some common gen routines and values. */
7175 if (BYTES_BIG_ENDIAN)
7176 {
7177 if (mode == V16QImode)
7178 {
7179 merge_v16qi = gen_altivec_vmrghb;
7180 merge_v8hi = gen_altivec_vmrglh;
7181 }
7182 else
7183 merge_v8hi = gen_altivec_vmrghh;
7184
7185 merge_v4si = gen_altivec_vmrglw;
7186 perm_idx = GEN_INT (3);
7187 }
7188 else
7189 {
7190 if (mode == V16QImode)
7191 {
7192 merge_v16qi = gen_altivec_vmrglb;
7193 merge_v8hi = gen_altivec_vmrghh;
7194 }
7195 else
7196 merge_v8hi = gen_altivec_vmrglh;
7197
7198 merge_v4si = gen_altivec_vmrghw;
7199 perm_idx = GEN_INT (0);
7200 }
7201
7202 /* Move to VSX register with direct move.
7203 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7204 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7205 ...
7206 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7207 rtx vr_qi[16];
7208 for (i = 0; i < n_elts; i++)
7209 {
7210 vr_qi[i] = gen_reg_rtx (V16QImode);
7211 if (TARGET_POWERPC64)
7212 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7213 else
7214 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7215 }
7216
7217 /* Merge/move to vector short.
7218 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7219 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7220 ...
7221 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7222 rtx vr_hi[8];
7223 for (i = 0; i < 8; i++)
7224 {
7225 rtx tmp = vr_qi[i];
7226 if (mode == V16QImode)
7227 {
7228 tmp = gen_reg_rtx (V16QImode);
7229 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7230 }
7231 vr_hi[i] = gen_reg_rtx (V8HImode);
7232 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7233 }
7234
7235 /* Merge vector short to vector int.
7236 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7237 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7238 ...
7239 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7240 rtx vr_si[4];
7241 for (i = 0; i < 4; i++)
7242 {
7243 rtx tmp = gen_reg_rtx (V8HImode);
7244 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7245 vr_si[i] = gen_reg_rtx (V4SImode);
7246 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7247 }
7248
7249 /* Merge vector int to vector long.
7250 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7251 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7252 rtx vr_di[2];
7253 for (i = 0; i < 2; i++)
7254 {
7255 rtx tmp = gen_reg_rtx (V4SImode);
7256 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7257 vr_di[i] = gen_reg_rtx (V2DImode);
7258 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7259 }
7260
7261 rtx res = gen_reg_rtx (V2DImode);
7262 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7263 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7264 }
7265
7266 return;
7267 }
7268
7269 /* Construct the vector in memory one field at a time
7270 and load the whole vector. */
7271 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7272 for (i = 0; i < n_elts; i++)
7273 emit_move_insn (adjust_address_nv (mem, inner_mode,
7274 i * GET_MODE_SIZE (inner_mode)),
7275 XVECEXP (vals, 0, i));
7276 emit_move_insn (target, mem);
7277 }
7278
7279 /* Insert VAL into element IDX of TARGET; VAL has the same size as a vector
7280 element, and the variable IDX counts in vector-element units (P9 and up). */
7281
7282 static void
7283 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7284 {
7285 machine_mode mode = GET_MODE (target);
7286
7287 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7288
7289 machine_mode inner_mode = GET_MODE (val);
7290
7291 int width = GET_MODE_SIZE (inner_mode);
7292
7293 gcc_assert (width >= 1 && width <= 8);
7294
7295 int shift = exact_log2 (width);
7296
7297 machine_mode idx_mode = GET_MODE (idx);
7298
7299 machine_mode shift_mode;
7300 rtx (*gen_ashl)(rtx, rtx, rtx);
7301 rtx (*gen_lvsl)(rtx, rtx);
7302 rtx (*gen_lvsr)(rtx, rtx);
7303
7304 if (TARGET_POWERPC64)
7305 {
7306 shift_mode = DImode;
7307 gen_ashl = gen_ashldi3;
7308 gen_lvsl = gen_altivec_lvsl_reg_di;
7309 gen_lvsr = gen_altivec_lvsr_reg_di;
7310 }
7311 else
7312 {
7313 shift_mode = SImode;
7314 gen_ashl = gen_ashlsi3;
7315 gen_lvsl = gen_altivec_lvsl_reg_si;
7316 gen_lvsr = gen_altivec_lvsr_reg_si;
7317 }
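/* The sequence below rotates TARGET so that the element selected by IDX
   lands in element 0, inserts VAL at element 0, and then rotates the
   vector back to restore the original element order.  */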
7318 /* Generate the IDX for permute shift, width is the vector element size.
7319 idx = idx * width. */
7320 rtx tmp = gen_reg_rtx (shift_mode);
7321 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7322
7323 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7324
7325 /* lvsr v1,0,idx. */
7326 rtx pcvr = gen_reg_rtx (V16QImode);
7327 emit_insn (gen_lvsr (pcvr, tmp));
7328
7329 /* lvsl v2,0,idx. */
7330 rtx pcvl = gen_reg_rtx (V16QImode);
7331 emit_insn (gen_lvsl (pcvl, tmp));
7332
7333 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7334
7335 rtx permr
7336 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr);
7337 emit_insn (permr);
7338
7339 rs6000_expand_vector_set (target, val, const0_rtx);
7340
7341 rtx perml
7342 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl);
7343 emit_insn (perml);
7344 }
7345
7346 /* Insert VAL into element IDX of TARGET; VAL has the same size as a vector
7347 element, and the variable IDX counts in vector-element units (P7 and P8). */
7348
7349 static void
7350 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7351 {
7352 machine_mode mode = GET_MODE (target);
7353
7354 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7355
7356 machine_mode inner_mode = GET_MODE (val);
7357 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7358
7359 int width = GET_MODE_SIZE (inner_mode);
7360 gcc_assert (width >= 1 && width <= 4);
7361
7362 int shift = exact_log2 (width);
7363
7364 machine_mode idx_mode = GET_MODE (idx);
7365
7366 machine_mode shift_mode;
7367 rtx (*gen_ashl)(rtx, rtx, rtx);
7368 rtx (*gen_add)(rtx, rtx, rtx);
7369 rtx (*gen_sub)(rtx, rtx, rtx);
7370 rtx (*gen_lvsl)(rtx, rtx);
7371
7372 if (TARGET_POWERPC64)
7373 {
7374 shift_mode = DImode;
7375 gen_ashl = gen_ashldi3;
7376 gen_add = gen_adddi3;
7377 gen_sub = gen_subdi3;
7378 gen_lvsl = gen_altivec_lvsl_reg_di;
7379 }
7380 else
7381 {
7382 shift_mode = SImode;
7383 gen_ashl = gen_ashlsi3;
7384 gen_add = gen_addsi3;
7385 gen_sub = gen_subsi3;
7386 gen_lvsl = gen_altivec_lvsl_reg_si;
7387 }
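/* The sequence below builds a byte mask covering the selected element,
   permutes VAL's bytes into the element's position, and merges the two
   into TARGET with XXSEL.  */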
7388
7389 /* idx = idx * width. */
7390 rtx tmp = gen_reg_rtx (shift_mode);
7391 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7392
7393 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7394
7395 /* For LE: idx = idx + 8; for BE: idx = 24 - width - idx. */
7396 if (!BYTES_BIG_ENDIAN)
7397 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7398 else
7399 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7400
7401 /* lxv vs33, mask.
7402 DImode: 0xffffffffffffffff0000000000000000
7403 SImode: 0x00000000ffffffff0000000000000000
7404 HImode: 0x000000000000ffff0000000000000000.
7405 QImode: 0x00000000000000ff0000000000000000. */
7406 rtx mask = gen_reg_rtx (V16QImode);
7407 rtx mask_v2di = gen_reg_rtx (V2DImode);
7408 rtvec v = rtvec_alloc (2);
7409 if (!BYTES_BIG_ENDIAN)
7410 {
7411 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7412 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7413 }
7414 else
7415 {
7416 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7417 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7418 }
7419 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7420 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7421 emit_insn (gen_rtx_SET (mask, sub_mask));
7422
7423 /* mtvsrd[wz] f0,tmp_val. */
7424 rtx tmp_val = gen_reg_rtx (SImode);
7425 if (inner_mode == E_SFmode && TARGET_DIRECT_MOVE_64BIT)
7426 emit_insn (gen_movsi_from_sf (tmp_val, val));
7427 else if (inner_mode == E_SFmode)
7428 {
7429 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7430 emit_insn (gen_movsf_hardfloat (stack, val));
7431 rtx stack2 = copy_rtx (stack);
7432 PUT_MODE (stack2, SImode);
7433 emit_move_insn (tmp_val, stack2);
7434 }
7435 else
7436 tmp_val = force_reg (SImode, val);
7438
7439 rtx val_v16qi = gen_reg_rtx (V16QImode);
7440 rtx val_v2di = gen_reg_rtx (V2DImode);
7441 rtvec vec_val = rtvec_alloc (2);
7442 if (!BYTES_BIG_ENDIAN)
7443 {
7444 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7445 RTVEC_ELT (vec_val, 1) = tmp_val;
7446 }
7447 else
7448 {
7449 RTVEC_ELT (vec_val, 0) = tmp_val;
7450 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7451 }
7452 emit_insn (
7453 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7454 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7455 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7456
7457 /* lvsl 13,0,idx. */
7458 rtx pcv = gen_reg_rtx (V16QImode);
7459 emit_insn (gen_lvsl (pcv, tmp));
7460
7461 /* vperm 1,1,1,13. */
7462 /* vperm 0,0,0,13. */
7463 rtx val_perm = gen_reg_rtx (V16QImode);
7464 rtx mask_perm = gen_reg_rtx (V16QImode);
7465 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7466 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7467
7468 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7469
7470 /* xxsel 34,34,32,33. */
7471 emit_insn (
7472 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7473 }
7474
7475 /* Set field ELT_RTX of TARGET to VAL. */
7476
7477 void
7478 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7479 {
7480 machine_mode mode = GET_MODE (target);
7481 machine_mode inner_mode = GET_MODE_INNER (mode);
7482 rtx reg = gen_reg_rtx (mode);
7483 rtx mask, mem, x;
7484 int width = GET_MODE_SIZE (inner_mode);
7485 int i;
7486
7487 val = force_reg (GET_MODE (val), val);
7488
7489 if (VECTOR_MEM_VSX_P (mode))
7490 {
7491 if (!CONST_INT_P (elt_rtx))
7492 {
7493 /* For V2DI/V2DF, we can leverage the P9 version to generate xxpermdi
7494 when elt_rtx is variable. */
7495 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7496 {
7497 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7498 return;
7499 }
7500 else if (TARGET_VSX)
7501 {
7502 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7503 return;
7504 }
7505 else
7506 gcc_assert (CONST_INT_P (elt_rtx));
7507 }
7508
7509 rtx insn = NULL_RTX;
7510
7511 if (mode == V2DFmode)
7512 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7513
7514 else if (mode == V2DImode)
7515 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7516
7517 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7518 {
7519 if (mode == V4SImode)
7520 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7521 else if (mode == V8HImode)
7522 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7523 else if (mode == V16QImode)
7524 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7525 else if (mode == V4SFmode)
7526 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7527 }
7528
7529 if (insn)
7530 {
7531 emit_insn (insn);
7532 return;
7533 }
7534 }
7535
7536 /* Simplify setting single element vectors like V1TImode. */
7537 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7538 && INTVAL (elt_rtx) == 0)
7539 {
7540 emit_move_insn (target, gen_lowpart (mode, val));
7541 return;
7542 }
7543
7544 /* Load single variable value. */
7545 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7546 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7547 x = gen_rtx_UNSPEC (VOIDmode,
7548 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7549 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7550 gen_rtvec (2,
7551 gen_rtx_SET (reg, mem),
7552 x)));
7553
7554 /* Linear sequence. */
7555 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7556 for (i = 0; i < 16; ++i)
7557 XVECEXP (mask, 0, i) = GEN_INT (i);
7558
7559 /* Set permute mask to insert element into target. */
7560 for (i = 0; i < width; ++i)
7561 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7562 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7563
7564 if (BYTES_BIG_ENDIAN)
7565 x = gen_rtx_UNSPEC (mode,
7566 gen_rtvec (3, target, reg,
7567 force_reg (V16QImode, x)),
7568 UNSPEC_VPERM);
7569 else
7570 {
7571 if (TARGET_P9_VECTOR)
7572 x = gen_rtx_UNSPEC (mode,
7573 gen_rtvec (3, reg, target,
7574 force_reg (V16QImode, x)),
7575 UNSPEC_VPERMR);
7576 else
7577 {
7578 /* Invert selector. We prefer to generate VNAND on P8 so
7579 that future fusion opportunities can kick in, but must
7580 generate VNOR elsewhere. */
7581 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7582 rtx iorx = (TARGET_P8_VECTOR
7583 ? gen_rtx_IOR (V16QImode, notx, notx)
7584 : gen_rtx_AND (V16QImode, notx, notx));
7585 rtx tmp = gen_reg_rtx (V16QImode);
7586 emit_insn (gen_rtx_SET (tmp, iorx));
7587
7588 /* Permute with operands reversed and adjusted selector. */
7589 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7590 UNSPEC_VPERM);
7591 }
7592 }
7593
7594 emit_insn (gen_rtx_SET (target, x));
7595 }
7596
7597 /* Extract field ELT from VEC into TARGET. */
7598
7599 void
7600 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7601 {
7602 machine_mode mode = GET_MODE (vec);
7603 machine_mode inner_mode = GET_MODE_INNER (mode);
7604 rtx mem;
7605
7606 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7607 {
7608 switch (mode)
7609 {
7610 default:
7611 break;
7612 case E_V1TImode:
7613 emit_move_insn (target, gen_lowpart (TImode, vec));
7614 return;
7615 case E_V2DFmode:
7616 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7617 return;
7618 case E_V2DImode:
7619 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7620 return;
7621 case E_V4SFmode:
7622 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7623 return;
7624 case E_V16QImode:
7625 if (TARGET_DIRECT_MOVE_64BIT)
7626 {
7627 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7628 return;
7629 }
7630 else
7631 break;
7632 case E_V8HImode:
7633 if (TARGET_DIRECT_MOVE_64BIT)
7634 {
7635 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7636 return;
7637 }
7638 else
7639 break;
7640 case E_V4SImode:
7641 if (TARGET_DIRECT_MOVE_64BIT)
7642 {
7643 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7644 return;
7645 }
7646 break;
7647 }
7648 }
7649 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7650 && TARGET_DIRECT_MOVE_64BIT)
7651 {
7652 if (GET_MODE (elt) != DImode)
7653 {
7654 rtx tmp = gen_reg_rtx (DImode);
7655 convert_move (tmp, elt, 0);
7656 elt = tmp;
7657 }
7658 else if (!REG_P (elt))
7659 elt = force_reg (DImode, elt);
7660
7661 switch (mode)
7662 {
7663 case E_V1TImode:
7664 emit_move_insn (target, gen_lowpart (TImode, vec));
7665 return;
7666
7667 case E_V2DFmode:
7668 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7669 return;
7670
7671 case E_V2DImode:
7672 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7673 return;
7674
7675 case E_V4SFmode:
7676 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7677 return;
7678
7679 case E_V4SImode:
7680 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7681 return;
7682
7683 case E_V8HImode:
7684 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7685 return;
7686
7687 case E_V16QImode:
7688 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7689 return;
7690
7691 default:
7692 gcc_unreachable ();
7693 }
7694 }
7695
7696 /* Allocate mode-sized buffer. */
7697 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7698
7699 emit_move_insn (mem, vec);
7700 if (CONST_INT_P (elt))
7701 {
7702 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7703
7704 /* Add offset to field within buffer matching vector element. */
7705 mem = adjust_address_nv (mem, inner_mode,
7706 modulo_elt * GET_MODE_SIZE (inner_mode));
7707 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7708 }
7709 else
7710 {
7711 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7712 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7713 rtx new_addr;
7714
7715 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7716 if (ele_size > 1)
7717 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7718 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7719 new_addr = change_address (mem, inner_mode, new_addr);
7720 emit_move_insn (target, new_addr);
7721 }
7722 }
7723
7724 /* Return the offset within a memory object (MEM) of a vector type to a given
7725 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7726 the element is constant, we return a constant integer.
7727
7728 Otherwise, we use a base register temporary to calculate the offset after
7729 masking it to fit within the bounds of the vector and scaling it. The
7730 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7731 built-in function. */
7732
7733 static rtx
7734 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7735 {
7736 if (CONST_INT_P (element))
7737 return GEN_INT (INTVAL (element) * scalar_size);
7738
7739 /* All insns should use the 'Q' constraint (address is a single register) if
7740 the element number is not a constant. */
7741 gcc_assert (satisfies_constraint_Q (mem));
7742
7743 /* Mask the element to make sure the element number is between 0 and the
7744 maximum number of elements - 1 so that we don't generate an address
7745 outside the vector. */
7746 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7747 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7748 emit_insn (gen_rtx_SET (base_tmp, and_op));
7749
7750 /* Shift the element to get the byte offset from the element number. */
7751 int shift = exact_log2 (scalar_size);
7752 gcc_assert (shift >= 0);
7753
7754 if (shift > 0)
7755 {
7756 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7757 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7758 }
7759
7760 return base_tmp;
7761 }
7762
7763 /* Helper function to update a PC-relative address when we are adjusting a
7764 memory address (ADDR) of a vector to point to a scalar field within the
7765 vector with a constant offset (ELEMENT_OFFSET). If the address is not
7766 valid, we can use the base register temporary (BASE_TMP) to form it. */
7767
7768 static rtx
7769 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7770 {
7771 rtx new_addr = NULL;
7772
7773 gcc_assert (CONST_INT_P (element_offset));
7774
7775 if (GET_CODE (addr) == CONST)
7776 addr = XEXP (addr, 0);
7777
7778 if (GET_CODE (addr) == PLUS)
7779 {
7780 rtx op0 = XEXP (addr, 0);
7781 rtx op1 = XEXP (addr, 1);
7782
7783 if (CONST_INT_P (op1))
7784 {
7785 HOST_WIDE_INT offset
7786 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7787
7788 if (offset == 0)
7789 new_addr = op0;
7790
7791 else
7792 {
7793 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7794 new_addr = gen_rtx_CONST (Pmode, plus);
7795 }
7796 }
7797
7798 else
7799 {
7800 emit_move_insn (base_tmp, addr);
7801 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7802 }
7803 }
7804
7805 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7806 {
7807 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7808 new_addr = gen_rtx_CONST (Pmode, plus);
7809 }
7810
7811 else
7812 gcc_unreachable ();
7813
7814 return new_addr;
7815 }
7816
7817 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7818 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7819 temporary (BASE_TMP) to fixup the address. Return the new memory address
7820 that is valid for reads or writes to a given register (SCALAR_REG).
7821
7822 This function is expected to be called after reload is completed when we are
7823 splitting insns. The temporary BASE_TMP might be set multiple times with
7824 this code. */
7825
7826 rtx
7827 rs6000_adjust_vec_address (rtx scalar_reg,
7828 rtx mem,
7829 rtx element,
7830 rtx base_tmp,
7831 machine_mode scalar_mode)
7832 {
7833 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7834 rtx addr = XEXP (mem, 0);
7835 rtx new_addr;
7836
7837 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7838 gcc_assert (!reg_mentioned_p (base_tmp, element));
7839
7840 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7841 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7842
7843 /* Calculate what we need to add to the address to get the element
7844 address. */
7845 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7846
7847 /* Create the new address pointing to the element within the vector. If we
7848 are adding 0, we don't have to change the address. */
7849 if (element_offset == const0_rtx)
7850 new_addr = addr;
7851
7852 /* A simple indirect address can be converted into a reg + offset
7853 address. */
7854 else if (REG_P (addr) || SUBREG_P (addr))
7855 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7856
7857 /* For references to local static variables, fold a constant offset into the
7858 address. */
7859 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7860 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7861
7862 /* Optimize D-FORM addresses with constant offset with a constant element, to
7863 include the element offset in the address directly. */
7864 else if (GET_CODE (addr) == PLUS)
7865 {
7866 rtx op0 = XEXP (addr, 0);
7867 rtx op1 = XEXP (addr, 1);
7868
7869 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7870 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7871 {
7872 /* op0 should never be r0, because r0+offset is not valid. But it
7873 doesn't hurt to make sure it is not r0. */
7874 gcc_assert (reg_or_subregno (op0) != 0);
7875
7876 /* D-FORM address with constant element number. */
7877 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7878 rtx offset_rtx = GEN_INT (offset);
7879 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7880 }
7881 else
7882 {
7883 /* If we don't have a D-FORM address with a constant element number,
7884 add the two elements in the current address. Then add the offset.
7885
7886 Previously, we tried to add the offset to OP1 and change the
7887 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7888 complicated because we had to verify that op1 was not GPR0 and we
7889 had a constant element offset (due to the way ADDI is defined).
7890 By doing the add of OP0 and OP1 first, and then adding in the
7891 offset, it has the benefit that if D-FORM instructions are
7892 allowed, the offset is part of the memory access to the vector
7893 element. */
7894 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7895 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7896 }
7897 }
7898
7899 else
7900 {
7901 emit_move_insn (base_tmp, addr);
7902 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7903 }
7904
7905 /* If the address isn't valid, move the address into the temporary base
7906 register. Some reasons it could not be valid include:
7907
7908 The address offset overflowed the 16 or 34 bit offset size;
7909 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7910 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7911 Only X_FORM loads can be done, and the address is D_FORM. */
7912
7913 enum insn_form iform
7914 = address_to_insn_form (new_addr, scalar_mode,
7915 reg_to_non_prefixed (scalar_reg, scalar_mode));
7916
7917 if (iform == INSN_FORM_BAD)
7918 {
7919 emit_move_insn (base_tmp, new_addr);
7920 new_addr = base_tmp;
7921 }
7922
7923 return change_address (mem, scalar_mode, new_addr);
7924 }
7925
7926 /* Split a variable vec_extract operation into the component instructions. */
7927
7928 void
7929 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7930 rtx tmp_altivec)
7931 {
7932 machine_mode mode = GET_MODE (src);
7933 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7934 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7935 int byte_shift = exact_log2 (scalar_size);
7936
7937 gcc_assert (byte_shift >= 0);
7938
7939 /* If we are given a memory address, optimize to load just the element. We
7940 don't have to adjust the vector element number on little endian
7941 systems. */
7942 if (MEM_P (src))
7943 {
7944 emit_move_insn (dest,
7945 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7946 scalar_mode));
7947 return;
7948 }
7949
7950 else if (REG_P (src) || SUBREG_P (src))
7951 {
7952 int num_elements = GET_MODE_NUNITS (mode);
7953 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7954 int bit_shift = 7 - exact_log2 (num_elements);
7955 rtx element2;
7956 unsigned int dest_regno = reg_or_subregno (dest);
7957 unsigned int src_regno = reg_or_subregno (src);
7958 unsigned int element_regno = reg_or_subregno (element);
7959
7960 gcc_assert (REG_P (tmp_gpr));
7961
7962 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7963 a general purpose register. */
7964 if (TARGET_P9_VECTOR
7965 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7966 && INT_REGNO_P (dest_regno)
7967 && ALTIVEC_REGNO_P (src_regno)
7968 && INT_REGNO_P (element_regno))
7969 {
7970 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7971 rtx element_si = gen_rtx_REG (SImode, element_regno);
7972
7973 if (mode == V16QImode)
7974 emit_insn (BYTES_BIG_ENDIAN
7975 ? gen_vextublx (dest_si, element_si, src)
7976 : gen_vextubrx (dest_si, element_si, src));
7977
7978 else if (mode == V8HImode)
7979 {
7980 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7981 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7982 emit_insn (BYTES_BIG_ENDIAN
7983 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7984 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7985 }
7986
7987
7988 else
7989 {
7990 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7991 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7992 emit_insn (BYTES_BIG_ENDIAN
7993 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7994 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7995 }
7996
7997 return;
7998 }
7999
8000
8001 gcc_assert (REG_P (tmp_altivec));
8002
8003 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
8004 an XOR, otherwise we need to subtract. The shift amount is so VSLO
8005 will shift the element into the upper position (adding 3 to convert a
8006 byte shift into a bit shift). */
8007 if (scalar_size == 8)
8008 {
8009 if (!BYTES_BIG_ENDIAN)
8010 {
8011 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
8012 element2 = tmp_gpr;
8013 }
8014 else
8015 element2 = element;
8016
8017 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8018 bit. */
8019 emit_insn (gen_rtx_SET (tmp_gpr,
8020 gen_rtx_AND (DImode,
8021 gen_rtx_ASHIFT (DImode,
8022 element2,
8023 GEN_INT (6)),
8024 GEN_INT (64))));
8025 }
8026 else
8027 {
8028 if (!BYTES_BIG_ENDIAN)
8029 {
8030 rtx num_ele_m1 = GEN_INT (num_elements - 1);
8031
8032 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8033 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8034 element2 = tmp_gpr;
8035 }
8036 else
8037 element2 = element;
8038
8039 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8040 }
8041
8042 /* Get the value into the lower byte of the Altivec register where VSLO
8043 expects it. */
8044 if (TARGET_P9_VECTOR)
8045 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8046 else if (can_create_pseudo_p ())
8047 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8048 else
8049 {
8050 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8051 emit_move_insn (tmp_di, tmp_gpr);
8052 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8053 }
8054
8055 /* Do the VSLO to get the value into the final location. */
8056 switch (mode)
8057 {
8058 case E_V2DFmode:
8059 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8060 return;
8061
8062 case E_V2DImode:
8063 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8064 return;
8065
8066 case E_V4SFmode:
8067 {
8068 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8069 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8070 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8071 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8072 tmp_altivec));
8073
8074 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8075 return;
8076 }
8077
8078 case E_V4SImode:
8079 case E_V8HImode:
8080 case E_V16QImode:
8081 {
8082 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8083 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8084 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8085 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8086 tmp_altivec));
8087 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8088 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
8089 GEN_INT (64 - bits_in_element)));
8090 return;
8091 }
8092
8093 default:
8094 gcc_unreachable ();
8095 }
8096
8097 return;
8098 }
8099 else
8100 gcc_unreachable ();
8101 }
8102
8103 /* Return the alignment of TYPE. Existing alignment is ALIGN. HOW
8104 selects whether the alignment is ABI-mandated, optional, or
8105 both ABI-mandated and optional alignment. */
8106
8107 unsigned int
8108 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8109 {
8110 if (how != align_opt)
8111 {
8112 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
8113 align = 128;
8114 }
8115
8116 if (how != align_abi)
8117 {
8118 if (TREE_CODE (type) == ARRAY_TYPE
8119 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8120 {
8121 if (align < BITS_PER_WORD)
8122 align = BITS_PER_WORD;
8123 }
8124 }
8125
8126 return align;
8127 }
8128
8129 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8130 instructions simply ignore the low bits; VSX memory instructions
8131 are aligned to 4 or 8 bytes. */
8132
8133 static bool
8134 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8135 {
8136 return (STRICT_ALIGNMENT
8137 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8138 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8139 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
8140 && (int) align < VECTOR_ALIGN (mode)))));
8141 }
8142
8143 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8144
8145 unsigned int
8146 rs6000_special_adjust_field_align (tree type, unsigned int computed)
8147 {
8148 if (computed <= 32 || TYPE_PACKED (type))
8149 return computed;
8150
8151 /* Strip initial arrays. */
8152 while (TREE_CODE (type) == ARRAY_TYPE)
8153 type = TREE_TYPE (type);
8154
8155 /* If RECORD or UNION, recursively find the first field. */
8156 while (AGGREGATE_TYPE_P (type))
8157 {
8158 tree field = TYPE_FIELDS (type);
8159
8160 /* Skip all non-field decls. */
8161 while (field != NULL
8162 && (TREE_CODE (field) != FIELD_DECL
8163 || DECL_FIELD_ABI_IGNORED (field)))
8164 field = DECL_CHAIN (field);
8165
8166 if (! field)
8167 break;
8168
8169 /* A packed field does not contribute any extra alignment. */
8170 if (DECL_PACKED (field))
8171 return computed;
8172
8173 type = TREE_TYPE (field);
8174
8175 /* Strip arrays. */
8176 while (TREE_CODE (type) == ARRAY_TYPE)
8177 type = TREE_TYPE (type);
8178 }
8179
8180 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8181 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8182 computed = MIN (computed, 32);
8183
8184 return computed;
8185 }
8186
8187 /* AIX increases natural record alignment to doubleword if the innermost first
8188 field is an FP double while the FP fields remain word aligned.
8189 Only called if TYPE initially is a RECORD or UNION. */
8190
8191 unsigned int
8192 rs6000_special_round_type_align (tree type, unsigned int computed,
8193 unsigned int specified)
8194 {
8195 unsigned int align = MAX (computed, specified);
8196
8197 if (TYPE_PACKED (type) || align >= 64)
8198 return align;
8199
8200 /* If RECORD or UNION, recursively find the first field. */
8201 do
8202 {
8203 tree field = TYPE_FIELDS (type);
8204
8205 /* Skip all non-field decls. */
8206 while (field != NULL
8207 && (TREE_CODE (field) != FIELD_DECL
8208 || DECL_FIELD_ABI_IGNORED (field)))
8209 field = DECL_CHAIN (field);
8210
8211 if (! field)
8212 break;
8213
8214 /* A packed field does not contribute any extra alignment. */
8215 if (DECL_PACKED (field))
8216 return align;
8217
8218 type = TREE_TYPE (field);
8219
8220 /* Strip arrays. */
8221 while (TREE_CODE (type) == ARRAY_TYPE)
8222 type = TREE_TYPE (type);
8223 } while (AGGREGATE_TYPE_P (type));
8224
8225 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8226 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8227 align = MAX (align, 64);
8228
8229 return align;
8230 }
8231
8232 /* Darwin increases record alignment to the natural alignment of
8233 the first field. */
8234
8235 unsigned int
8236 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8237 unsigned int specified)
8238 {
8239 unsigned int align = MAX (computed, specified);
8240
8241 if (TYPE_PACKED (type))
8242 return align;
8243
8244 /* Find the first field, looking down into aggregates. */
8245 do {
8246 tree field = TYPE_FIELDS (type);
8247 /* Skip all non-field decls. */
8248 while (field != NULL
8249 && (TREE_CODE (field) != FIELD_DECL
8250 || DECL_FIELD_ABI_IGNORED (field)))
8251 field = DECL_CHAIN (field);
8252 if (! field)
8253 break;
8254 /* A packed field does not contribute any extra alignment. */
8255 if (DECL_PACKED (field))
8256 return align;
8257 type = TREE_TYPE (field);
8258 while (TREE_CODE (type) == ARRAY_TYPE)
8259 type = TREE_TYPE (type);
8260 } while (AGGREGATE_TYPE_P (type));
8261
8262 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8263 align = MAX (align, TYPE_ALIGN (type));
8264
8265 return align;
8266 }
8267
8268 /* Return 1 for an operand in small memory on V.4/eabi. */
8269
8270 int
8271 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8272 machine_mode mode ATTRIBUTE_UNUSED)
8273 {
8274 #if TARGET_ELF
8275 rtx sym_ref;
8276
8277 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8278 return 0;
8279
8280 if (DEFAULT_ABI != ABI_V4)
8281 return 0;
8282
8283 if (SYMBOL_REF_P (op))
8284 sym_ref = op;
8285
8286 else if (GET_CODE (op) != CONST
8287 || GET_CODE (XEXP (op, 0)) != PLUS
8288 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8289 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8290 return 0;
8291
8292 else
8293 {
8294 rtx sum = XEXP (op, 0);
8295 HOST_WIDE_INT summand;
8296
8297 /* We have to be careful here, because it is the referenced address
8298 that must be 32k from _SDA_BASE_, not just the symbol. */
8299 summand = INTVAL (XEXP (sum, 1));
8300 if (summand < 0 || summand > g_switch_value)
8301 return 0;
8302
8303 sym_ref = XEXP (sum, 0);
8304 }
8305
8306 return SYMBOL_REF_SMALL_P (sym_ref);
8307 #else
8308 return 0;
8309 #endif
8310 }
8311
8312 /* Return true if either operand is a general purpose register. */
8313
8314 bool
8315 gpr_or_gpr_p (rtx op0, rtx op1)
8316 {
8317 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8318 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8319 }
8320
8321 /* Return true if this is a direct move operation between GPR registers and
8322 floating point/VSX registers. */
8323
8324 bool
8325 direct_move_p (rtx op0, rtx op1)
8326 {
8327 if (!REG_P (op0) || !REG_P (op1))
8328 return false;
8329
8330 if (!TARGET_DIRECT_MOVE)
8331 return false;
8332
8333 int regno0 = REGNO (op0);
8334 int regno1 = REGNO (op1);
8335 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8336 return false;
8337
8338 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8339 return true;
8340
8341 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8342 return true;
8343
8344 return false;
8345 }
8346
8347 /* Return true if ADDR is an acceptable address for a quad memory
8348 operation of mode MODE (either LQ/STQ for general purpose registers, or
8349 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
8350 address must pass strict checking, i.e. any base register must be a
8351 valid hard register. */
8352
8353 bool
8354 quad_address_p (rtx addr, machine_mode mode, bool strict)
8355 {
8356 rtx op0, op1;
8357
8358 if (GET_MODE_SIZE (mode) < 16)
8359 return false;
8360
8361 if (legitimate_indirect_address_p (addr, strict))
8362 return true;
8363
8364 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8365 return false;
8366
8367 /* Is this a valid prefixed address? If the bottom four bits of the offset
8368 are non-zero, we could use a prefixed instruction (which does not have the
8369 DQ-form constraint that the traditional instruction had) instead of
8370 forcing the unaligned offset to a GPR. */
8371 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8372 return true;
8373
8374 if (GET_CODE (addr) != PLUS)
8375 return false;
8376
8377 op0 = XEXP (addr, 0);
8378 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8379 return false;
8380
8381 op1 = XEXP (addr, 1);
8382 if (!CONST_INT_P (op1))
8383 return false;
8384
8385 return quad_address_offset_p (INTVAL (op1));
8386 }
8387
8388 /* Return true if this is a load or store quad operation. This function does
8389 not handle the atomic quad memory instructions. */
8390
8391 bool
8392 quad_load_store_p (rtx op0, rtx op1)
8393 {
8394 bool ret;
8395
8396 if (!TARGET_QUAD_MEMORY)
8397 ret = false;
8398
8399 else if (REG_P (op0) && MEM_P (op1))
8400 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8401 && quad_memory_operand (op1, GET_MODE (op1))
8402 && !reg_overlap_mentioned_p (op0, op1));
8403
8404 else if (MEM_P (op0) && REG_P (op1))
8405 ret = (quad_memory_operand (op0, GET_MODE (op0))
8406 && quad_int_reg_operand (op1, GET_MODE (op1)));
8407
8408 else
8409 ret = false;
8410
8411 if (TARGET_DEBUG_ADDR)
8412 {
8413 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8414 ret ? "true" : "false");
8415 debug_rtx (gen_rtx_SET (op0, op1));
8416 }
8417
8418 return ret;
8419 }
8420
8421 /* Given an address, return a constant offset term if one exists. */
8422
8423 static rtx
8424 address_offset (rtx op)
8425 {
8426 if (GET_CODE (op) == PRE_INC
8427 || GET_CODE (op) == PRE_DEC)
8428 op = XEXP (op, 0);
8429 else if (GET_CODE (op) == PRE_MODIFY
8430 || GET_CODE (op) == LO_SUM)
8431 op = XEXP (op, 1);
8432
8433 if (GET_CODE (op) == CONST)
8434 op = XEXP (op, 0);
8435
8436 if (GET_CODE (op) == PLUS)
8437 op = XEXP (op, 1);
8438
8439 if (CONST_INT_P (op))
8440 return op;
8441
8442 return NULL_RTX;
8443 }
8444
8445 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8446 the mode. If we can't find (or don't know) the alignment of the symbol
8447 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8448 should be pessimistic]. Offsets are validated in the same way as for
8449 reg + offset. */
8450 static bool
8451 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8452 {
8453 /* We should not get here with this. */
8454 gcc_checking_assert (! mode_supports_dq_form (mode));
8455
8456 if (GET_CODE (x) == CONST)
8457 x = XEXP (x, 0);
8458
8459 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8460 x = XVECEXP (x, 0, 0);
8461
8462 rtx sym = NULL_RTX;
8463 unsigned HOST_WIDE_INT offset = 0;
8464
8465 if (GET_CODE (x) == PLUS)
8466 {
8467 sym = XEXP (x, 0);
8468 if (! SYMBOL_REF_P (sym))
8469 return false;
8470 if (!CONST_INT_P (XEXP (x, 1)))
8471 return false;
8472 offset = INTVAL (XEXP (x, 1));
8473 }
8474 else if (SYMBOL_REF_P (x))
8475 sym = x;
8476 else if (CONST_INT_P (x))
8477 offset = INTVAL (x);
8478 else if (GET_CODE (x) == LABEL_REF)
8479 offset = 0; // We assume code labels are Pmode aligned
8480 else
8481 return false; // not sure what we have here.
8482
8483 /* If we don't know the alignment of the thing to which the symbol refers,
8484 we assume optimistically it is "enough".
8485 ??? maybe we should be pessimistic instead. */
8486 unsigned align = 0;
8487
8488 if (sym)
8489 {
8490 tree decl = SYMBOL_REF_DECL (sym);
8491 #if TARGET_MACHO
8492 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8493 /* The decl in an indirection symbol is the original one, which might
8494 be less aligned than the indirection. Our indirections are always
8495 pointer-aligned. */
8496 ;
8497 else
8498 #endif
8499 if (decl && DECL_ALIGN (decl))
8500 align = DECL_ALIGN_UNIT (decl);
8501 }
8502
8503 unsigned int extra = 0;
8504 switch (mode)
8505 {
8506 case E_DFmode:
8507 case E_DDmode:
8508 case E_DImode:
8509 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8510 addressing. */
8511 if (VECTOR_MEM_VSX_P (mode))
8512 return false;
8513
8514 if (!TARGET_POWERPC64)
8515 extra = 4;
8516 else if ((offset & 3) || (align & 3))
8517 return false;
8518 break;
8519
8520 case E_TFmode:
8521 case E_IFmode:
8522 case E_KFmode:
8523 case E_TDmode:
8524 case E_TImode:
8525 case E_PTImode:
8526 extra = 8;
8527 if (!TARGET_POWERPC64)
8528 extra = 12;
8529 else if ((offset & 3) || (align & 3))
8530 return false;
8531 break;
8532
8533 default:
8534 break;
8535 }
8536
8537 /* We only care if the access(es) would cause a change to the high part. */
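/* ((X & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits of X;
   e.g. 0x8000 becomes -0x8000 while 0x7fff stays 0x7fff.  */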
8538 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8539 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8540 }
8541
8542 /* Return true if the MEM operand is a memory operand suitable for use
8543 with a (full width, possibly multiple) gpr load/store. On
8544 powerpc64 this means the offset must be divisible by 4.
8545 Implements 'Y' constraint.
8546
8547 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8548 a constraint function we know the operand has satisfied a suitable
8549 memory predicate.
8550
8551 Offsetting a lo_sum should not be allowed, except where we know by
8552 alignment that a 32k boundary is not crossed. Note that by
8553 "offsetting" here we mean a further offset to access parts of the
8554 MEM. It's fine to have a lo_sum where the inner address is offset
8555 from a sym, since the same sym+offset will appear in the high part
8556 of the address calculation. */
8557
8558 bool
8559 mem_operand_gpr (rtx op, machine_mode mode)
8560 {
8561 unsigned HOST_WIDE_INT offset;
8562 int extra;
8563 rtx addr = XEXP (op, 0);
8564
8565 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8566 if (TARGET_UPDATE
8567 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8568 && mode_supports_pre_incdec_p (mode)
8569 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8570 return true;
8571
8572 /* Allow prefixed instructions if supported. If the bottom two bits of the
8573 offset are non-zero, we could use a prefixed instruction (which does not
8574 have the DS-form constraint that the traditional instruction had) instead
8575 of forcing the unaligned offset to a GPR. */
8576 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8577 return true;
8578
8579 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8580 really OK. Doing this early avoids teaching all the other machinery
8581 about them. */
8582 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8583 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8584
8585 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8586 if (!rs6000_offsettable_memref_p (op, mode, false))
8587 return false;
8588
8589 op = address_offset (addr);
8590 if (op == NULL_RTX)
8591 return true;
8592
8593 offset = INTVAL (op);
8594 if (TARGET_POWERPC64 && (offset & 3) != 0)
8595 return false;
8596
8597 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8598 if (extra < 0)
8599 extra = 0;
8600
8601 if (GET_CODE (addr) == LO_SUM)
8602 /* For lo_sum addresses, we must allow any offset except one that
8603 causes a wrap, so test only the low 16 bits. */
8604 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8605
8606 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8607 }
8608
8609 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8610 enforce an offset divisible by 4 even for 32-bit. */
8611
8612 bool
8613 mem_operand_ds_form (rtx op, machine_mode mode)
8614 {
8615 unsigned HOST_WIDE_INT offset;
8616 int extra;
8617 rtx addr = XEXP (op, 0);
8618
8619 /* Allow prefixed instructions if supported. If the bottom two bits of the
8620 offset are non-zero, we could use a prefixed instruction (which does not
8621 have the DS-form constraint that the traditional instruction had) instead
8622 of forcing the unaligned offset to a GPR. */
8623 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8624 return true;
8625
8626 if (!offsettable_address_p (false, mode, addr))
8627 return false;
8628
8629 op = address_offset (addr);
8630 if (op == NULL_RTX)
8631 return true;
8632
8633 offset = INTVAL (op);
8634 if ((offset & 3) != 0)
8635 return false;
8636
8637 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8638 if (extra < 0)
8639 extra = 0;
8640
8641 if (GET_CODE (addr) == LO_SUM)
8642 /* For lo_sum addresses, we must allow any offset except one that
8643 causes a wrap, so test only the low 16 bits. */
8644 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8645
8646 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8647 }
8648 \f
8649 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8650
8651 static bool
8652 reg_offset_addressing_ok_p (machine_mode mode)
8653 {
8654 switch (mode)
8655 {
8656 case E_V16QImode:
8657 case E_V8HImode:
8658 case E_V4SFmode:
8659 case E_V4SImode:
8660 case E_V2DFmode:
8661 case E_V2DImode:
8662 case E_V1TImode:
8663 case E_TImode:
8664 case E_TFmode:
8665 case E_KFmode:
8666 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8667 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8668 a vector mode, if we want to use the VSX registers to move it around,
8669 we need to restrict ourselves to reg+reg addressing. Similarly for
8670 IEEE 128-bit floating point that is passed in a single vector
8671 register. */
8672 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8673 return mode_supports_dq_form (mode);
8674 break;
8675
8676 /* The vector pair/quad types support offset addressing if the
8677 underlying vectors support offset addressing. */
8678 case E_OOmode:
8679 case E_XOmode:
8680 return TARGET_MMA;
8681
8682 case E_SDmode:
8683 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8684 addressing for the LFIWZX and STFIWX instructions. */
8685 if (TARGET_NO_SDMODE_STACK)
8686 return false;
8687 break;
8688
8689 default:
8690 break;
8691 }
8692
8693 return true;
8694 }
8695
8696 static bool
8697 virtual_stack_registers_memory_p (rtx op)
8698 {
8699 int regnum;
8700
8701 if (REG_P (op))
8702 regnum = REGNO (op);
8703
8704 else if (GET_CODE (op) == PLUS
8705 && REG_P (XEXP (op, 0))
8706 && CONST_INT_P (XEXP (op, 1)))
8707 regnum = REGNO (XEXP (op, 0));
8708
8709 else
8710 return false;
8711
8712 return (regnum >= FIRST_VIRTUAL_REGISTER
8713 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8714 }
8715
8716 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8717 is known to not straddle a 32k boundary. This function is used
8718 to determine whether -mcmodel=medium code can use TOC pointer
8719 relative addressing for OP. This means the alignment of the TOC
8720 pointer must also be taken into account, and unfortunately that is
8721 only 8 bytes. */
8722
8723 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8724 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8725 #endif
8726
8727 static bool
8728 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8729 machine_mode mode)
8730 {
8731 tree decl;
8732 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8733
8734 if (!SYMBOL_REF_P (op))
8735 return false;
8736
8737 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8738 SYMBOL_REF. */
8739 if (mode_supports_dq_form (mode))
8740 return false;
8741
8742 dsize = GET_MODE_SIZE (mode);
8743 decl = SYMBOL_REF_DECL (op);
8744 if (!decl)
8745 {
8746 if (dsize == 0)
8747 return false;
8748
8749 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8750 replacing memory addresses with an anchor plus offset. We
8751 could find the decl by rummaging around in the block->objects
8752 VEC for the given offset but that seems like too much work. */
8753 dalign = BITS_PER_UNIT;
8754 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8755 && SYMBOL_REF_ANCHOR_P (op)
8756 && SYMBOL_REF_BLOCK (op) != NULL)
8757 {
8758 struct object_block *block = SYMBOL_REF_BLOCK (op);
8759
8760 dalign = block->alignment;
8761 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8762 }
8763 else if (CONSTANT_POOL_ADDRESS_P (op))
8764 {
8765 /* It would be nice to have get_pool_align().. */
8766 machine_mode cmode = get_pool_mode (op);
8767
8768 dalign = GET_MODE_ALIGNMENT (cmode);
8769 }
8770 }
8771 else if (DECL_P (decl))
8772 {
8773 dalign = DECL_ALIGN (decl);
8774
8775 if (dsize == 0)
8776 {
8777 /* Allow BLKmode when the entire object is known to not
8778 cross a 32k boundary. */
8779 if (!DECL_SIZE_UNIT (decl))
8780 return false;
8781
8782 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8783 return false;
8784
8785 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8786 if (dsize > 32768)
8787 return false;
8788
8789 dalign /= BITS_PER_UNIT;
8790 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8791 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8792 return dalign >= dsize;
8793 }
8794 }
8795 else
8796 gcc_unreachable ();
8797
8798 /* Find how many bits of the alignment we know for this access. */
8799 dalign /= BITS_PER_UNIT;
8800 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8801 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8802 mask = dalign - 1;
8803 lsb = offset & -offset;
8804 mask &= lsb - 1;
8805 dalign = mask + 1;
8806
8807 return dalign >= dsize;
8808 }
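/* The mask arithmetic above derives the alignment guaranteed for
   SYM+OFFSET: OFFSET & -OFFSET isolates the lowest set bit of the
   offset, which caps the alignment inherited from the symbol.  A
   stand-alone sketch (illustrative only; assumes DALIGN is a power
   of two):  */
#if 0
static unsigned long long
known_align (unsigned long long dalign, unsigned long long offset)
{
  unsigned long long mask = dalign - 1;
  unsigned long long lsb = offset & -offset;  /* lowest set bit of offset */
  mask &= lsb - 1;          /* bits below the lsb remain guaranteed zero */
  return mask + 1;          /* e.g. dalign 8, offset 4 -> 4 */
}
#endif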
8809
8810 static bool
8811 constant_pool_expr_p (rtx op)
8812 {
8813 rtx base, offset;
8814
8815 split_const (op, &base, &offset);
8816 return (SYMBOL_REF_P (base)
8817 && CONSTANT_POOL_ADDRESS_P (base)
8818 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8819 }
8820
8821 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8822 use that as the register to put the HIGH value into if register allocation
8823 is already done. */
8824
8825 rtx
8826 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8827 {
8828 rtx tocrel, tocreg, hi;
8829
8830 gcc_assert (TARGET_TOC);
8831
8832 if (TARGET_DEBUG_ADDR)
8833 {
8834 if (SYMBOL_REF_P (symbol))
8835 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8836 XSTR (symbol, 0));
8837 else
8838 {
8839 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8840 GET_RTX_NAME (GET_CODE (symbol)));
8841 debug_rtx (symbol);
8842 }
8843 }
8844
8845 if (!can_create_pseudo_p ())
8846 df_set_regs_ever_live (TOC_REGISTER, true);
8847
8848 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8849 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8850 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8851 return tocrel;
8852
8853 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8854 if (largetoc_reg != NULL)
8855 {
8856 emit_move_insn (largetoc_reg, hi);
8857 hi = largetoc_reg;
8858 }
8859 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8860 }
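/* Illustratively, the small-model result is used directly as a
   TOC-relative operand, while the medium/large split produced above
   has the shape (a sketch, not verbatim RTL):

   (lo_sum (high (unspec [sym; r2] UNSPEC_TOCREL))
           (unspec [sym; r2] UNSPEC_TOCREL))

   which assembler output later turns into an addis against the TOC
   pointer followed by a low-part reference.  */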
8861
8862 /* These are only used to pass through from print_operand/print_operand_address
8863 to rs6000_output_addr_const_extra over the intervening function
8864 output_addr_const which is not target code. */
8865 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8866
8867 /* Return true if OP is a toc pointer relative address (the output
8868 of create_TOC_reference). If STRICT, do not match non-split
8869 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8870 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8871 TOCREL_OFFSET_RET respectively. */
8872
8873 bool
8874 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8875 const_rtx *tocrel_offset_ret)
8876 {
8877 if (!TARGET_TOC)
8878 return false;
8879
8880 if (TARGET_CMODEL != CMODEL_SMALL)
8881 {
8882 /* When strict ensure we have everything tidy. */
8883 if (strict
8884 && !(GET_CODE (op) == LO_SUM
8885 && REG_P (XEXP (op, 0))
8886 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8887 return false;
8888
8889 /* When not strict, allow non-split TOC addresses and also allow
8890 (lo_sum (high ..)) TOC addresses created during reload. */
8891 if (GET_CODE (op) == LO_SUM)
8892 op = XEXP (op, 1);
8893 }
8894
8895 const_rtx tocrel_base = op;
8896 const_rtx tocrel_offset = const0_rtx;
8897
8898 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8899 {
8900 tocrel_base = XEXP (op, 0);
8901 tocrel_offset = XEXP (op, 1);
8902 }
8903
8904 if (tocrel_base_ret)
8905 *tocrel_base_ret = tocrel_base;
8906 if (tocrel_offset_ret)
8907 *tocrel_offset_ret = tocrel_offset;
8908
8909 return (GET_CODE (tocrel_base) == UNSPEC
8910 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8911 && REG_P (XVECEXP (tocrel_base, 0, 1))
8912 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8913 }
8914
8915 /* Return true if X is a constant pool address, and also for cmodel=medium
8916 if X is a toc-relative address known to be offsettable within MODE. */
8917
8918 bool
8919 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8920 bool strict)
8921 {
8922 const_rtx tocrel_base, tocrel_offset;
8923 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8924 && (TARGET_CMODEL != CMODEL_MEDIUM
8925 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8926 || mode == QImode
8927 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8928 INTVAL (tocrel_offset), mode)));
8929 }
8930
8931 static bool
8932 legitimate_small_data_p (machine_mode mode, rtx x)
8933 {
8934 return (DEFAULT_ABI == ABI_V4
8935 && !flag_pic && !TARGET_TOC
8936 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8937 && small_data_operand (x, mode));
8938 }
8939
8940 bool
8941 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8942 bool strict, bool worst_case)
8943 {
8944 unsigned HOST_WIDE_INT offset;
8945 unsigned int extra;
8946
8947 if (GET_CODE (x) != PLUS)
8948 return false;
8949 if (!REG_P (XEXP (x, 0)))
8950 return false;
8951 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8952 return false;
8953 if (mode_supports_dq_form (mode))
8954 return quad_address_p (x, mode, strict);
8955 if (!reg_offset_addressing_ok_p (mode))
8956 return virtual_stack_registers_memory_p (x);
8957 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8958 return true;
8959 if (!CONST_INT_P (XEXP (x, 1)))
8960 return false;
8961
8962 offset = INTVAL (XEXP (x, 1));
8963 extra = 0;
8964 switch (mode)
8965 {
8966 case E_DFmode:
8967 case E_DDmode:
8968 case E_DImode:
8969 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8970 addressing. */
8971 if (VECTOR_MEM_VSX_P (mode))
8972 return false;
8973
8974 if (!worst_case)
8975 break;
8976 if (!TARGET_POWERPC64)
8977 extra = 4;
8978 else if (offset & 3)
8979 return false;
8980 break;
8981
8982 case E_TFmode:
8983 case E_IFmode:
8984 case E_KFmode:
8985 case E_TDmode:
8986 case E_TImode:
8987 case E_PTImode:
8988 extra = 8;
8989 if (!worst_case)
8990 break;
8991 if (!TARGET_POWERPC64)
8992 extra = 12;
8993 else if (offset & 3)
8994 return false;
8995 break;
8996
8997 default:
8998 break;
8999 }
9000
9001 if (TARGET_PREFIXED)
9002 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
9003 else
9004 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
9005 }
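/* Both range checks verify that the first and the EXTRA'th byte of the
   access stay inside the signed displacement range.  A sketch of the
   assumed semantics of the 16-bit macro (illustrative only):  */
#if 0
static bool
signed_16bit_offset_extra_p (long long value, long long extra)
{
  /* VALUE and VALUE+EXTRA must both fit in a signed 16-bit field.  */
  return value >= -0x8000 && value <= 0x7fff - extra;
}
#endif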
9006
9007 bool
9008 legitimate_indexed_address_p (rtx x, int strict)
9009 {
9010 rtx op0, op1;
9011
9012 if (GET_CODE (x) != PLUS)
9013 return false;
9014
9015 op0 = XEXP (x, 0);
9016 op1 = XEXP (x, 1);
9017
9018 return (REG_P (op0) && REG_P (op1)
9019 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9020 && INT_REG_OK_FOR_INDEX_P (op1, strict))
9021 || (INT_REG_OK_FOR_BASE_P (op1, strict)
9022 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9023 }
9024
9025 bool
9026 avoiding_indexed_address_p (machine_mode mode)
9027 {
9028 unsigned int msize = GET_MODE_SIZE (mode);
9029
9030 /* Avoid indexed addressing for modes that have non-indexed load/store
9031 instruction forms. On power10, vector pairs have an indexed
9032 form, but vector quads don't. */
9033 if (msize > 16)
9034 return msize != 32;
9035
9036 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9037 }
9038
9039 bool
9040 legitimate_indirect_address_p (rtx x, int strict)
9041 {
9042 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
9043 }
9044
9045 bool
9046 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9047 {
9048 if (!TARGET_MACHO || !flag_pic
9049 || mode != SImode || !MEM_P (x))
9050 return false;
9051 x = XEXP (x, 0);
9052
9053 if (GET_CODE (x) != LO_SUM)
9054 return false;
9055 if (!REG_P (XEXP (x, 0)))
9056 return false;
9057 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9058 return false;
9059 x = XEXP (x, 1);
9060
9061 return CONSTANT_P (x);
9062 }
9063
9064 static bool
9065 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9066 {
9067 if (GET_CODE (x) != LO_SUM)
9068 return false;
9069 if (!REG_P (XEXP (x, 0)))
9070 return false;
9071 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9072 return false;
9073 /* Quad word addresses are restricted, and we can't use LO_SUM. */
9074 if (mode_supports_dq_form (mode))
9075 return false;
9076 x = XEXP (x, 1);
9077
9078 if (TARGET_ELF || TARGET_MACHO)
9079 {
9080 bool large_toc_ok;
9081
9082 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9083 return false;
9084 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9085 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9086 recognizes some LO_SUM addresses as valid although this
9087 function says the opposite. In most cases, LRA through different
9088 transformations can generate correct code for address reloads.
9089 It cannot manage only some LO_SUM cases. So we need to add
9090 code here saying that some addresses are still valid. */
9091 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9092 && small_toc_ref (x, VOIDmode));
9093 if (TARGET_TOC && ! large_toc_ok)
9094 return false;
9095 if (GET_MODE_NUNITS (mode) != 1)
9096 return false;
9097 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9098 && !(/* ??? Assume floating point reg based on mode? */
9099 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9100 return false;
9101
9102 return CONSTANT_P (x) || large_toc_ok;
9103 }
9104
9105 return false;
9106 }
9107
9108
9109 /* Try machine-dependent ways of modifying an illegitimate address
9110 to be legitimate. If we find one, return the new, valid address.
9111 This is used from only one place: `memory_address' in explow.cc.
9112
9113 OLDX is the address as it was before break_out_memory_refs was
9114 called. In some cases it is useful to look at this to decide what
9115 needs to be done.
9116
9117 It is always safe for this function to do nothing. It exists to
9118 recognize opportunities to optimize the output.
9119
9120 On RS/6000, first check for the sum of a register with a constant
9121 integer that is out of range. If so, generate code to add the
9122 constant with the low-order 16 bits masked to the register and force
9123 this result into another register (this can be done with `cau').
9124 Then generate an address of REG+(CONST&0xffff), allowing for the
9125 possibility of bit 16 being a one.
9126
9127 Then check for the sum of a register and something not constant, try to
9128 load the other term into a register and return the sum. */
9129
9130 static rtx
9131 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9132 machine_mode mode)
9133 {
9134 unsigned int extra;
9135
9136 if (!reg_offset_addressing_ok_p (mode)
9137 || mode_supports_dq_form (mode))
9138 {
9139 if (virtual_stack_registers_memory_p (x))
9140 return x;
9141
9142 /* In theory we should not be seeing addresses of the form reg+0,
9143 but just in case it is generated, optimize it away. */
9144 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9145 return force_reg (Pmode, XEXP (x, 0));
9146
9147 /* For TImode with load/store quad, restrict addresses to just a single
9148 pointer, so it works with both GPRs and VSX registers. */
9149 /* Make sure both operands are registers. */
9150 else if (GET_CODE (x) == PLUS
9151 && (mode != TImode || !TARGET_VSX))
9152 return gen_rtx_PLUS (Pmode,
9153 force_reg (Pmode, XEXP (x, 0)),
9154 force_reg (Pmode, XEXP (x, 1)));
9155 else
9156 return force_reg (Pmode, x);
9157 }
9158 if (SYMBOL_REF_P (x))
9159 {
9160 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9161 if (model != 0)
9162 return rs6000_legitimize_tls_address (x, model);
9163 }
9164
9165 extra = 0;
9166 switch (mode)
9167 {
9168 case E_TFmode:
9169 case E_TDmode:
9170 case E_TImode:
9171 case E_PTImode:
9172 case E_IFmode:
9173 case E_KFmode:
9174 /* As in legitimate_offset_address_p we do not assume
9175 worst-case. The mode here is just a hint as to the registers
9176 used. A TImode is usually in gprs, but may actually be in
9177 fprs. Leave worst-case scenario for reload to handle via
9178 insn constraints. PTImode is only GPRs. */
9179 extra = 8;
9180 break;
9181 default:
9182 break;
9183 }
9184
9185 if (GET_CODE (x) == PLUS
9186 && REG_P (XEXP (x, 0))
9187 && CONST_INT_P (XEXP (x, 1))
9188 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9189 >= 0x10000 - extra))
9190 {
9191 HOST_WIDE_INT high_int, low_int;
9192 rtx sum;
9193 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9194 if (low_int >= 0x8000 - extra)
9195 low_int = 0;
9196 high_int = INTVAL (XEXP (x, 1)) - low_int;
9197 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9198 gen_int_mode (high_int, Pmode)), 0);
9199 return plus_constant (Pmode, sum, low_int);
9200 }
9201 else if (GET_CODE (x) == PLUS
9202 && REG_P (XEXP (x, 0))
9203 && !CONST_INT_P (XEXP (x, 1))
9204 && GET_MODE_NUNITS (mode) == 1
9205 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9206 || (/* ??? Assume floating point reg based on mode? */
9207 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9208 && !avoiding_indexed_address_p (mode))
9209 {
9210 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9211 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9212 }
9213 else if ((TARGET_ELF
9214 #if TARGET_MACHO
9215 || !MACHO_DYNAMIC_NO_PIC_P
9216 #endif
9217 )
9218 && TARGET_32BIT
9219 && TARGET_NO_TOC_OR_PCREL
9220 && !flag_pic
9221 && !CONST_INT_P (x)
9222 && !CONST_WIDE_INT_P (x)
9223 && !CONST_DOUBLE_P (x)
9224 && CONSTANT_P (x)
9225 && GET_MODE_NUNITS (mode) == 1
9226 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9227 || (/* ??? Assume floating point reg based on mode? */
9228 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9229 {
9230 rtx reg = gen_reg_rtx (Pmode);
9231 if (TARGET_ELF)
9232 emit_insn (gen_elf_high (reg, x));
9233 else
9234 emit_insn (gen_macho_high (Pmode, reg, x));
9235 return gen_rtx_LO_SUM (Pmode, reg, x);
9236 }
9237 else if (TARGET_TOC
9238 && SYMBOL_REF_P (x)
9239 && constant_pool_expr_p (x)
9240 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9241 return create_TOC_reference (x, NULL_RTX);
9242 else
9243 return x;
9244 }
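/* The register-plus-large-constant case above splits the constant so
   that the low part fits a signed 16-bit displacement and the high
   part absorbs the carry out of bit 15.  A stand-alone sketch of the
   arithmetic (illustrative only; the helper name is hypothetical):  */
#if 0
static void
split_offset (long long val, long long *high, long long *low)
{
  /* LOW lands in [-0x8000, 0x7fff]; HIGH is a multiple of 0x10000,
     so it can be added with a single addis.  */
  *low = ((val & 0xffff) ^ 0x8000) - 0x8000;
  *high = val - *low;
}
#endif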
9245
9246 /* Debug version of rs6000_legitimize_address. */
9247 static rtx
9248 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9249 {
9250 rtx ret;
9251 rtx_insn *insns;
9252
9253 start_sequence ();
9254 ret = rs6000_legitimize_address (x, oldx, mode);
9255 insns = get_insns ();
9256 end_sequence ();
9257
9258 if (ret != x)
9259 {
9260 fprintf (stderr,
9261 "\nrs6000_legitimize_address: mode %s, old code %s, "
9262 "new code %s, modified\n",
9263 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9264 GET_RTX_NAME (GET_CODE (ret)));
9265
9266 fprintf (stderr, "Original address:\n");
9267 debug_rtx (x);
9268
9269 fprintf (stderr, "oldx:\n");
9270 debug_rtx (oldx);
9271
9272 fprintf (stderr, "New address:\n");
9273 debug_rtx (ret);
9274
9275 if (insns)
9276 {
9277 fprintf (stderr, "Insns added:\n");
9278 debug_rtx_list (insns, 20);
9279 }
9280 }
9281 else
9282 {
9283 fprintf (stderr,
9284 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9285 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9286
9287 debug_rtx (x);
9288 }
9289
9290 if (insns)
9291 emit_insn (insns);
9292
9293 return ret;
9294 }
9295
9296 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9297 We need to emit DTP-relative relocations. */
9298
9299 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9300 static void
9301 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9302 {
9303 switch (size)
9304 {
9305 case 4:
9306 fputs ("\t.long\t", file);
9307 break;
9308 case 8:
9309 fputs (DOUBLE_INT_ASM_OP, file);
9310 break;
9311 default:
9312 gcc_unreachable ();
9313 }
9314 output_addr_const (file, x);
9315 if (TARGET_ELF)
9316 fputs ("@dtprel+0x8000", file);
9317 }
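/* For instance, on a 64-bit ELF target (where DOUBLE_INT_ASM_OP is
   expected to be "\t.quad\t") a DTP-relative reference to X comes out
   roughly as:

   .quad x@dtprel+0x8000

   The 0x8000 addend matches the biased DTP offset convention of the
   PowerPC TLS ABI.  (Illustrative output, not captured from a build.)  */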
9318
9319 /* Return true if X is a symbol that refers to real (rather than emulated)
9320 TLS. */
9321
9322 static bool
9323 rs6000_real_tls_symbol_ref_p (rtx x)
9324 {
9325 return (SYMBOL_REF_P (x)
9326 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9327 }
9328
9329 /* In the name of slightly smaller debug output, and to cater to
9330 general assembler lossage, recognize various UNSPEC sequences
9331 and turn them back into a direct symbol reference. */
9332
9333 static rtx
9334 rs6000_delegitimize_address (rtx orig_x)
9335 {
9336 rtx x, y, offset;
9337
9338 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9339 encodes loading up the high part of the address of a TOC reference along
9340 with a load of a GPR using the same base register used for the load. We
9341 return the original SYMBOL_REF.
9342
9343 (set (reg:INT1 <reg>)
9344 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))
9345
9346 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9347 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9348 We return the original SYMBOL_REF.
9349
9350 (parallel [(set (reg:DI <base-reg>)
9351 (unspec:DI [(symbol_ref <symbol>)
9352 (const_int <marker>)]
9353 UNSPEC_PCREL_OPT_LD_ADDR))
9354 (set (reg:DI <load-reg>)
9355 (unspec:DI [(const_int 0)]
9356 UNSPEC_PCREL_OPT_LD_DATA))])
9357
9358 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9359 GPR being loaded is the same as the GPR used to hold the external address.
9360
9361 (set (reg:DI <base-reg>)
9362 (unspec:DI [(symbol_ref <symbol>)
9363 (const_int <marker>)]
9364 UNSPEC_PCREL_OPT_LD_SAME_REG))
9365
9366 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9367 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9368 We return the original SYMBOL_REF.
9369
9370 (parallel [(set (reg:DI <base-reg>)
9371 (unspec:DI [(symbol_ref <symbol>)
9372 (const_int <marker>)]
9373 UNSPEC_PCREL_OPT_ST_ADDR))
9374 (use (reg <store-reg>))]) */
9375
9376 if (GET_CODE (orig_x) == UNSPEC)
9377 switch (XINT (orig_x, 1))
9378 {
9379 case UNSPEC_FUSION_GPR:
9380 case UNSPEC_PCREL_OPT_LD_ADDR:
9381 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9382 case UNSPEC_PCREL_OPT_ST_ADDR:
9383 orig_x = XVECEXP (orig_x, 0, 0);
9384 break;
9385
9386 default:
9387 break;
9388 }
9389
9390 orig_x = delegitimize_mem_from_attrs (orig_x);
9391
9392 x = orig_x;
9393 if (MEM_P (x))
9394 x = XEXP (x, 0);
9395
9396 y = x;
9397 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9398 y = XEXP (y, 1);
9399
9400 offset = NULL_RTX;
9401 if (GET_CODE (y) == PLUS
9402 && GET_MODE (y) == Pmode
9403 && CONST_INT_P (XEXP (y, 1)))
9404 {
9405 offset = XEXP (y, 1);
9406 y = XEXP (y, 0);
9407 }
9408
9409 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9410 {
9411 y = XVECEXP (y, 0, 0);
9412
9413 #ifdef HAVE_AS_TLS
9414 /* Do not associate thread-local symbols with the original
9415 constant pool symbol. */
9416 if (TARGET_XCOFF
9417 && SYMBOL_REF_P (y)
9418 && CONSTANT_POOL_ADDRESS_P (y)
9419 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9420 return orig_x;
9421 #endif
9422
9423 if (offset != NULL_RTX)
9424 y = gen_rtx_PLUS (Pmode, y, offset);
9425 if (!MEM_P (orig_x))
9426 return y;
9427 else
9428 return replace_equiv_address_nv (orig_x, y);
9429 }
9430
9431 if (TARGET_MACHO
9432 && GET_CODE (orig_x) == LO_SUM
9433 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9434 {
9435 y = XEXP (XEXP (orig_x, 1), 0);
9436 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9437 return XVECEXP (y, 0, 0);
9438 }
9439
9440 return orig_x;
9441 }
9442
9443 /* Return true if X shouldn't be emitted into the debug info.
9444 The linker doesn't like .toc section references from
9445 .debug_* sections, so reject .toc section symbols. */
9446
9447 static bool
9448 rs6000_const_not_ok_for_debug_p (rtx x)
9449 {
9450 if (GET_CODE (x) == UNSPEC)
9451 return true;
9452 if (SYMBOL_REF_P (x)
9453 && CONSTANT_POOL_ADDRESS_P (x))
9454 {
9455 rtx c = get_pool_constant (x);
9456 machine_mode cmode = get_pool_mode (x);
9457 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9458 return true;
9459 }
9460
9461 return false;
9462 }
9463
9464 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9465
9466 static bool
9467 rs6000_legitimate_combined_insn (rtx_insn *insn)
9468 {
9469 int icode = INSN_CODE (insn);
9470
9471 /* Reject creating doloop insns. Combine should not be allowed
9472 to create these for a number of reasons:
9473 1) In a nested loop, if combine creates one of these in an
9474 outer loop and the register allocator happens to allocate ctr
9475 to the outer loop insn, then the inner loop can't use ctr.
9476 Inner loops ought to be more highly optimized.
9477 2) Combine often wants to create one of these from what was
9478 originally a three insn sequence, first combining the three
9479 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9480 allocated ctr, the splitter takes us back to the three insn
9481 sequence. It's better to stop combine at the two insn
9482 sequence.
9483 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9484 insns, the register allocator sometimes uses floating point
9485 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9486 jump insn and output reloads are not implemented for jumps,
9487 the ctrsi/ctrdi splitters need to handle all possible cases.
9488 That's a pain, and it gets to be seriously difficult when a
9489 splitter that runs after reload needs memory to transfer from
9490 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9491 for the difficult case. It's better to not create problems
9492 in the first place. */
9493 if (icode != CODE_FOR_nothing
9494 && (icode == CODE_FOR_bdz_si
9495 || icode == CODE_FOR_bdz_di
9496 || icode == CODE_FOR_bdnz_si
9497 || icode == CODE_FOR_bdnz_di
9498 || icode == CODE_FOR_bdztf_si
9499 || icode == CODE_FOR_bdztf_di
9500 || icode == CODE_FOR_bdnztf_si
9501 || icode == CODE_FOR_bdnztf_di))
9502 return false;
9503
9504 return true;
9505 }
9506
9507 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9508
9509 static GTY(()) rtx rs6000_tls_symbol;
9510 static rtx
9511 rs6000_tls_get_addr (void)
9512 {
9513 if (!rs6000_tls_symbol)
9514 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9515
9516 return rs6000_tls_symbol;
9517 }
9518
9519 /* Construct the SYMBOL_REF for TLS GOT references. */
9520
9521 static GTY(()) rtx rs6000_got_symbol;
9522 rtx
9523 rs6000_got_sym (void)
9524 {
9525 if (!rs6000_got_symbol)
9526 {
9527 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9528 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9529 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9530 }
9531
9532 return rs6000_got_symbol;
9533 }
9534
9535 /* AIX Thread-Local Address support. */
9536
9537 static rtx
9538 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9539 {
9540 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9541 const char *name;
9542 char *tlsname;
9543
9544 /* Place addr into TOC constant pool. */
9545 sym = force_const_mem (GET_MODE (addr), addr);
9546
9547 /* Output the TOC entry and create the MEM referencing the value. */
9548 if (constant_pool_expr_p (XEXP (sym, 0))
9549 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9550 {
9551 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9552 mem = gen_const_mem (Pmode, tocref);
9553 set_mem_alias_set (mem, get_TOC_alias_set ());
9554 }
9555 else
9556 return sym;
9557
9558 /* Use global-dynamic for local-dynamic. */
9559 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9560 || model == TLS_MODEL_LOCAL_DYNAMIC)
9561 {
9562 /* Create new TOC reference for @m symbol. */
9563 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9564 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9565 strcpy (tlsname, "*LCM");
9566 strcat (tlsname, name + 3);
9567 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9568 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9569 tocref = create_TOC_reference (modaddr, NULL_RTX);
9570 rtx modmem = gen_const_mem (Pmode, tocref);
9571 set_mem_alias_set (modmem, get_TOC_alias_set ());
9572
9573 rtx modreg = gen_reg_rtx (Pmode);
9574 emit_insn (gen_rtx_SET (modreg, modmem));
9575
9576 tmpreg = gen_reg_rtx (Pmode);
9577 emit_insn (gen_rtx_SET (tmpreg, mem));
9578
9579 dest = gen_reg_rtx (Pmode);
9580 if (TARGET_32BIT)
9581 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9582 else
9583 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9584 return dest;
9585 }
9586 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9587 else if (TARGET_32BIT)
9588 {
9589 tlsreg = gen_reg_rtx (SImode);
9590 emit_insn (gen_tls_get_tpointer (tlsreg));
9591 }
9592 else
9593 {
9594 tlsreg = gen_rtx_REG (DImode, 13);
9595 xcoff_tls_exec_model_detected = true;
9596 }
9597
9598 /* Load the TOC value into temporary register. */
9599 tmpreg = gen_reg_rtx (Pmode);
9600 emit_insn (gen_rtx_SET (tmpreg, mem));
9601 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9602 gen_rtx_MINUS (Pmode, addr, tlsreg));
9603
9604 /* Add TOC symbol value to TLS pointer. */
9605 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9606
9607 return dest;
9608 }
9609
9610 /* Passes the tls arg value for global dynamic and local dynamic
9611 emit_library_call_value in rs6000_legitimize_tls_address to
9612 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9613 marker relocs put on __tls_get_addr calls. */
9614 static rtx global_tlsarg;
9615
9616 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9617 this (thread-local) address. */
9618
9619 static rtx
9620 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9621 {
9622 rtx dest, insn;
9623
9624 if (TARGET_XCOFF)
9625 return rs6000_legitimize_tls_address_aix (addr, model);
9626
9627 dest = gen_reg_rtx (Pmode);
9628 if (model == TLS_MODEL_LOCAL_EXEC
9629 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9630 {
9631 rtx tlsreg;
9632
9633 if (TARGET_64BIT)
9634 {
9635 tlsreg = gen_rtx_REG (Pmode, 13);
9636 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9637 }
9638 else
9639 {
9640 tlsreg = gen_rtx_REG (Pmode, 2);
9641 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9642 }
9643 emit_insn (insn);
9644 }
9645 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9646 {
9647 rtx tlsreg, tmp;
9648
9649 tmp = gen_reg_rtx (Pmode);
9650 if (TARGET_64BIT)
9651 {
9652 tlsreg = gen_rtx_REG (Pmode, 13);
9653 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9654 }
9655 else
9656 {
9657 tlsreg = gen_rtx_REG (Pmode, 2);
9658 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9659 }
9660 emit_insn (insn);
9661 if (TARGET_64BIT)
9662 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9663 else
9664 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9665 emit_insn (insn);
9666 }
9667 else
9668 {
9669 rtx got, tga, tmp1, tmp2;
9670
9671 /* We currently use relocations like @got@tlsgd for tls, which
9672 means the linker will handle allocation of tls entries, placing
9673 them in the .got section. So use a pointer to the .got section,
9674 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9675 or to secondary GOT sections used by 32-bit -fPIC. */
9676 if (rs6000_pcrel_p ())
9677 got = const0_rtx;
9678 else if (TARGET_64BIT)
9679 got = gen_rtx_REG (Pmode, 2);
9680 else
9681 {
9682 if (flag_pic == 1)
9683 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9684 else
9685 {
9686 rtx gsym = rs6000_got_sym ();
9687 got = gen_reg_rtx (Pmode);
9688 if (flag_pic == 0)
9689 rs6000_emit_move (got, gsym, Pmode);
9690 else
9691 {
9692 rtx mem, lab;
9693
9694 tmp1 = gen_reg_rtx (Pmode);
9695 tmp2 = gen_reg_rtx (Pmode);
9696 mem = gen_const_mem (Pmode, tmp1);
9697 lab = gen_label_rtx ();
9698 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9699 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9700 if (TARGET_LINK_STACK)
9701 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9702 emit_move_insn (tmp2, mem);
9703 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9704 set_unique_reg_note (last, REG_EQUAL, gsym);
9705 }
9706 }
9707 }
9708
9709 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9710 {
9711 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9712 UNSPEC_TLSGD);
9713 tga = rs6000_tls_get_addr ();
9714 rtx argreg = gen_rtx_REG (Pmode, 3);
9715 emit_insn (gen_rtx_SET (argreg, arg));
9716 global_tlsarg = arg;
9717 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9718 global_tlsarg = NULL_RTX;
9719
9720 /* Make a note so that the result of this call can be CSEd. */
9721 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9722 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9723 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9724 }
9725 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9726 {
9727 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9728 tga = rs6000_tls_get_addr ();
9729 tmp1 = gen_reg_rtx (Pmode);
9730 rtx argreg = gen_rtx_REG (Pmode, 3);
9731 emit_insn (gen_rtx_SET (argreg, arg));
9732 global_tlsarg = arg;
9733 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9734 global_tlsarg = NULL_RTX;
9735
9736 /* Make a note so that the result of this call can be CSEd. */
9737 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9738 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9739 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9740
9741 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9742 {
9743 if (TARGET_64BIT)
9744 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9745 else
9746 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9747 }
9748 else if (rs6000_tls_size == 32)
9749 {
9750 tmp2 = gen_reg_rtx (Pmode);
9751 if (TARGET_64BIT)
9752 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9753 else
9754 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9755 emit_insn (insn);
9756 if (TARGET_64BIT)
9757 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9758 else
9759 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9760 }
9761 else
9762 {
9763 tmp2 = gen_reg_rtx (Pmode);
9764 if (TARGET_64BIT)
9765 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9766 else
9767 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9768 emit_insn (insn);
9769 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9770 }
9771 emit_insn (insn);
9772 }
9773 else
9774 {
9775 /* IE, or 64-bit offset LE. */
9776 tmp2 = gen_reg_rtx (Pmode);
9777 if (TARGET_64BIT)
9778 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9779 else
9780 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9781 emit_insn (insn);
9782 if (rs6000_pcrel_p ())
9783 {
9784 if (TARGET_64BIT)
9785 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9786 else
9787 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9788 }
9789 else if (TARGET_64BIT)
9790 insn = gen_tls_tls_64 (dest, tmp2, addr);
9791 else
9792 insn = gen_tls_tls_32 (dest, tmp2, addr);
9793 emit_insn (insn);
9794 }
9795 }
9796
9797 return dest;
9798 }
9799
9800 /* Only create the global variable for the stack protect guard if we are using
9801 the global flavor of that guard. */
9802 static tree
9803 rs6000_init_stack_protect_guard (void)
9804 {
9805 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9806 return default_stack_protect_guard ();
9807
9808 return NULL_TREE;
9809 }
9810
9811 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9812
9813 static bool
9814 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9815 {
9816 if (GET_CODE (x) == HIGH
9817 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9818 return true;
9819
9820 /* A TLS symbol in the TOC cannot contain a sum. */
9821 if (GET_CODE (x) == CONST
9822 && GET_CODE (XEXP (x, 0)) == PLUS
9823 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9824 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9825 return true;
9826
9827 /* Allow AIX TOC TLS symbols in the constant pool,
9828 but not ELF TLS symbols. */
9829 return TARGET_ELF && tls_referenced_p (x);
9830 }
9831
9832 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9833 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9834 can be addressed relative to the toc pointer. */
9835
9836 static bool
9837 use_toc_relative_ref (rtx sym, machine_mode mode)
9838 {
9839 return ((constant_pool_expr_p (sym)
9840 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9841 get_pool_mode (sym)))
9842 || (TARGET_CMODEL == CMODEL_MEDIUM
9843 && SYMBOL_REF_LOCAL_P (sym)
9844 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9845 }
9846
9847 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9848 that is a valid memory address for an instruction.
9849 The MODE argument is the machine mode for the MEM expression
9850 that wants to use this address.
9851
9852 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9853 refers to a constant pool entry of an address (or the sum of it
9854 plus a constant), a short (16-bit signed) constant plus a register,
9855 the sum of two registers, or a register indirect, possibly with an
9856 auto-increment. For DFmode, DDmode and DImode with a constant plus
9857 register, we must ensure that both words are addressable or PowerPC64
9858 with offset word aligned.
9859
9860 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9861 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9862 because adjacent memory cells are accessed by adding word-sized offsets
9863 during assembly output. */
9864 static bool
9865 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9866 {
9867 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9868 bool quad_offset_p = mode_supports_dq_form (mode);
9869
9870 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9871 return 0;
9872
9873 /* Handle unaligned altivec lvx/stvx type addresses. */
9874 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9875 && GET_CODE (x) == AND
9876 && CONST_INT_P (XEXP (x, 1))
9877 && INTVAL (XEXP (x, 1)) == -16)
9878 {
9879 x = XEXP (x, 0);
9880 return (legitimate_indirect_address_p (x, reg_ok_strict)
9881 || legitimate_indexed_address_p (x, reg_ok_strict)
9882 || virtual_stack_registers_memory_p (x));
9883 }
9884
9885 if (legitimate_indirect_address_p (x, reg_ok_strict))
9886 return 1;
9887 if (TARGET_UPDATE
9888 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9889 && mode_supports_pre_incdec_p (mode)
9890 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9891 return 1;
9892
9893 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9894 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9895 return 1;
9896
9897 /* Handle restricted vector d-form offsets in ISA 3.0. */
9898 if (quad_offset_p)
9899 {
9900 if (quad_address_p (x, mode, reg_ok_strict))
9901 return 1;
9902 }
9903 else if (virtual_stack_registers_memory_p (x))
9904 return 1;
9905
9906 else if (reg_offset_p)
9907 {
9908 if (legitimate_small_data_p (mode, x))
9909 return 1;
9910 if (legitimate_constant_pool_address_p (x, mode,
9911 reg_ok_strict || lra_in_progress))
9912 return 1;
9913 }
9914
9915 /* For TImode, if we have TImode in VSX registers, only allow register
9916 indirect addresses. This will allow the values to go in either GPRs
9917 or VSX registers without reloading. The vector types would tend to
9918 go into VSX registers, so we allow REG+REG, while TImode seems
9919 somewhat split, in that some uses are GPR based, and some VSX based. */
9920 /* FIXME: We could loosen this by changing the following to
9921 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9922 but currently we cannot allow REG+REG addressing for TImode. See
9923 PR72827 for complete details on how this ends up hoodwinking DSE. */
9924 if (mode == TImode && TARGET_VSX)
9925 return 0;
9926 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9927 if (! reg_ok_strict
9928 && reg_offset_p
9929 && GET_CODE (x) == PLUS
9930 && REG_P (XEXP (x, 0))
9931 && (XEXP (x, 0) == virtual_stack_vars_rtx
9932 || XEXP (x, 0) == arg_pointer_rtx)
9933 && CONST_INT_P (XEXP (x, 1)))
9934 return 1;
9935 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9936 return 1;
9937 if (!FLOAT128_2REG_P (mode)
9938 && (TARGET_HARD_FLOAT
9939 || TARGET_POWERPC64
9940 || (mode != DFmode && mode != DDmode))
9941 && (TARGET_POWERPC64 || mode != DImode)
9942 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9943 && mode != PTImode
9944 && !avoiding_indexed_address_p (mode)
9945 && legitimate_indexed_address_p (x, reg_ok_strict))
9946 return 1;
9947 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9948 && mode_supports_pre_modify_p (mode)
9949 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9950 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9951 reg_ok_strict, false)
9952 || (!avoiding_indexed_address_p (mode)
9953 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9954 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9955 {
9956 /* There is no prefixed version of the load/store with update. */
9957 rtx addr = XEXP (x, 1);
9958 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9959 }
9960 if (reg_offset_p && !quad_offset_p
9961 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9962 return 1;
9963 return 0;
9964 }
9965
9966 /* Debug version of rs6000_legitimate_address_p. */
9967 static bool
9968 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9969 bool reg_ok_strict)
9970 {
9971 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9972 fprintf (stderr,
9973 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9974 "strict = %d, reload = %s, code = %s\n",
9975 ret ? "true" : "false",
9976 GET_MODE_NAME (mode),
9977 reg_ok_strict,
9978 (reload_completed ? "after" : "before"),
9979 GET_RTX_NAME (GET_CODE (x)));
9980 debug_rtx (x);
9981
9982 return ret;
9983 }
9984
9985 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9986
9987 static bool
9988 rs6000_mode_dependent_address_p (const_rtx addr,
9989 addr_space_t as ATTRIBUTE_UNUSED)
9990 {
9991 return rs6000_mode_dependent_address_ptr (addr);
9992 }
9993
9994 /* Go to LABEL if ADDR (a legitimate address expression)
9995 has an effect that depends on the machine mode it is used for.
9996
9997 On the RS/6000 this is true of all integral offsets (since AltiVec
9998 and VSX modes don't allow them) or is a pre-increment or decrement.
9999
10000 ??? Except that due to conceptual problems in offsettable_address_p
10001 we can't really report the problems of integral offsets. So leave
10002 this assuming that the adjustable offset must be valid for the
10003 sub-words of a TFmode operand, which is what we had before. */
10004
10005 static bool
10006 rs6000_mode_dependent_address (const_rtx addr)
10007 {
10008 switch (GET_CODE (addr))
10009 {
10010 case PLUS:
10011 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10012 is considered a legitimate address before reload, so there
10013 are no offset restrictions in that case. Note that this
10014 condition is safe in strict mode because any address involving
10015 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10016 been rejected as illegitimate. */
10017 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10018 && XEXP (addr, 0) != arg_pointer_rtx
10019 && CONST_INT_P (XEXP (addr, 1)))
10020 {
10021 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10022 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
10023 if (TARGET_PREFIXED)
10024 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
10025 else
10026 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
10027 }
10028 break;
10029
10030 case LO_SUM:
10031 /* Anything in the constant pool is sufficiently aligned that
10032 all bytes have the same high part address. */
10033 return !legitimate_constant_pool_address_p (addr, QImode, false);
10034
10035 /* Auto-increment cases are now treated generically in recog.cc. */
10036 case PRE_MODIFY:
10037 return TARGET_UPDATE;
10038
10039 /* AND is only allowed in Altivec loads. */
10040 case AND:
10041 return true;
10042
10043 default:
10044 break;
10045 }
10046
10047 return false;
10048 }
10049
10050 /* Debug version of rs6000_mode_dependent_address. */
10051 static bool
10052 rs6000_debug_mode_dependent_address (const_rtx addr)
10053 {
10054 bool ret = rs6000_mode_dependent_address (addr);
10055
10056 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10057 ret ? "true" : "false");
10058 debug_rtx (addr);
10059
10060 return ret;
10061 }
10062
10063 /* Implement FIND_BASE_TERM. */
10064
10065 rtx
10066 rs6000_find_base_term (rtx op)
10067 {
10068 rtx base;
10069
10070 base = op;
10071 if (GET_CODE (base) == CONST)
10072 base = XEXP (base, 0);
10073 if (GET_CODE (base) == PLUS)
10074 base = XEXP (base, 0);
10075 if (GET_CODE (base) == UNSPEC)
10076 switch (XINT (base, 1))
10077 {
10078 case UNSPEC_TOCREL:
10079 case UNSPEC_MACHOPIC_OFFSET:
10080 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10081 for aliasing purposes. */
10082 return XVECEXP (base, 0, 0);
10083 }
10084
10085 return op;
10086 }
10087
10088 /* More elaborate version of recog's offsettable_memref_p predicate
10089 that works around the ??? note of rs6000_mode_dependent_address.
10090 In particular it accepts
10091
10092 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10093
10094 in 32-bit mode, that the recog predicate rejects. */
10095
10096 static bool
10097 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10098 {
10099 bool worst_case;
10100
10101 if (!MEM_P (op))
10102 return false;
10103
10104 /* First mimic offsettable_memref_p. */
10105 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10106 return true;
10107
10108 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10109 the latter predicate knows nothing about the mode of the memory
10110 reference and, therefore, assumes that it is the largest supported
10111 mode (TFmode). As a consequence, legitimate offsettable memory
10112 references are rejected. rs6000_legitimate_offset_address_p contains
10113 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10114 at least with a little bit of help here given that we know the
10115 actual registers used. */
10116 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10117 || GET_MODE_SIZE (reg_mode) == 4);
10118 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10119 strict, worst_case);
10120 }
10121
10122 /* Determine the reassociation width to be used in reassociate_bb.
10123 This takes into account how many parallel operations we
10124 can actually do of a given type, and also the latency.
10125 P8:
10126 int add/sub 6/cycle
10127 mul 2/cycle
10128 vect add/sub/mul 2/cycle
10129 fp add/sub/mul 2/cycle
10130 dfp 1/cycle
10131 */
10132
10133 static int
10134 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10135 machine_mode mode)
10136 {
10137 switch (rs6000_tune)
10138 {
10139 case PROCESSOR_POWER8:
10140 case PROCESSOR_POWER9:
10141 case PROCESSOR_POWER10:
10142 if (DECIMAL_FLOAT_MODE_P (mode))
10143 return 1;
10144 if (VECTOR_MODE_P (mode))
10145 return 4;
10146 if (INTEGRAL_MODE_P (mode))
10147 return 1;
10148 if (FLOAT_MODE_P (mode))
10149 return 4;
10150 break;
10151 default:
10152 break;
10153 }
10154 return 1;
10155 }
10156
10157 /* Change register usage conditional on target flags. */
10158 static void
10159 rs6000_conditional_register_usage (void)
10160 {
10161 int i;
10162
10163 if (TARGET_DEBUG_TARGET)
10164 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10165
10166 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10167 if (TARGET_64BIT)
10168 fixed_regs[13] = call_used_regs[13] = 1;
10169
10170 /* Conditionally disable FPRs. */
10171 if (TARGET_SOFT_FLOAT)
10172 for (i = 32; i < 64; i++)
10173 fixed_regs[i] = call_used_regs[i] = 1;
10174
10175 /* The TOC register is not killed across calls in a way that is
10176 visible to the compiler. */
10177 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10178 call_used_regs[2] = 0;
10179
10180 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10181 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10182
10183 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10184 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10185 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10186
10187 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10188 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10189 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10190
10191 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10192 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10193
10194 if (!TARGET_ALTIVEC && !TARGET_VSX)
10195 {
10196 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10197 fixed_regs[i] = call_used_regs[i] = 1;
10198 call_used_regs[VRSAVE_REGNO] = 1;
10199 }
10200
10201 if (TARGET_ALTIVEC || TARGET_VSX)
10202 global_regs[VSCR_REGNO] = 1;
10203
10204 if (TARGET_ALTIVEC_ABI)
10205 {
10206 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10207 call_used_regs[i] = 1;
10208
10209 /* AIX reserves VR20:31 in non-extended ABI mode. */
10210 if (TARGET_XCOFF && !rs6000_aix_extabi)
10211 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10212 fixed_regs[i] = call_used_regs[i] = 1;
10213 }
10214 }
10215
10216 \f
10217 /* Output insns to set DEST equal to the constant SOURCE as a series of
10218 lis, ori and shl instructions and return TRUE. */
10219
10220 bool
10221 rs6000_emit_set_const (rtx dest, rtx source)
10222 {
10223 machine_mode mode = GET_MODE (dest);
10224 rtx temp, set;
10225 rtx_insn *insn;
10226 HOST_WIDE_INT c;
10227
10228 gcc_checking_assert (CONST_INT_P (source));
10229 c = INTVAL (source);
10230 switch (mode)
10231 {
10232 case E_QImode:
10233 case E_HImode:
10234 emit_insn (gen_rtx_SET (dest, source));
10235 return true;
10236
10237 case E_SImode:
10238 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10239
10240 emit_insn (gen_rtx_SET (copy_rtx (temp),
10241 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10242 emit_insn (gen_rtx_SET (dest,
10243 gen_rtx_IOR (SImode, copy_rtx (temp),
10244 GEN_INT (c & 0xffff))));
10245 break;
10246
10247 case E_DImode:
10248 if (!TARGET_POWERPC64)
10249 {
10250 rtx hi, lo;
10251
10252 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10253 DImode);
10254 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10255 DImode);
10256 emit_move_insn (hi, GEN_INT (c >> 32));
10257 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10258 emit_move_insn (lo, GEN_INT (c));
10259 }
10260 else
10261 rs6000_emit_set_long_const (dest, c);
10262 break;
10263
10264 default:
10265 gcc_unreachable ();
10266 }
10267
10268 insn = get_last_insn ();
10269 set = single_set (insn);
10270 if (! CONSTANT_P (SET_SRC (set)))
10271 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10272
10273 return true;
10274 }
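/* The SImode case above is the classic two-instruction build: lis
   materializes the high halfword, ori fills in the low one.  A
   stand-alone sketch of the value arithmetic (illustrative only):  */
#if 0
static unsigned int
build_si_const (unsigned int c)
{
  unsigned int t = c & ~0xffffu;  /* lis rT, (c >> 16)      */
  return t | (c & 0xffff);        /* ori rD, rT, c & 0xffff */
}
#endif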
10275
10276 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10277 Output insns to set DEST equal to the constant C as a series of
10278 lis, ori and shl instructions. */
10279
10280 static void
10281 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10282 {
10283 rtx temp;
10284 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10285
10286 ud1 = c & 0xffff;
10287 c = c >> 16;
10288 ud2 = c & 0xffff;
10289 c = c >> 16;
10290 ud3 = c & 0xffff;
10291 c = c >> 16;
10292 ud4 = c & 0xffff;
10293
10294 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10295 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10296 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10297
10298 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10299 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10300 {
10301 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10302
10303 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10304 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10305 if (ud1 != 0)
10306 emit_move_insn (dest,
10307 gen_rtx_IOR (DImode, copy_rtx (temp),
10308 GEN_INT (ud1)));
10309 }
10310 else if (ud3 == 0 && ud4 == 0)
10311 {
10312 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10313
10314 gcc_assert (ud2 & 0x8000);
10315 emit_move_insn (copy_rtx (temp),
10316 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10317 if (ud1 != 0)
10318 emit_move_insn (copy_rtx (temp),
10319 gen_rtx_IOR (DImode, copy_rtx (temp),
10320 GEN_INT (ud1)));
10321 emit_move_insn (dest,
10322 gen_rtx_ZERO_EXTEND (DImode,
10323 gen_lowpart (SImode,
10324 copy_rtx (temp))));
10325 }
10326 else if (ud1 == ud3 && ud2 == ud4)
10327 {
10328 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10329 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10330 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
10331 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10332 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10333 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10334 }
10335 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10336 || (ud4 == 0 && ! (ud3 & 0x8000)))
10337 {
10338 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10339
10340 emit_move_insn (copy_rtx (temp),
10341 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10342 if (ud2 != 0)
10343 emit_move_insn (copy_rtx (temp),
10344 gen_rtx_IOR (DImode, copy_rtx (temp),
10345 GEN_INT (ud2)));
10346 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10347 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10348 GEN_INT (16)));
10349 if (ud1 != 0)
10350 emit_move_insn (dest,
10351 gen_rtx_IOR (DImode, copy_rtx (temp),
10352 GEN_INT (ud1)));
10353 }
10354 else
10355 {
10356 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10357
10358 emit_move_insn (copy_rtx (temp),
10359 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10360 if (ud3 != 0)
10361 emit_move_insn (copy_rtx (temp),
10362 gen_rtx_IOR (DImode, copy_rtx (temp),
10363 GEN_INT (ud3)));
10364
10365 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10366 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10367 GEN_INT (32)));
10368 if (ud2 != 0)
10369 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10370 gen_rtx_IOR (DImode, copy_rtx (temp),
10371 GEN_INT (ud2 << 16)));
10372 if (ud1 != 0)
10373 emit_move_insn (dest,
10374 gen_rtx_IOR (DImode, copy_rtx (temp),
10375 GEN_INT (ud1)));
10376 }
10377 }
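/* The code above picks the cheapest sequence based on which 16-bit
   chunks (ud1 lowest ... ud4 highest) are significant; the sign
   extension performed by lis is compensated with the ^0x80000000
   trick.  A sketch of the five-instruction worst case (illustrative
   only, unsigned arithmetic):  */
#if 0
static unsigned long long
build_di_const (unsigned long long c)
{
  unsigned long long ud1 = c & 0xffff;
  unsigned long long ud2 = (c >> 16) & 0xffff;
  unsigned long long ud3 = (c >> 32) & 0xffff;
  unsigned long long ud4 = (c >> 48) & 0xffff;

  unsigned long long r = ud4 << 16;  /* lis  */
  r |= ud3;                          /* ori  */
  r <<= 32;                          /* sldi */
  r |= ud2 << 16;                    /* oris */
  r |= ud1;                          /* ori  */
  return r;
}
#endif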
10378
10379 /* Helper for the following. Get rid of [r+r] memory refs
10380 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10381
10382 static void
10383 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10384 {
10385 if (MEM_P (operands[0])
10386 && !REG_P (XEXP (operands[0], 0))
10387 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10388 GET_MODE (operands[0]), false))
10389 operands[0]
10390 = replace_equiv_address (operands[0],
10391 copy_addr_to_reg (XEXP (operands[0], 0)));
10392
10393 if (MEM_P (operands[1])
10394 && !REG_P (XEXP (operands[1], 0))
10395 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10396 GET_MODE (operands[1]), false))
10397 operands[1]
10398 = replace_equiv_address (operands[1],
10399 copy_addr_to_reg (XEXP (operands[1], 0)));
10400 }
10401
10402 /* Generate a vector of constants to permute MODE for a little-endian
10403 storage operation by swapping the two halves of a vector. */
10404 static rtvec
10405 rs6000_const_vec (machine_mode mode)
10406 {
10407 int i, subparts;
10408 rtvec v;
10409
10410 switch (mode)
10411 {
10412 case E_V1TImode:
10413 subparts = 1;
10414 break;
10415 case E_V2DFmode:
10416 case E_V2DImode:
10417 subparts = 2;
10418 break;
10419 case E_V4SFmode:
10420 case E_V4SImode:
10421 subparts = 4;
10422 break;
10423 case E_V8HImode:
10424 subparts = 8;
10425 break;
10426 case E_V16QImode:
10427 subparts = 16;
10428 break;
10429 default:
10430 gcc_unreachable();
10431 }
10432
10433 v = rtvec_alloc (subparts);
10434
10435 for (i = 0; i < subparts / 2; ++i)
10436 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10437 for (i = subparts / 2; i < subparts; ++i)
10438 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10439
10440 return v;
10441 }
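/* The selector simply swaps the two doubleword halves of the vector;
   for V4SImode, for example, the constant vector built above is
   {2, 3, 0, 1}.  A sketch of applying it (illustrative only):  */
#if 0
static void
swap_halves_v4si (const int src[4], int dst[4])
{
  static const int sel[4] = { 2, 3, 0, 1 };
  for (int i = 0; i < 4; i++)
    dst[i] = src[sel[i]];
}
#endif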
10442
10443 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10444 store operation. */
10445 void
10446 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10447 {
10448 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10449 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10450
10451 /* Scalar permutations are easier to express in integer modes than in
10452 floating-point modes, so cast them here. We use V1TImode instead
10453 of TImode to ensure that the values don't go through GPRs. */
10454 if (FLOAT128_VECTOR_P (mode))
10455 {
10456 dest = gen_lowpart (V1TImode, dest);
10457 source = gen_lowpart (V1TImode, source);
10458 mode = V1TImode;
10459 }
10460
10461 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10462 scalar. */
10463 if (mode == TImode || mode == V1TImode)
10464 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10465 GEN_INT (64))));
10466 else
10467 {
10468 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10469 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10470 }
10471 }
10472
10473 /* Emit a little-endian load from vector memory location SOURCE to VSX
10474 register DEST in mode MODE. The load is done with two permuting
10475 insns that represent an lxvd2x and xxpermdi. */
10476 void
10477 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10478 {
10479 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10480 V1TImode). */
10481 if (mode == TImode || mode == V1TImode)
10482 {
10483 mode = V2DImode;
10484 dest = gen_lowpart (V2DImode, dest);
10485 source = adjust_address (source, V2DImode, 0);
10486 }
10487
10488 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10489 rs6000_emit_le_vsx_permute (tmp, source, mode);
10490 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10491 }
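
/* Illustrative expansion (assuming a pre-power9 LE target): the two
   permutes above typically become

	lxvd2x   vsT,0,rA	 # load; doublewords end up swapped
	xxpermdi vsD,vsT,vsT,2	 # swap them back

   and because this is done at expand time, a later pass can delete a
   redundant pair of permutes if the value is permuted again.  */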
10492
10493 /* Emit a little-endian store to vector memory location DEST from VSX
10494 register SOURCE in mode MODE. The store is done with two permuting
10495 insns that represent an xxpermdi and an stxvd2x. */
10496 void
10497 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10498 {
10499 /* This should never be called after LRA. */
10500 gcc_assert (can_create_pseudo_p ());
10501
10502 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10503 V1TImode). */
10504 if (mode == TImode || mode == V1TImode)
10505 {
10506 mode = V2DImode;
10507 dest = adjust_address (dest, V2DImode, 0);
10508 source = gen_lowpart (V2DImode, source);
10509 }
10510
10511 rtx tmp = gen_reg_rtx_and_attrs (source);
10512 rs6000_emit_le_vsx_permute (tmp, source, mode);
10513 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10514 }
10515
10516 /* Emit a sequence representing a little-endian VSX load or store,
10517 moving data from SOURCE to DEST in mode MODE. This is done
10518 separately from rs6000_emit_move to ensure it is called only
10519 during expand. LE VSX loads and stores introduced later are
10520 handled with a split. The expand-time RTL generation allows
10521 us to optimize away redundant pairs of register-permutes. */
10522 void
10523 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10524 {
10525 gcc_assert (!BYTES_BIG_ENDIAN
10526 && VECTOR_MEM_VSX_P (mode)
10527 && !TARGET_P9_VECTOR
10528 && !gpr_or_gpr_p (dest, source)
10529 && (MEM_P (source) ^ MEM_P (dest)));
10530
10531 if (MEM_P (source))
10532 {
10533 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10534 rs6000_emit_le_vsx_load (dest, source, mode);
10535 }
10536 else
10537 {
10538 if (!REG_P (source))
10539 source = force_reg (mode, source);
10540 rs6000_emit_le_vsx_store (dest, source, mode);
10541 }
10542 }
10543
10544 /* Return whether a SFmode or SImode move can be done without converting one
10545 mode to another. This arises when we have:
10546
10547 (SUBREG:SF (REG:SI ...))
10548 (SUBREG:SI (REG:SF ...))
10549
10550 and one of the values is in a floating point/vector register, where SFmode
10551 scalars are stored in DFmode format. */
10552
10553 bool
10554 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10555 {
10556 if (TARGET_ALLOW_SF_SUBREG)
10557 return true;
10558
10559 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10560 return true;
10561
10562 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10563 return true;
10564
10565 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10566 if (SUBREG_P (dest))
10567 {
10568 rtx dest_subreg = SUBREG_REG (dest);
10569 rtx src_subreg = SUBREG_REG (src);
10570 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10571 }
10572
10573 return false;
10574 }
10575
10576
10577 /* Helper function to change moves with:
10578
10579 (SUBREG:SF (REG:SI)) and
10580 (SUBREG:SI (REG:SF))
10581
10582 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10583 values are stored as DFmode values in the VSX registers. We need to convert
10584 the bits before we can use a direct move or operate on the bits in the
10585 vector register as an integer type.
10586
10587 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10588
10589 static bool
10590 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10591 {
10592 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10593 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10594 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10595 {
10596 rtx inner_source = SUBREG_REG (source);
10597 machine_mode inner_mode = GET_MODE (inner_source);
10598
10599 if (mode == SImode && inner_mode == SFmode)
10600 {
10601 emit_insn (gen_movsi_from_sf (dest, inner_source));
10602 return true;
10603 }
10604
10605 if (mode == SFmode && inner_mode == SImode)
10606 {
10607 emit_insn (gen_movsf_from_si (dest, inner_source));
10608 return true;
10609 }
10610 }
10611
10612 return false;
10613 }
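
/* A sketch of the rewrite (hypothetical registers): a move such as

	(set (reg:SI r3) (subreg:SI (reg:SF f1) 0))

   is emitted via gen_movsi_from_sf instead, an UNSPEC-based pattern
   that first converts f1 from its in-register DFmode format to SFmode
   bits, so the GPR receives the proper single-precision image.  */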
10614
10615 /* Emit a move from SOURCE to DEST in mode MODE. */
10616 void
10617 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10618 {
10619 rtx operands[2];
10620 operands[0] = dest;
10621 operands[1] = source;
10622
10623 if (TARGET_DEBUG_ADDR)
10624 {
10625 fprintf (stderr,
10626 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10627 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10628 GET_MODE_NAME (mode),
10629 lra_in_progress,
10630 reload_completed,
10631 can_create_pseudo_p ());
10632 debug_rtx (dest);
10633 fprintf (stderr, "source:\n");
10634 debug_rtx (source);
10635 }
10636
10637 /* Check that we get CONST_WIDE_INT only when we should. */
10638 if (CONST_WIDE_INT_P (operands[1])
10639 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10640 gcc_unreachable ();
10641
10642 #ifdef HAVE_AS_GNU_ATTRIBUTE
10643 /* If we use a long double type, set the flags in .gnu_attribute that say
10644 what the long double type is. This is to allow the linker's warning
10645 message for the wrong long double to be useful, even if the function does
10646 not do a call (for example, doing a 128-bit add on power9 if the long
10647 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are
10648 used and they aren't the default long double type. */
10649 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10650 {
10651 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10652 rs6000_passes_float = rs6000_passes_long_double = true;
10653
10654 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10655 rs6000_passes_float = rs6000_passes_long_double = true;
10656 }
10657 #endif
10658
10659 /* See if we need to special case SImode/SFmode SUBREG moves. */
10660 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10661 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10662 return;
10663
10664 /* Check if GCC is setting up a block move that will end up using FP
10665 registers as temporaries. We must make sure this is acceptable. */
10666 if (MEM_P (operands[0])
10667 && MEM_P (operands[1])
10668 && mode == DImode
10669 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10670 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10671 && ! (rs6000_slow_unaligned_access (SImode,
10672 (MEM_ALIGN (operands[0]) > 32
10673 ? 32 : MEM_ALIGN (operands[0])))
10674 || rs6000_slow_unaligned_access (SImode,
10675 (MEM_ALIGN (operands[1]) > 32
10676 ? 32 : MEM_ALIGN (operands[1]))))
10677 && ! MEM_VOLATILE_P (operands [0])
10678 && ! MEM_VOLATILE_P (operands [1]))
10679 {
10680 emit_move_insn (adjust_address (operands[0], SImode, 0),
10681 adjust_address (operands[1], SImode, 0));
10682 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10683 adjust_address (copy_rtx (operands[1]), SImode, 4));
10684 return;
10685 }
10686
10687 if (can_create_pseudo_p () && MEM_P (operands[0])
10688 && !gpc_reg_operand (operands[1], mode))
10689 operands[1] = force_reg (mode, operands[1]);
10690
10691 /* Recognize the case where operand[1] is a reference to thread-local
10692 data and load its address to a register. */
10693 if (tls_referenced_p (operands[1]))
10694 {
10695 enum tls_model model;
10696 rtx tmp = operands[1];
10697 rtx addend = NULL;
10698
10699 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10700 {
10701 addend = XEXP (XEXP (tmp, 0), 1);
10702 tmp = XEXP (XEXP (tmp, 0), 0);
10703 }
10704
10705 gcc_assert (SYMBOL_REF_P (tmp));
10706 model = SYMBOL_REF_TLS_MODEL (tmp);
10707 gcc_assert (model != 0);
10708
10709 tmp = rs6000_legitimize_tls_address (tmp, model);
10710 if (addend)
10711 {
10712 tmp = gen_rtx_PLUS (mode, tmp, addend);
10713 tmp = force_operand (tmp, operands[0]);
10714 }
10715 operands[1] = tmp;
10716 }
10717
10718 /* 128-bit constant floating-point values on Darwin should really be loaded
10719 as two parts. However, this premature splitting is a problem when DFmode
10720 values can go into Altivec registers. */
10721 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10722 && !reg_addr[DFmode].scalar_in_vmx_p)
10723 {
10724 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10725 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10726 DFmode);
10727 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10728 GET_MODE_SIZE (DFmode)),
10729 simplify_gen_subreg (DFmode, operands[1], mode,
10730 GET_MODE_SIZE (DFmode)),
10731 DFmode);
10732 return;
10733 }
10734
10735 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10736 p1:SD) if p1 is not of floating point class and p0 is spilled as
10737 we can have no analogous movsd_store for this. */
10738 if (lra_in_progress && mode == DDmode
10739 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10740 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10741 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10742 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10743 {
10744 enum reg_class cl;
10745 int regno = REGNO (SUBREG_REG (operands[1]));
10746
10747 if (!HARD_REGISTER_NUM_P (regno))
10748 {
10749 cl = reg_preferred_class (regno);
10750 regno = reg_renumber[regno];
10751 if (regno < 0)
10752 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10753 }
10754 if (regno >= 0 && ! FP_REGNO_P (regno))
10755 {
10756 mode = SDmode;
10757 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10758 operands[1] = SUBREG_REG (operands[1]);
10759 }
10760 }
10761 if (lra_in_progress
10762 && mode == SDmode
10763 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10764 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10765 && (REG_P (operands[1])
10766 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10767 {
10768 int regno = reg_or_subregno (operands[1]);
10769 enum reg_class cl;
10770
10771 if (!HARD_REGISTER_NUM_P (regno))
10772 {
10773 cl = reg_preferred_class (regno);
10774 gcc_assert (cl != NO_REGS);
10775 regno = reg_renumber[regno];
10776 if (regno < 0)
10777 regno = ira_class_hard_regs[cl][0];
10778 }
10779 if (FP_REGNO_P (regno))
10780 {
10781 if (GET_MODE (operands[0]) != DDmode)
10782 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10783 emit_insn (gen_movsd_store (operands[0], operands[1]));
10784 }
10785 else if (INT_REGNO_P (regno))
10786 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10787 else
10788 gcc_unreachable ();
10789 return;
10790 }
10791 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10792 p:DD)) if p0 is not of floating point class and p1 is spilled as
10793 we can have no analogous movsd_load for this. */
10794 if (lra_in_progress && mode == DDmode
10795 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10796 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10797 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10798 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10799 {
10800 enum reg_class cl;
10801 int regno = REGNO (SUBREG_REG (operands[0]));
10802
10803 if (!HARD_REGISTER_NUM_P (regno))
10804 {
10805 cl = reg_preferred_class (regno);
10806 regno = reg_renumber[regno];
10807 if (regno < 0)
10808 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10809 }
10810 if (regno >= 0 && ! FP_REGNO_P (regno))
10811 {
10812 mode = SDmode;
10813 operands[0] = SUBREG_REG (operands[0]);
10814 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10815 }
10816 }
10817 if (lra_in_progress
10818 && mode == SDmode
10819 && (REG_P (operands[0])
10820 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10821 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10822 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10823 {
10824 int regno = reg_or_subregno (operands[0]);
10825 enum reg_class cl;
10826
10827 if (!HARD_REGISTER_NUM_P (regno))
10828 {
10829 cl = reg_preferred_class (regno);
10830 gcc_assert (cl != NO_REGS);
10831 regno = reg_renumber[regno];
10832 if (regno < 0)
10833 regno = ira_class_hard_regs[cl][0];
10834 }
10835 if (FP_REGNO_P (regno))
10836 {
10837 if (GET_MODE (operands[1]) != DDmode)
10838 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10839 emit_insn (gen_movsd_load (operands[0], operands[1]));
10840 }
10841 else if (INT_REGNO_P (regno))
10842 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10843 else
10844 gcc_unreachable ();
10845 return;
10846 }
10847
10848 /* FIXME: In the long term, this switch statement should go away
10849 and be replaced by a sequence of tests based on things like
10850 mode == Pmode. */
10851 switch (mode)
10852 {
10853 case E_HImode:
10854 case E_QImode:
10855 if (CONSTANT_P (operands[1])
10856 && !CONST_INT_P (operands[1]))
10857 operands[1] = force_const_mem (mode, operands[1]);
10858 break;
10859
10860 case E_TFmode:
10861 case E_TDmode:
10862 case E_IFmode:
10863 case E_KFmode:
10864 if (FLOAT128_2REG_P (mode))
10865 rs6000_eliminate_indexed_memrefs (operands);
10866 /* fall through */
10867
10868 case E_DFmode:
10869 case E_DDmode:
10870 case E_SFmode:
10871 case E_SDmode:
10872 if (CONSTANT_P (operands[1])
10873 && ! easy_fp_constant (operands[1], mode))
10874 operands[1] = force_const_mem (mode, operands[1]);
10875 break;
10876
10877 case E_V16QImode:
10878 case E_V8HImode:
10879 case E_V4SFmode:
10880 case E_V4SImode:
10881 case E_V2DFmode:
10882 case E_V2DImode:
10883 case E_V1TImode:
10884 if (CONSTANT_P (operands[1])
10885 && !easy_vector_constant (operands[1], mode))
10886 operands[1] = force_const_mem (mode, operands[1]);
10887 break;
10888
10889 case E_OOmode:
10890 case E_XOmode:
10891 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10892 error ("%qs is an opaque type, and you cannot set it to other values",
10893 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10894 break;
10895
10896 case E_SImode:
10897 case E_DImode:
10898 /* Use default pattern for address of ELF small data. */
10899 if (TARGET_ELF
10900 && mode == Pmode
10901 && DEFAULT_ABI == ABI_V4
10902 && (SYMBOL_REF_P (operands[1])
10903 || GET_CODE (operands[1]) == CONST)
10904 && small_data_operand (operands[1], mode))
10905 {
10906 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10907 return;
10908 }
10909
10910 /* Use the default pattern for loading up PC-relative addresses. */
10911 if (TARGET_PCREL && mode == Pmode
10912 && pcrel_local_or_external_address (operands[1], Pmode))
10913 {
10914 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10915 return;
10916 }
10917
10918 if (DEFAULT_ABI == ABI_V4
10919 && mode == Pmode && mode == SImode
10920 && flag_pic == 1 && got_operand (operands[1], mode))
10921 {
10922 emit_insn (gen_movsi_got (operands[0], operands[1]));
10923 return;
10924 }
10925
10926 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10927 && TARGET_NO_TOC_OR_PCREL
10928 && ! flag_pic
10929 && mode == Pmode
10930 && CONSTANT_P (operands[1])
10931 && GET_CODE (operands[1]) != HIGH
10932 && !CONST_INT_P (operands[1]))
10933 {
10934 rtx target = (!can_create_pseudo_p ()
10935 ? operands[0]
10936 : gen_reg_rtx (mode));
10937
10938 /* If this is a function address on -mcall-aixdesc,
10939 convert it to the address of the descriptor. */
10940 if (DEFAULT_ABI == ABI_AIX
10941 && SYMBOL_REF_P (operands[1])
10942 && XSTR (operands[1], 0)[0] == '.')
10943 {
10944 const char *name = XSTR (operands[1], 0);
10945 rtx new_ref;
10946 while (*name == '.')
10947 name++;
10948 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10949 CONSTANT_POOL_ADDRESS_P (new_ref)
10950 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10951 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10952 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10953 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10954 operands[1] = new_ref;
10955 }
10956
10957 if (DEFAULT_ABI == ABI_DARWIN)
10958 {
10959 #if TARGET_MACHO
10960 /* This is not PIC code, but could require the subset of
10961 indirections used by mdynamic-no-pic. */
10962 if (MACHO_DYNAMIC_NO_PIC_P)
10963 {
10964 /* Take care of any required data indirection. */
10965 operands[1] = rs6000_machopic_legitimize_pic_address (
10966 operands[1], mode, operands[0]);
10967 if (operands[0] != operands[1])
10968 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10969 return;
10970 }
10971 #endif
10972 emit_insn (gen_macho_high (Pmode, target, operands[1]));
10973 emit_insn (gen_macho_low (Pmode, operands[0],
10974 target, operands[1]));
10975 return;
10976 }
10977
10978 emit_insn (gen_elf_high (target, operands[1]));
10979 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10980 return;
10981 }
10982
10983 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10984 and we have put it in the TOC, we just need to make a TOC-relative
10985 reference to it. */
10986 if (TARGET_TOC
10987 && SYMBOL_REF_P (operands[1])
10988 && use_toc_relative_ref (operands[1], mode))
10989 operands[1] = create_TOC_reference (operands[1], operands[0]);
10990 else if (mode == Pmode
10991 && CONSTANT_P (operands[1])
10992 && GET_CODE (operands[1]) != HIGH
10993 && ((REG_P (operands[0])
10994 && FP_REGNO_P (REGNO (operands[0])))
10995 || !CONST_INT_P (operands[1])
10996 || (num_insns_constant (operands[1], mode)
10997 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10998 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10999 && (TARGET_CMODEL == CMODEL_SMALL
11000 || can_create_pseudo_p ()
11001 || (REG_P (operands[0])
11002 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11003 {
11004
11005 #if TARGET_MACHO
11006 /* Darwin uses a special PIC legitimizer. */
11007 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11008 {
11009 operands[1] =
11010 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11011 operands[0]);
11012 if (operands[0] != operands[1])
11013 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11014 return;
11015 }
11016 #endif
11017
11018 /* If we are to limit the number of things we put in the TOC and
11019 this is a symbol plus a constant we can add in one insn,
11020 just put the symbol in the TOC and add the constant. */
11021 if (GET_CODE (operands[1]) == CONST
11022 && TARGET_NO_SUM_IN_TOC
11023 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11024 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11025 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11026 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
11027 && ! side_effects_p (operands[0]))
11028 {
11029 rtx sym =
11030 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11031 rtx other = XEXP (XEXP (operands[1], 0), 1);
11032
11033 sym = force_reg (mode, sym);
11034 emit_insn (gen_add3_insn (operands[0], sym, other));
11035 return;
11036 }
11037
11038 operands[1] = force_const_mem (mode, operands[1]);
11039
11040 if (TARGET_TOC
11041 && SYMBOL_REF_P (XEXP (operands[1], 0))
11042 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11043 {
11044 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11045 operands[0]);
11046 operands[1] = gen_const_mem (mode, tocref);
11047 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11048 }
11049 }
11050 break;
11051
11052 case E_TImode:
11053 if (!VECTOR_MEM_VSX_P (TImode))
11054 rs6000_eliminate_indexed_memrefs (operands);
11055 break;
11056
11057 case E_PTImode:
11058 rs6000_eliminate_indexed_memrefs (operands);
11059 break;
11060
11061 default:
11062 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11063 }
11064
11065 /* Above, we may have called force_const_mem which may have returned
11066 an invalid address. If we can, fix this up; otherwise, reload will
11067 have to deal with it. */
11068 if (MEM_P (operands[1]))
11069 operands[1] = validize_mem (operands[1]);
11070
11071 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11072 }
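
/* Usage sketch (abridged, assuming the usual expander shape): the
   mov<mode> expanders in rs6000.md funnel through this routine,
   roughly

	rs6000_emit_move (operands[0], operands[1], <MODE>mode);
	DONE;

   so all of the special cases above run before any move pattern is
   matched.  */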
11073 \f
11074
11075 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11076 static void
11077 init_float128_ibm (machine_mode mode)
11078 {
11079 if (!TARGET_XL_COMPAT)
11080 {
11081 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
11082 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
11083 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
11084 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
11085
11086 if (!TARGET_HARD_FLOAT)
11087 {
11088 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
11089 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
11090 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
11091 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
11092 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
11093 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
11094 set_optab_libfunc (le_optab, mode, "__gcc_qle");
11095 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
11096
11097 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
11098 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
11099 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
11100 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
11101 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
11102 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
11103 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
11104 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
11105 }
11106 }
11107 else
11108 {
11109 set_optab_libfunc (add_optab, mode, "_xlqadd");
11110 set_optab_libfunc (sub_optab, mode, "_xlqsub");
11111 set_optab_libfunc (smul_optab, mode, "_xlqmul");
11112 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11113 }
11114
11115 /* Add various conversions for IFmode to use the traditional TFmode
11116 names. */
11117 if (mode == IFmode)
11118 {
11119 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11120 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11121 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11122 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11123 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11124 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11125
11126 if (TARGET_POWERPC64)
11127 {
11128 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11129 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11130 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11131 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11132 }
11133 }
11134 }
11135
11136 /* Create a decl for either complex long double multiply or complex long double
11137 divide when long double is IEEE 128-bit floating point. We can't use
11138 __multc3 and __divtc3 because the original long double using IBM extended
11139 double used those names. The complex multiply/divide functions are encoded
11140 as builtin functions with a complex result and 4 scalar inputs. */
11141
11142 static void
11143 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
11144 {
11145 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
11146 name, NULL_TREE);
11147
11148 set_builtin_decl (fncode, fndecl, true);
11149
11150 if (TARGET_DEBUG_BUILTIN)
11151 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
11152
11153 return;
11154 }
11155
11156 /* Set up IEEE 128-bit floating point routines. Use different names if the
11157 arguments can be passed in a vector register. The historical PowerPC
11158 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11159 continue to use that if we aren't using vector registers to pass IEEE
11160 128-bit floating point. */
11161
11162 static void
11163 init_float128_ieee (machine_mode mode)
11164 {
11165 if (FLOAT128_VECTOR_P (mode))
11166 {
11167 static bool complex_muldiv_init_p = false;
11168
11169 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
11170 we have clone or target attributes, this will be called a second
11171 time. We want to create the built-in function only once. */
11172 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
11173 {
11174 complex_muldiv_init_p = true;
11175 built_in_function fncode_mul =
11176 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
11177 - MIN_MODE_COMPLEX_FLOAT);
11178 built_in_function fncode_div =
11179 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
11180 - MIN_MODE_COMPLEX_FLOAT);
11181
11182 tree fntype = build_function_type_list (complex_long_double_type_node,
11183 long_double_type_node,
11184 long_double_type_node,
11185 long_double_type_node,
11186 long_double_type_node,
11187 NULL_TREE);
11188
11189 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
11190 create_complex_muldiv ("__divkc3", fncode_div, fntype);
11191 }
11192
11193 set_optab_libfunc (add_optab, mode, "__addkf3");
11194 set_optab_libfunc (sub_optab, mode, "__subkf3");
11195 set_optab_libfunc (neg_optab, mode, "__negkf2");
11196 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11197 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11198 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11199 set_optab_libfunc (abs_optab, mode, "__abskf2");
11200 set_optab_libfunc (powi_optab, mode, "__powikf2");
11201
11202 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11203 set_optab_libfunc (ne_optab, mode, "__nekf2");
11204 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11205 set_optab_libfunc (ge_optab, mode, "__gekf2");
11206 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11207 set_optab_libfunc (le_optab, mode, "__lekf2");
11208 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11209
11210 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11211 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11212 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11213 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11214
11215 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11216 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11217 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11218
11219 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11220 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11221 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11222
11223 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11224 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11225 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11226 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11227 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11228 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11229
11230 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11231 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11232 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11233 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11234
11235 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11236 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11237 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11238 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11239
11240 if (TARGET_POWERPC64)
11241 {
11242 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11243 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11244 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11245 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11246 }
11247 }
11248
11249 else
11250 {
11251 set_optab_libfunc (add_optab, mode, "_q_add");
11252 set_optab_libfunc (sub_optab, mode, "_q_sub");
11253 set_optab_libfunc (neg_optab, mode, "_q_neg");
11254 set_optab_libfunc (smul_optab, mode, "_q_mul");
11255 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11256 if (TARGET_PPC_GPOPT)
11257 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11258
11259 set_optab_libfunc (eq_optab, mode, "_q_feq");
11260 set_optab_libfunc (ne_optab, mode, "_q_fne");
11261 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11262 set_optab_libfunc (ge_optab, mode, "_q_fge");
11263 set_optab_libfunc (lt_optab, mode, "_q_flt");
11264 set_optab_libfunc (le_optab, mode, "_q_fle");
11265
11266 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11267 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11268 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11269 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11270 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11271 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11272 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11273 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11274 }
11275 }
11276
11277 static void
11278 rs6000_init_libfuncs (void)
11279 {
11280 /* __float128 support. */
11281 if (TARGET_FLOAT128_TYPE)
11282 {
11283 init_float128_ibm (IFmode);
11284 init_float128_ieee (KFmode);
11285 }
11286
11287 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11288 if (TARGET_LONG_DOUBLE_128)
11289 {
11290 if (!TARGET_IEEEQUAD)
11291 init_float128_ibm (TFmode);
11292
11293 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11294 else
11295 init_float128_ieee (TFmode);
11296 }
11297 }
11298
11299 /* Emit a potentially record-form instruction, setting DST from SRC.
11300 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11301 signed comparison of DST with zero. If DOT is 1, the generated RTL
11302 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11303 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11304 a separate COMPARE. */
11305
11306 void
11307 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11308 {
11309 if (dot == 0)
11310 {
11311 emit_move_insn (dst, src);
11312 return;
11313 }
11314
11315 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11316 {
11317 emit_move_insn (dst, src);
11318 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11319 return;
11320 }
11321
11322 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11323 if (dot == 1)
11324 {
11325 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11326 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11327 }
11328 else
11329 {
11330 rtx set = gen_rtx_SET (dst, src);
11331 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11332 }
11333 }
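
/* Illustration (hypothetical operands): with DOT == 1 and CCREG == cr0
   the function emits the single record-form insn

	(parallel [(set (reg:CC cr0) (compare:CC src (const_int 0)))
		   (clobber dst)])

   while DOT == 2 replaces the clobber with (set dst src), keeping the
   GPR result live as well.  */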
11334
11335 \f
11336 /* A validation routine: say whether CODE, a condition code, and MODE
11337 match. The other alternatives either don't make sense or should
11338 never be generated. */
11339
11340 void
11341 validate_condition_mode (enum rtx_code code, machine_mode mode)
11342 {
11343 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11344 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11345 && GET_MODE_CLASS (mode) == MODE_CC);
11346
11347 /* These don't make sense. */
11348 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11349 || mode != CCUNSmode);
11350
11351 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11352 || mode == CCUNSmode);
11353
11354 gcc_assert (mode == CCFPmode
11355 || (code != ORDERED && code != UNORDERED
11356 && code != UNEQ && code != LTGT
11357 && code != UNGT && code != UNLT
11358 && code != UNGE && code != UNLE));
11359
11360 /* These are invalid; the information is not there. */
11361 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11362 }
11363
11364 \f
11365 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11366 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11367 not zero, store there the bit offset (counted from the right) where
11368 the single stretch of 1 bits begins; and similarly for B, the bit
11369 offset where it ends. */
11370
11371 bool
11372 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11373 {
11374 unsigned HOST_WIDE_INT val = INTVAL (mask);
11375 unsigned HOST_WIDE_INT bit;
11376 int nb, ne;
11377 int n = GET_MODE_PRECISION (mode);
11378
11379 if (mode != DImode && mode != SImode)
11380 return false;
11381
11382 if (INTVAL (mask) >= 0)
11383 {
11384 bit = val & -val;
11385 ne = exact_log2 (bit);
11386 nb = exact_log2 (val + bit);
11387 }
11388 else if (val + 1 == 0)
11389 {
11390 nb = n;
11391 ne = 0;
11392 }
11393 else if (val & 1)
11394 {
11395 val = ~val;
11396 bit = val & -val;
11397 nb = exact_log2 (bit);
11398 ne = exact_log2 (val + bit);
11399 }
11400 else
11401 {
11402 bit = val & -val;
11403 ne = exact_log2 (bit);
11404 if (val + bit == 0)
11405 nb = n;
11406 else
11407 nb = 0;
11408 }
11409
11410 nb--;
11411
11412 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11413 return false;
11414
11415 if (b)
11416 *b = nb;
11417 if (e)
11418 *e = ne;
11419
11420 return true;
11421 }
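
/* Worked examples (values chosen for illustration), for DImode:

	0x000000000fffff00  one run, bits 8..27  -> true, *b = 27, *e = 8
	0xff000000000000ff  wrap-around run      -> true, *b = 7,  *e = 56
	0x0000000000000f0f  two separate runs    -> false

   the wrap-around case goes through the (val & 1) branch above, which
   analyzes the complemented mask.  */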
11422
11423 bool
11424 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11425 {
11426 int nb, ne;
11427 return rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0;
11428 }
11429
11430 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11431 or rldicr instruction, to implement an AND with it in mode MODE. */
11432
11433 bool
11434 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11435 {
11436 int nb, ne;
11437
11438 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11439 return false;
11440
11441 /* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
11442 does not wrap. */
11443 if (mode == DImode)
11444 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11445
11446 /* For SImode, rlwinm can do everything. */
11447 if (mode == SImode)
11448 return (nb < 32 && ne < 32);
11449
11450 return false;
11451 }
11452
11453 /* Return the instruction template for an AND with mask in mode MODE, with
11454 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11455
11456 const char *
11457 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11458 {
11459 int nb, ne;
11460
11461 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11462 gcc_unreachable ();
11463
11464 if (mode == DImode && ne == 0)
11465 {
11466 operands[3] = GEN_INT (63 - nb);
11467 if (dot)
11468 return "rldicl. %0,%1,0,%3";
11469 return "rldicl %0,%1,0,%3";
11470 }
11471
11472 if (mode == DImode && nb == 63)
11473 {
11474 operands[3] = GEN_INT (63 - ne);
11475 if (dot)
11476 return "rldicr. %0,%1,0,%3";
11477 return "rldicr %0,%1,0,%3";
11478 }
11479
11480 if (nb < 32 && ne < 32)
11481 {
11482 operands[3] = GEN_INT (31 - nb);
11483 operands[4] = GEN_INT (31 - ne);
11484 if (dot)
11485 return "rlwinm. %0,%1,0,%3,%4";
11486 return "rlwinm %0,%1,0,%3,%4";
11487 }
11488
11489 gcc_unreachable ();
11490 }
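
/* Example output (illustrative): for DImode and mask
   0x00ffffffffffffff (nb == 55, ne == 0) the first case fires with
   operands[3] = 63 - 55 = 8, producing "rldicl %0,%1,0,8" -- a rotate
   by zero that keeps the low 56 bits.  */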
11491
11492 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11493 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11494 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11495
11496 bool
11497 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11498 {
11499 int nb, ne;
11500
11501 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11502 return false;
11503
11504 int n = GET_MODE_PRECISION (mode);
11505 int sh = -1;
11506
11507 if (CONST_INT_P (XEXP (shift, 1)))
11508 {
11509 sh = INTVAL (XEXP (shift, 1));
11510 if (sh < 0 || sh >= n)
11511 return false;
11512 }
11513
11514 rtx_code code = GET_CODE (shift);
11515
11516 /* Convert any shift by 0 to a rotate, to simplify the code below. */
11517 if (sh == 0)
11518 code = ROTATE;
11519
11520 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11521 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11522 code = ASHIFT;
11523 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11524 {
11525 code = LSHIFTRT;
11526 sh = n - sh;
11527 }
11528
11529 /* DImode rotates need rld*. */
11530 if (mode == DImode && code == ROTATE)
11531 return (nb == 63 || ne == 0 || ne == sh);
11532
11533 /* SImode rotates need rlw*. */
11534 if (mode == SImode && code == ROTATE)
11535 return (nb < 32 && ne < 32 && sh < 32);
11536
11537 /* Wrap-around masks are only okay for rotates. */
11538 if (ne > nb)
11539 return false;
11540
11541 /* Variable shifts are only okay for rotates. */
11542 if (sh < 0)
11543 return false;
11544
11545 /* Don't allow ASHIFT if the mask is wrong for that. */
11546 if (code == ASHIFT && ne < sh)
11547 return false;
11548
11549 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11550 if the mask is wrong for that. */
11551 if (nb < 32 && ne < 32 && sh < 32
11552 && !(code == LSHIFTRT && nb >= 32 - sh))
11553 return true;
11554
11555 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11556 if the mask is wrong for that. */
11557 if (code == LSHIFTRT)
11558 sh = 64 - sh;
11559 if (nb == 63 || ne == 0 || ne == sh)
11560 return !(code == LSHIFTRT && nb >= sh);
11561
11562 return false;
11563 }
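
/* Worked example (illustrative): (ashift:DI x (const_int 8)) under
   mask 0xffffff00 (nb == 31, ne == 8) is accepted -- the run begins
   exactly at the shift count -- and rs6000_insn_for_shift_mask below
   renders it as "rldic %0,%1,8,32".  */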
11564
11565 /* Return the instruction template for a shift with mask in mode MODE, with
11566 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11567
11568 const char *
11569 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11570 {
11571 int nb, ne;
11572
11573 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11574 gcc_unreachable ();
11575
11576 if (mode == DImode && ne == 0)
11577 {
11578 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11579 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11580 operands[3] = GEN_INT (63 - nb);
11581 if (dot)
11582 return "rld%I2cl. %0,%1,%2,%3";
11583 return "rld%I2cl %0,%1,%2,%3";
11584 }
11585
11586 if (mode == DImode && nb == 63)
11587 {
11588 operands[3] = GEN_INT (63 - ne);
11589 if (dot)
11590 return "rld%I2cr. %0,%1,%2,%3";
11591 return "rld%I2cr %0,%1,%2,%3";
11592 }
11593
11594 if (mode == DImode
11595 && GET_CODE (operands[4]) != LSHIFTRT
11596 && CONST_INT_P (operands[2])
11597 && ne == INTVAL (operands[2]))
11598 {
11599 operands[3] = GEN_INT (63 - nb);
11600 if (dot)
11601 return "rld%I2c. %0,%1,%2,%3";
11602 return "rld%I2c %0,%1,%2,%3";
11603 }
11604
11605 if (nb < 32 && ne < 32)
11606 {
11607 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11608 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11609 operands[3] = GEN_INT (31 - nb);
11610 operands[4] = GEN_INT (31 - ne);
11611 /* This insn can also be a 64-bit rotate with a mask that really makes
11612 it just a shift right (with mask); the %h operands below adjust for
11613 that situation (the shift count is >= 32 in that case). */
11614 if (dot)
11615 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11616 return "rlw%I2nm %0,%1,%h2,%3,%4";
11617 }
11618
11619 gcc_unreachable ();
11620 }
11621
11622 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11623 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11624 ASHIFT, or LSHIFTRT) in mode MODE. */
11625
11626 bool
11627 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11628 {
11629 int nb, ne;
11630
11631 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11632 return false;
11633
11634 int n = GET_MODE_PRECISION (mode);
11635
11636 int sh = INTVAL (XEXP (shift, 1));
11637 if (sh < 0 || sh >= n)
11638 return false;
11639
11640 rtx_code code = GET_CODE (shift);
11641
11642 /* Convert any shift by 0 to a rotate, to simplify the code below. */
11643 if (sh == 0)
11644 code = ROTATE;
11645
11646 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11647 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11648 code = ASHIFT;
11649 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11650 {
11651 code = LSHIFTRT;
11652 sh = n - sh;
11653 }
11654
11655 /* DImode rotates need rldimi. */
11656 if (mode == DImode && code == ROTATE)
11657 return (ne == sh);
11658
11659 /* SImode rotates need rlwimi. */
11660 if (mode == SImode && code == ROTATE)
11661 return (nb < 32 && ne < 32 && sh < 32);
11662
11663 /* Wrap-around masks are only okay for rotates. */
11664 if (ne > nb)
11665 return false;
11666
11667 /* Don't allow ASHIFT if the mask is wrong for that. */
11668 if (code == ASHIFT && ne < sh)
11669 return false;
11670
11671 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11672 if the mask is wrong for that. */
11673 if (nb < 32 && ne < 32 && sh < 32
11674 && !(code == LSHIFTRT && nb >= 32 - sh))
11675 return true;
11676
11677 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11678 if the mask is wrong for that. */
11679 if (code == LSHIFTRT)
11680 sh = 64 - sh;
11681 if (ne == sh)
11682 return !(code == LSHIFTRT && nb >= sh);
11683
11684 return false;
11685 }
11686
11687 /* Return the instruction template for an insert with mask in mode MODE, with
11688 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11689
11690 const char *
11691 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11692 {
11693 int nb, ne;
11694
11695 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11696 gcc_unreachable ();
11697
11698 /* Prefer rldimi because rlwimi is cracked. */
11699 if (TARGET_POWERPC64
11700 && (!dot || mode == DImode)
11701 && GET_CODE (operands[4]) != LSHIFTRT
11702 && ne == INTVAL (operands[2]))
11703 {
11704 operands[3] = GEN_INT (63 - nb);
11705 if (dot)
11706 return "rldimi. %0,%1,%2,%3";
11707 return "rldimi %0,%1,%2,%3";
11708 }
11709
11710 if (nb < 32 && ne < 32)
11711 {
11712 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11713 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11714 operands[3] = GEN_INT (31 - nb);
11715 operands[4] = GEN_INT (31 - ne);
11716 if (dot)
11717 return "rlwimi. %0,%1,%2,%3,%4";
11718 return "rlwimi %0,%1,%2,%3,%4";
11719 }
11720
11721 gcc_unreachable ();
11722 }
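
/* Example (illustrative): inserting the low word of %1 into the high
   word of %0 arrives here as (rotate:DI %1 32) under mask
   0xffffffff00000000 (nb == 63, ne == 32 == sh), and the rldimi case
   above yields "rldimi %0,%1,32,0".  */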
11723
11724 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11725 using two machine instructions. */
11726
11727 bool
11728 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11729 {
11730 /* There are two kinds of AND we can handle with two insns:
11731 1) those we can do with two rl* insns;
11732 2) ori[s];xori[s].
11733
11734 We do not handle that last case yet. */
11735
11736 /* If there is just one stretch of ones, we can do it. */
11737 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11738 return true;
11739
11740 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11741 one insn, we can do the whole thing with two. */
11742 unsigned HOST_WIDE_INT val = INTVAL (c);
11743 unsigned HOST_WIDE_INT bit1 = val & -val;
11744 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11745 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11746 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11747 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11748 }
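
/* Worked example (illustrative), DImode: c = 0xff00ff0000000000 has
   two runs with a hole at bits 48..55. Above, bit2 == bit 48 and
   bit3 == bit 56, so the filled mask val + bit3 - bit2 is
   0xffffff0000000000 -- a single run ending at bit 63, which one
   rldicr can implement -- hence this AND is valid in two insns.  */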
11749
11750 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11751 If EXPAND is true, split rotate-and-mask instructions we generate to
11752 their constituent parts as well (this is used during expand); if DOT
11753 is 1, make the last insn a record-form instruction clobbering the
11754 destination GPR and setting the CC reg (from operands[3]); if 2, set
11755 that GPR as well as the CC reg. */
11756
11757 void
11758 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11759 {
11760 gcc_assert (!(expand && dot));
11761
11762 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11763
11764 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11765 shift right. This generates better code than doing the masks without
11766 shifts, or shifting first right and then left. */
11767 int nb, ne;
11768 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11769 {
11770 gcc_assert (mode == DImode);
11771
11772 int shift = 63 - nb;
11773 if (expand)
11774 {
11775 rtx tmp1 = gen_reg_rtx (DImode);
11776 rtx tmp2 = gen_reg_rtx (DImode);
11777 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11778 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11779 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11780 }
11781 else
11782 {
11783 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11784 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11785 emit_move_insn (operands[0], tmp);
11786 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11787 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11788 }
11789 return;
11790 }
11791
11792 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11793 that does the rest. */
11794 unsigned HOST_WIDE_INT bit1 = val & -val;
11795 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11796 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11797 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11798
11799 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11800 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
11801
11802 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11803
11804 /* Two "no-rotate"-and-mask instructions, for SImode. */
11805 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11806 {
11807 gcc_assert (mode == SImode);
11808
11809 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11810 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11811 emit_move_insn (reg, tmp);
11812 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11813 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11814 return;
11815 }
11816
11817 gcc_assert (mode == DImode);
11818
11819 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11820 insns; we have to do the first in SImode, because it wraps. */
11821 if (mask2 <= 0xffffffff
11822 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11823 {
11824 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11825 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11826 GEN_INT (mask1));
11827 rtx reg_low = gen_lowpart (SImode, reg);
11828 emit_move_insn (reg_low, tmp);
11829 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11830 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11831 return;
11832 }
11833
11834 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11835 at the top end), rotate back and clear the other hole. */
11836 int right = exact_log2 (bit3);
11837 int left = 64 - right;
11838
11839 /* Rotate the mask too. */
11840 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11841
11842 if (expand)
11843 {
11844 rtx tmp1 = gen_reg_rtx (DImode);
11845 rtx tmp2 = gen_reg_rtx (DImode);
11846 rtx tmp3 = gen_reg_rtx (DImode);
11847 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11848 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11849 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11850 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11851 }
11852 else
11853 {
11854 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11855 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11856 emit_move_insn (operands[0], tmp);
11857 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11858 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11859 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11860 }
11861 }
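
/* RTL sketch for the single-run case above (illustrative): with
   operands[2] == 0x0fffff00 (nb == 27, ne == 8), shift == 36 and the
   non-expand path emits

	(set D (and:DI (ashift:DI S (const_int 36))
		       (const_int 0xfffff00000000000)))
	(set D (lshiftrt:DI D (const_int 36)))

   each of which matches one rotate-and-mask instruction.  */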
11862 \f
11863 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
11864 for lfq and stfq insns iff the registers are hard registers. */
11865
11866 int
11867 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11868 {
11869 /* We might have been passed a SUBREG. */
11870 if (!REG_P (reg1) || !REG_P (reg2))
11871 return 0;
11872
11873 /* We might have been passed non-floating-point registers. */
11874 if (!FP_REGNO_P (REGNO (reg1))
11875 || !FP_REGNO_P (REGNO (reg2)))
11876 return 0;
11877
11878 return (REGNO (reg1) == REGNO (reg2) - 1);
11879 }
11880
11881 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11882 addr1 and addr2 must be in consecutive memory locations
11883 (addr2 == addr1 + 8). */
11884
11885 int
11886 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11887 {
11888 rtx addr1, addr2;
11889 unsigned int reg1, reg2;
11890 int offset1, offset2;
11891
11892 /* The mems cannot be volatile. */
11893 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11894 return 0;
11895
11896 addr1 = XEXP (mem1, 0);
11897 addr2 = XEXP (mem2, 0);
11898
11899 /* Extract an offset (if used) from the first addr. */
11900 if (GET_CODE (addr1) == PLUS)
11901 {
11902 /* If not a REG, return zero. */
11903 if (!REG_P (XEXP (addr1, 0)))
11904 return 0;
11905 else
11906 {
11907 reg1 = REGNO (XEXP (addr1, 0));
11908 /* The offset must be constant! */
11909 if (!CONST_INT_P (XEXP (addr1, 1)))
11910 return 0;
11911 offset1 = INTVAL (XEXP (addr1, 1));
11912 }
11913 }
11914 else if (!REG_P (addr1))
11915 return 0;
11916 else
11917 {
11918 reg1 = REGNO (addr1);
11919 /* This was a simple (mem (reg)) expression. Offset is 0. */
11920 offset1 = 0;
11921 }
11922
11923 /* And now for the second addr. */
11924 if (GET_CODE (addr2) == PLUS)
11925 {
11926 /* If not a REG, return zero. */
11927 if (!REG_P (XEXP (addr2, 0)))
11928 return 0;
11929 else
11930 {
11931 reg2 = REGNO (XEXP (addr2, 0));
11932 /* The offset must be constant. */
11933 if (!CONST_INT_P (XEXP (addr2, 1)))
11934 return 0;
11935 offset2 = INTVAL (XEXP (addr2, 1));
11936 }
11937 }
11938 else if (!REG_P (addr2))
11939 return 0;
11940 else
11941 {
11942 reg2 = REGNO (addr2);
11943 /* This was a simple (mem (reg)) expression. Offset is 0. */
11944 offset2 = 0;
11945 }
11946
11947 /* Both of these must have the same base register. */
11948 if (reg1 != reg2)
11949 return 0;
11950
11951 /* The offset for the second addr must be 8 more than the first addr. */
11952 if (offset2 != offset1 + 8)
11953 return 0;
11954
11955 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11956 instructions. */
11957 return 1;
11958 }
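
/* Example (illustrative): (mem:DF (plus:DI (reg r9) (const_int 16)))
   and (mem:DF (plus:DI (reg r9) (const_int 24))) pass all the checks
   (same base register, offsets 8 apart), so a peephole may combine the
   two adjacent accesses into a single lfq or stfq.  */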
11959 \f
11960 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
11961 need to use DDmode, in all other cases we can use the same mode. */
11962 static machine_mode
11963 rs6000_secondary_memory_needed_mode (machine_mode mode)
11964 {
11965 if (lra_in_progress && mode == SDmode)
11966 return DDmode;
11967 return mode;
11968 }
11969
11970 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11971 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11972 only work on the traditional altivec registers, note if an altivec register
11973 was chosen. */
11974
11975 static enum rs6000_reg_type
11976 register_to_reg_type (rtx reg, bool *is_altivec)
11977 {
11978 HOST_WIDE_INT regno;
11979 enum reg_class rclass;
11980
11981 if (SUBREG_P (reg))
11982 reg = SUBREG_REG (reg);
11983
11984 if (!REG_P (reg))
11985 return NO_REG_TYPE;
11986
11987 regno = REGNO (reg);
11988 if (!HARD_REGISTER_NUM_P (regno))
11989 {
11990 if (!lra_in_progress && !reload_completed)
11991 return PSEUDO_REG_TYPE;
11992
11993 regno = true_regnum (reg);
11994 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
11995 return PSEUDO_REG_TYPE;
11996 }
11997
11998 gcc_assert (regno >= 0);
11999
12000 if (is_altivec && ALTIVEC_REGNO_P (regno))
12001 *is_altivec = true;
12002
12003 rclass = rs6000_regno_regclass[regno];
12004 return reg_class_to_reg_type[(int)rclass];
12005 }
12006
12007 /* Helper function to return the cost of adding a TOC entry address. */
12008
12009 static inline int
12010 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
12011 {
12012 int ret;
12013
12014 if (TARGET_CMODEL != CMODEL_SMALL)
12015 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
12016
12017 else
12018 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
12019
12020 return ret;
12021 }
12022
12023 /* Helper function for rs6000_secondary_reload to determine whether the memory
12024 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12025 needs reloading. Return negative if the memory is not handled by the memory
12026 helper functions and to try a different reload method, 0 if no additional
12027 instructions are need, and positive to give the extra cost for the
12028 memory. */
12029
12030 static int
12031 rs6000_secondary_reload_memory (rtx addr,
12032 enum reg_class rclass,
12033 machine_mode mode)
12034 {
12035 int extra_cost = 0;
12036 rtx reg, and_arg, plus_arg0, plus_arg1;
12037 addr_mask_type addr_mask;
12038 const char *type = NULL;
12039 const char *fail_msg = NULL;
12040
12041 if (GPR_REG_CLASS_P (rclass))
12042 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12043
12044 else if (rclass == FLOAT_REGS)
12045 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12046
12047 else if (rclass == ALTIVEC_REGS)
12048 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12049
12050 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12051 else if (rclass == VSX_REGS)
12052 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
12053 & ~RELOAD_REG_AND_M16);
12054
12055 /* If the register allocator hasn't made up its mind yet on the register
12056 class to use, settle on defaults to use. */
12057 else if (rclass == NO_REGS)
12058 {
12059 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
12060 & ~RELOAD_REG_AND_M16);
12061
12062 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
12063 addr_mask &= ~(RELOAD_REG_INDEXED
12064 | RELOAD_REG_PRE_INCDEC
12065 | RELOAD_REG_PRE_MODIFY);
12066 }
12067
12068 else
12069 addr_mask = 0;
12070
12071 /* If the register isn't valid in this register class, just return now. */
12072 if ((addr_mask & RELOAD_REG_VALID) == 0)
12073 {
12074 if (TARGET_DEBUG_ADDR)
12075 {
12076 fprintf (stderr,
12077 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12078 "not valid in class\n",
12079 GET_MODE_NAME (mode), reg_class_names[rclass]);
12080 debug_rtx (addr);
12081 }
12082
12083 return -1;
12084 }
12085
12086 switch (GET_CODE (addr))
12087 {
12088 /* Does the register class support auto-update forms for this mode? We
12089 don't need a scratch register, since PowerPC only supports
12090 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12091 case PRE_INC:
12092 case PRE_DEC:
12093 reg = XEXP (addr, 0);
12094 if (!base_reg_operand (addr, GET_MODE (reg)))
12095 {
12096 fail_msg = "no base register #1";
12097 extra_cost = -1;
12098 }
12099
12100 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12101 {
12102 extra_cost = 1;
12103 type = "update";
12104 }
12105 break;
12106
12107 case PRE_MODIFY:
12108 reg = XEXP (addr, 0);
12109 plus_arg1 = XEXP (addr, 1);
12110 if (!base_reg_operand (reg, GET_MODE (reg))
12111 || GET_CODE (plus_arg1) != PLUS
12112 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
12113 {
12114 fail_msg = "bad PRE_MODIFY";
12115 extra_cost = -1;
12116 }
12117
12118 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12119 {
12120 extra_cost = 1;
12121 type = "update";
12122 }
12123 break;
12124
12125 /* Do we need to simulate AND -16 to clear the bottom address bits used
12126 in VMX load/stores? Only allow the AND for vector sizes. */
12127 case AND:
12128 and_arg = XEXP (addr, 0);
12129 if (GET_MODE_SIZE (mode) != 16
12130 || !CONST_INT_P (XEXP (addr, 1))
12131 || INTVAL (XEXP (addr, 1)) != -16)
12132 {
12133 fail_msg = "bad Altivec AND #1";
12134 extra_cost = -1;
12135 }
12136
12137 else if (rclass != ALTIVEC_REGS)
12138 {
12139 if (legitimate_indirect_address_p (and_arg, false))
12140 extra_cost = 1;
12141
12142 else if (legitimate_indexed_address_p (and_arg, false))
12143 extra_cost = 2;
12144
12145 else
12146 {
12147 fail_msg = "bad Altivec AND #2";
12148 extra_cost = -1;
12149 }
12150
12151 type = "and";
12152 }
12153 break;
12154
12155 /* If this is an indirect address, make sure it is a base register. */
12156 case REG:
12157 case SUBREG:
12158 if (!legitimate_indirect_address_p (addr, false))
12159 {
12160 extra_cost = 1;
12161 type = "move";
12162 }
12163 break;
12164
12165 /* If this is an indexed address, make sure the register class can handle
12166 indexed addresses for this mode. */
12167 case PLUS:
12168 plus_arg0 = XEXP (addr, 0);
12169 plus_arg1 = XEXP (addr, 1);
12170
12171 /* (plus (plus (reg) (constant)) (constant)) is generated during
12172 push_reload processing, so handle it now. */
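      /* For example, push_reload can hand us
	 (plus (plus (reg r1) (const_int 32512)) (const_int 8)); the inner
	 PLUS will be reloaded separately, so all that matters here is whether
	 the register class supports offset addressing.  */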
12173 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12174 {
12175 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12176 {
12177 extra_cost = 1;
12178 type = "offset";
12179 }
12180 }
12181
12182 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12183 push_reload processing, so handle it now. */
12184 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12185 {
12186 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12187 {
12188 extra_cost = 1;
12189 type = "indexed #2";
12190 }
12191 }
12192
12193 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12194 {
12195 fail_msg = "no base register #2";
12196 extra_cost = -1;
12197 }
12198
12199 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12200 {
12201 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12202 || !legitimate_indexed_address_p (addr, false))
12203 {
12204 extra_cost = 1;
12205 type = "indexed";
12206 }
12207 }
12208
12209 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12210 && CONST_INT_P (plus_arg1))
12211 {
12212 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12213 {
12214 extra_cost = 1;
12215 type = "vector d-form offset";
12216 }
12217 }
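      /* For example, the ISA 3.0 "lxv"/"stxv" DQ-form displacement must be a
	 multiple of 16, so an offset such as 16032 is usable directly, while
	 16036 fails quad_address_offset_p and costs one insn to reload the
	 address (the particular offsets are illustrative only).  */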
12218
12219 /* Make sure the register class can handle offset addresses. */
12220 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12221 {
12222 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12223 {
12224 extra_cost = 1;
12225 type = "offset #2";
12226 }
12227 }
12228
12229 else
12230 {
12231 fail_msg = "bad PLUS";
12232 extra_cost = -1;
12233 }
12234
12235 break;
12236
12237 case LO_SUM:
12238 /* Quad offsets are restricted and can't handle normal addresses. */
12239 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12240 {
12241 extra_cost = -1;
12242 type = "vector d-form lo_sum";
12243 }
12244
12245 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12246 {
12247 fail_msg = "bad LO_SUM";
12248 extra_cost = -1;
12249 }
12250
12251 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12252 {
12253 extra_cost = 1;
12254 type = "lo_sum";
12255 }
12256 break;
12257
12258 /* Static addresses need to create a TOC entry. */
12259 case CONST:
12260 case SYMBOL_REF:
12261 case LABEL_REF:
12262 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12263 {
12264 extra_cost = -1;
12265 type = "vector d-form lo_sum #2";
12266 }
12267
12268 else
12269 {
12270 type = "address";
12271 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12272 }
12273 break;
12274
12275 /* TOC references look like offsettable memory. */
12276 case UNSPEC:
12277 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12278 {
12279 fail_msg = "bad UNSPEC";
12280 extra_cost = -1;
12281 }
12282
12283 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12284 {
12285 extra_cost = -1;
12286 type = "vector d-form lo_sum #3";
12287 }
12288
12289 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12290 {
12291 extra_cost = 1;
12292 type = "toc reference";
12293 }
12294 break;
12295
12296 default:
12297 {
12298 fail_msg = "bad address";
12299 extra_cost = -1;
12300 }
12301 }
12302
12303 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12304 {
12305 if (extra_cost < 0)
12306 fprintf (stderr,
12307 "rs6000_secondary_reload_memory error: mode = %s, "
12308 "class = %s, addr_mask = '%s', %s\n",
12309 GET_MODE_NAME (mode),
12310 reg_class_names[rclass],
12311 rs6000_debug_addr_mask (addr_mask, false),
12312 (fail_msg != NULL) ? fail_msg : "<bad address>");
12313
12314 else
12315 fprintf (stderr,
12316 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12317 "addr_mask = '%s', extra cost = %d, %s\n",
12318 GET_MODE_NAME (mode),
12319 reg_class_names[rclass],
12320 rs6000_debug_addr_mask (addr_mask, false),
12321 extra_cost,
12322 (type) ? type : "<none>");
12323
12324 debug_rtx (addr);
12325 }
12326
12327 return extra_cost;
12328 }
12329
12330 /* Helper function for rs6000_secondary_reload to return true if a move to a
12331 different register class is really a simple move. */
12332
12333 static bool
12334 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12335 enum rs6000_reg_type from_type,
12336 machine_mode mode)
12337 {
12338 int size = GET_MODE_SIZE (mode);
12339
12340 /* Add support for various direct moves available. In this function, we only
12341 look at cases where we don't need any extra registers, and one or more
12342 simple move insns are issued. Originally, small integers were not allowed
12343 in FPR/VSX registers. Single-precision binary floating point is not a simple
12344 move because we need to convert to the single-precision memory layout.
12345 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12346 need special direct move handling, which we do not support yet. */
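  /* For example, on a 64-bit ISA 2.07 target a DImode move between a GPR and
     a VSX register is a single mtvsrd/mfvsrd and therefore "simple", while an
     SFmode move is not, since the value must first be converted to or from
     the scalar memory layout.  */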
12347 if (TARGET_DIRECT_MOVE
12348 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12349 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12350 {
12351 if (TARGET_POWERPC64)
12352 {
12353 /* ISA 2.07: MTVSRD or MFVSRD. */
12354 if (size == 8)
12355 return true;
12356
12357 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12358 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12359 return true;
12360 }
12361
12362 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12363 if (TARGET_P8_VECTOR)
12364 {
12365 if (mode == SImode)
12366 return true;
12367
12368 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12369 return true;
12370 }
12371
12372 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12373 if (mode == SDmode)
12374 return true;
12375 }
12376
12377 /* Move to/from SPR. */
12378 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12379 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12380 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12381 return true;
12382
12383 return false;
12384 }
12385
12386 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
12387 special direct moves that involve allocating an extra register. If such a
12388 helper insn exists, store its insn code and extra cost in SRI and return
12389 true; otherwise return false. */
12390
12391 static bool
12392 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12393 enum rs6000_reg_type from_type,
12394 machine_mode mode,
12395 secondary_reload_info *sri,
12396 bool altivec_p)
12397 {
12398 bool ret = false;
12399 enum insn_code icode = CODE_FOR_nothing;
12400 int cost = 0;
12401 int size = GET_MODE_SIZE (mode);
12402
12403 if (TARGET_POWERPC64 && size == 16)
12404 {
12405 /* Handle moving 128-bit values from GPRs to VSX registers on
12406 ISA 2.07 (power8, power9) when running in 64-bit mode using
12407 XXPERMDI to glue the two 64-bit values back together. */
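	  /* Roughly: mtvsrd vs32,r4; mtvsrd vs33,r5; xxpermdi vs34,vs32,vs33,0
	     (the register numbers are illustrative only).  */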
12408 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12409 {
12410 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12411 icode = reg_addr[mode].reload_vsx_gpr;
12412 }
12413
12414 /* Handle moving 128-bit values from VSX registers to GPRs on
12415 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12416 bottom 64-bit value. */
12417 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12418 {
12419 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12420 icode = reg_addr[mode].reload_gpr_vsx;
12421 }
12422 }
12423
12424 else if (TARGET_POWERPC64 && mode == SFmode)
12425 {
12426 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12427 {
12428 cost = 3; /* xscvdpspn, mfvsrd, and. */
12429 icode = reg_addr[mode].reload_gpr_vsx;
12430 }
12431
12432 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12433 {
12434 cost = 2; /* mtvsrwz, xscvspdpn. */
12435 icode = reg_addr[mode].reload_vsx_gpr;
12436 }
12437 }
12438
12439 else if (!TARGET_POWERPC64 && size == 8)
12440 {
12441 /* Handle moving 64-bit values from GPRs to floating point registers on
12442 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12443 32-bit values back together. Altivec register classes must be handled
12444 specially since a different instruction is used, and the secondary
12445 reload support requires a single instruction class in the scratch
12446 register constraint. However, right now TFmode is not allowed in
12447 Altivec registers, so the pattern will never match. */
12448 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12449 {
12450 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12451 icode = reg_addr[mode].reload_fpr_gpr;
12452 }
12453 }
12454
12455 if (icode != CODE_FOR_nothing)
12456 {
12457 ret = true;
12458 if (sri)
12459 {
12460 sri->icode = icode;
12461 sri->extra_cost = cost;
12462 }
12463 }
12464
12465 return ret;
12466 }
12467
12468 /* Return whether a move between two register classes can be done either
12469 directly (simple move) or via a pattern that uses a single extra temporary
12470 (using ISA 2.07's direct move in this case). */
12471
12472 static bool
12473 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12474 enum rs6000_reg_type from_type,
12475 machine_mode mode,
12476 secondary_reload_info *sri,
12477 bool altivec_p)
12478 {
12479 /* Fall back to load/store reloads if either type is not a register. */
12480 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12481 return false;
12482
12483 /* If we haven't allocated registers yet, assume the move can be done for the
12484 standard register types. */
12485 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12486 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12487 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12488 return true;
12489
12490 /* A move within the same set of registers is a simple move for
12491 non-specialized registers. */
12492 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12493 return true;
12494
12495 /* Check whether a simple move can be done directly. */
12496 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12497 {
12498 if (sri)
12499 {
12500 sri->icode = CODE_FOR_nothing;
12501 sri->extra_cost = 0;
12502 }
12503 return true;
12504 }
12505
12506 /* Now check if we can do it in a few steps. */
12507 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12508 altivec_p);
12509 }
12510
12511 /* Inform reload about cases where moving X with a mode MODE to a register in
12512 RCLASS requires an extra scratch or immediate register. Return the class
12513 needed for the immediate register.
12514
12515 For VSX and Altivec, we may need a register to convert sp+offset into
12516 reg+reg.
12517
12518 For misaligned 64-bit gpr loads and stores we need a register to
12519 convert an offset address to indirect. */
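
/* For example, an Altivec V4SI load from sp+offset has no usable D-form
   address, so the reload helper first computes the address into the scratch
   register, roughly: addi r11,r1,offset ; lvx v2,0,r11 (the register numbers
   are illustrative only).  */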
12520
12521 static reg_class_t
12522 rs6000_secondary_reload (bool in_p,
12523 rtx x,
12524 reg_class_t rclass_i,
12525 machine_mode mode,
12526 secondary_reload_info *sri)
12527 {
12528 enum reg_class rclass = (enum reg_class) rclass_i;
12529 reg_class_t ret = ALL_REGS;
12530 enum insn_code icode;
12531 bool default_p = false;
12532 bool done_p = false;
12533
12534 /* Allow subreg of memory before/during reload. */
12535 bool memory_p = (MEM_P (x)
12536 || (!reload_completed && SUBREG_P (x)
12537 && MEM_P (SUBREG_REG (x))));
12538
12539 sri->icode = CODE_FOR_nothing;
12540 sri->t_icode = CODE_FOR_nothing;
12541 sri->extra_cost = 0;
12542 icode = ((in_p)
12543 ? reg_addr[mode].reload_load
12544 : reg_addr[mode].reload_store);
12545
12546 if (REG_P (x) || register_operand (x, mode))
12547 {
12548 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12549 bool altivec_p = (rclass == ALTIVEC_REGS);
12550 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12551
12552 if (!in_p)
12553 std::swap (to_type, from_type);
12554
12555 /* Can we do a direct move of some sort? */
12556 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12557 altivec_p))
12558 {
12559 icode = (enum insn_code)sri->icode;
12560 default_p = false;
12561 done_p = true;
12562 ret = NO_REGS;
12563 }
12564 }
12565
12566 /* Make sure 0.0 is not reloaded or forced into memory. */
12567 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12568 {
12569 ret = NO_REGS;
12570 default_p = false;
12571 done_p = true;
12572 }
12573
12574 /* If this is a scalar floating point value and we want to load it into the
12575 traditional Altivec registers, move it via a traditional floating
12576 point register, unless we have D-form addressing. Also make sure that
12577 non-zero constants use a FPR. */
12578 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12579 && !mode_supports_vmx_dform (mode)
12580 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12581 && (memory_p || CONST_DOUBLE_P (x)))
12582 {
12583 ret = FLOAT_REGS;
12584 default_p = false;
12585 done_p = true;
12586 }
12587
12588 /* Handle reload of load/stores if we have reload helper functions. */
12589 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12590 {
12591 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12592 mode);
12593
12594 if (extra_cost >= 0)
12595 {
12596 done_p = true;
12597 ret = NO_REGS;
12598 if (extra_cost > 0)
12599 {
12600 sri->extra_cost = extra_cost;
12601 sri->icode = icode;
12602 }
12603 }
12604 }
12605
12606 /* Handle unaligned loads and stores of integer registers. */
12607 if (!done_p && TARGET_POWERPC64
12608 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12609 && memory_p
12610 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12611 {
12612 rtx addr = XEXP (x, 0);
12613 rtx off = address_offset (addr);
12614
12615 if (off != NULL_RTX)
12616 {
12617 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12618 unsigned HOST_WIDE_INT offset = INTVAL (off);
12619
12620 /* We need a secondary reload when our legitimate_address_p
12621 says the address is good (as otherwise the entire address
12622 will be reloaded), and the offset is not a multiple of
12623 four or we have an address wrap. Address wrap will only
12624 occur for LO_SUMs since legitimate_offset_address_p
12625 rejects addresses for 16-byte mems that will wrap. */
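	      /* For example, a DImode access at r3+0x7ffe (extra = 0) is a
		 legitimate offset address, but "ld"/"std" are DS-form insns
		 whose displacement must be a multiple of 4, so we take the
		 secondary reload below and compute the address into the
		 scratch register instead.  */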
12626 if (GET_CODE (addr) == LO_SUM
12627 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12628 && ((offset & 3) != 0
12629 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12630 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12631 && (offset & 3) != 0))
12632 {
12633 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12634 if (in_p)
12635 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12636 : CODE_FOR_reload_di_load);
12637 else
12638 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12639 : CODE_FOR_reload_di_store);
12640 sri->extra_cost = 2;
12641 ret = NO_REGS;
12642 done_p = true;
12643 }
12644 else
12645 default_p = true;
12646 }
12647 else
12648 default_p = true;
12649 }
12650
12651 if (!done_p && !TARGET_POWERPC64
12652 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12653 && memory_p
12654 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12655 {
12656 rtx addr = XEXP (x, 0);
12657 rtx off = address_offset (addr);
12658
12659 if (off != NULL_RTX)
12660 {
12661 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12662 unsigned HOST_WIDE_INT offset = INTVAL (off);
12663
12664 /* We need a secondary reload when our legitimate_address_p
12665 says the address is good (as otherwise the entire address
12666 will be reloaded), and we have a wrap.
12667
12668 legitimate_lo_sum_address_p allows LO_SUM addresses to
12669 have any offset so test for wrap in the low 16 bits.
12670
12671 legitimate_offset_address_p checks for the range
12672 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12673 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12674 [0x7ff4,0x7fff] respectively, so test for the
12675 intersection of these ranges, [0x7ffc,0x7fff] and
12676 [0x7ff4,0x7ff7] respectively.
12677
12678 Note that the address we see here may have been
12679 manipulated by legitimize_reload_address. */
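	      /* For example, with DImode (extra = 4) an offset of 0x7ffc
		 gives 0x7ffc - (0x8000 - 4) = 0, which is below
		 UNITS_PER_WORD, so the second word would wrap and we reload
		 the address; an offset of 0x7ff8 underflows to a huge
		 unsigned value and passes.  */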
12680 if (GET_CODE (addr) == LO_SUM
12681 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12682 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12683 {
12684 if (in_p)
12685 sri->icode = CODE_FOR_reload_si_load;
12686 else
12687 sri->icode = CODE_FOR_reload_si_store;
12688 sri->extra_cost = 2;
12689 ret = NO_REGS;
12690 done_p = true;
12691 }
12692 else
12693 default_p = true;
12694 }
12695 else
12696 default_p = true;
12697 }
12698
12699 if (!done_p)
12700 default_p = true;
12701
12702 if (default_p)
12703 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12704
12705 gcc_assert (ret != ALL_REGS);
12706
12707 if (TARGET_DEBUG_ADDR)
12708 {
12709 fprintf (stderr,
12710 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12711 "mode = %s",
12712 reg_class_names[ret],
12713 in_p ? "true" : "false",
12714 reg_class_names[rclass],
12715 GET_MODE_NAME (mode));
12716
12717 if (reload_completed)
12718 fputs (", after reload", stderr);
12719
12720 if (!done_p)
12721 fputs (", done_p not set", stderr);
12722
12723 if (default_p)
12724 fputs (", default secondary reload", stderr);
12725
12726 if (sri->icode != CODE_FOR_nothing)
12727 fprintf (stderr, ", reload func = %s, extra cost = %d",
12728 insn_data[sri->icode].name, sri->extra_cost);
12729
12730 else if (sri->extra_cost > 0)
12731 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12732
12733 fputs ("\n", stderr);
12734 debug_rtx (x);
12735 }
12736
12737 return ret;
12738 }
12739
12740 /* Better tracing for rs6000_secondary_reload_inner. */
12741
12742 static void
12743 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12744 bool store_p)
12745 {
12746 rtx set, clobber;
12747
12748 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12749
12750 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12751 store_p ? "store" : "load");
12752
12753 if (store_p)
12754 set = gen_rtx_SET (mem, reg);
12755 else
12756 set = gen_rtx_SET (reg, mem);
12757
12758 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12759 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12760 }
12761
12762 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12763 ATTRIBUTE_NORETURN;
12764
12765 static void
12766 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12767 bool store_p)
12768 {
12769 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12770 gcc_unreachable ();
12771 }
12772
12773 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12774 reload helper functions. These were identified in
12775 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12776 reload, it calls the insns:
12777 reload_<RELOAD:mode>_<P:mptrsize>_store
12778 reload_<RELOAD:mode>_<P:mptrsize>_load
12779
12780 which in turn calls this function, to do whatever is necessary to create
12781 valid addresses. */
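
/* For example, a V2DF reload on a 64-bit target goes through the
   reload_v2df_di_store and reload_v2df_di_load patterns (assuming those
   expanders are enabled for the mode, as recorded in reg_addr[]).  */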
12782
12783 void
12784 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12785 {
12786 int regno = true_regnum (reg);
12787 machine_mode mode = GET_MODE (reg);
12788 addr_mask_type addr_mask;
12789 rtx addr;
12790 rtx new_addr;
12791 rtx op_reg, op0, op1;
12792 rtx and_op;
12793 rtx cc_clobber;
12794 rtvec rv;
12795
12796 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12797 || !base_reg_operand (scratch, GET_MODE (scratch)))
12798 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12799
12800 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12801 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12802
12803 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12804 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12805
12806 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12807 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12808
12809 else
12810 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12811
12812 /* Make sure the mode is valid in this register class. */
12813 if ((addr_mask & RELOAD_REG_VALID) == 0)
12814 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12815
12816 if (TARGET_DEBUG_ADDR)
12817 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12818
12819 new_addr = addr = XEXP (mem, 0);
12820 switch (GET_CODE (addr))
12821 {
12822 /* Does the register class support auto update forms for this mode? If
12823 not, do the update now. We don't need a scratch register, since the
12824 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12825 case PRE_INC:
12826 case PRE_DEC:
12827 op_reg = XEXP (addr, 0);
12828 if (!base_reg_operand (op_reg, Pmode))
12829 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12830
12831 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12832 {
12833 int delta = GET_MODE_SIZE (mode);
12834 if (GET_CODE (addr) == PRE_DEC)
12835 delta = -delta;
12836 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12837 new_addr = op_reg;
12838 }
12839 break;
12840
12841 case PRE_MODIFY:
12842 op0 = XEXP (addr, 0);
12843 op1 = XEXP (addr, 1);
12844 if (!base_reg_operand (op0, Pmode)
12845 || GET_CODE (op1) != PLUS
12846 || !rtx_equal_p (op0, XEXP (op1, 0)))
12847 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12848
12849 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12850 {
12851 emit_insn (gen_rtx_SET (op0, op1));
12852 new_addr = op0;
12853 }
12854 break;
12855
12856 /* Do we need to simulate AND -16 to clear the bottom address bits used
12857 in VMX load/stores? */
12858 case AND:
12859 op0 = XEXP (addr, 0);
12860 op1 = XEXP (addr, 1);
12861 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12862 {
12863 if (REG_P (op0) || SUBREG_P (op0))
12864 op_reg = op0;
12865
12866 else if (GET_CODE (op1) == PLUS)
12867 {
12868 emit_insn (gen_rtx_SET (scratch, op1));
12869 op_reg = scratch;
12870 }
12871
12872 else
12873 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12874
12875 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12876 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12877 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12878 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12879 new_addr = scratch;
12880 }
12881 break;
12882
12883 /* If this is an indirect address, make sure it is a base register. */
12884 case REG:
12885 case SUBREG:
12886 if (!base_reg_operand (addr, GET_MODE (addr)))
12887 {
12888 emit_insn (gen_rtx_SET (scratch, addr));
12889 new_addr = scratch;
12890 }
12891 break;
12892
12893 /* If this is an indexed address, make sure the register class can handle
12894 indexed addresses for this mode. */
12895 case PLUS:
12896 op0 = XEXP (addr, 0);
12897 op1 = XEXP (addr, 1);
12898 if (!base_reg_operand (op0, Pmode))
12899 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12900
12901 else if (int_reg_operand (op1, Pmode))
12902 {
12903 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12904 {
12905 emit_insn (gen_rtx_SET (scratch, addr));
12906 new_addr = scratch;
12907 }
12908 }
12909
12910 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12911 {
12912 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12913 || !quad_address_p (addr, mode, false))
12914 {
12915 emit_insn (gen_rtx_SET (scratch, addr));
12916 new_addr = scratch;
12917 }
12918 }
12919
12920 /* Make sure the register class can handle offset addresses. */
12921 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12922 {
12923 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12924 {
12925 emit_insn (gen_rtx_SET (scratch, addr));
12926 new_addr = scratch;
12927 }
12928 }
12929
12930 else
12931 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12932
12933 break;
12934
12935 case LO_SUM:
12936 op0 = XEXP (addr, 0);
12937 op1 = XEXP (addr, 1);
12938 if (!base_reg_operand (op0, Pmode))
12939 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12940
12941 else if (int_reg_operand (op1, Pmode))
12942 {
12943 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12944 {
12945 emit_insn (gen_rtx_SET (scratch, addr));
12946 new_addr = scratch;
12947 }
12948 }
12949
12950 /* Quad offsets are restricted and can't handle normal addresses. */
12951 else if (mode_supports_dq_form (mode))
12952 {
12953 emit_insn (gen_rtx_SET (scratch, addr));
12954 new_addr = scratch;
12955 }
12956
12957 /* Make sure the register class can handle offset addresses. */
12958 else if (legitimate_lo_sum_address_p (mode, addr, false))
12959 {
12960 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12961 {
12962 emit_insn (gen_rtx_SET (scratch, addr));
12963 new_addr = scratch;
12964 }
12965 }
12966
12967 else
12968 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12969
12970 break;
12971
12972 case SYMBOL_REF:
12973 case CONST:
12974 case LABEL_REF:
12975 rs6000_emit_move (scratch, addr, Pmode);
12976 new_addr = scratch;
12977 break;
12978
12979 default:
12980 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12981 }
12982
12983 /* Adjust the address if it changed. */
12984 if (addr != new_addr)
12985 {
12986 mem = replace_equiv_address_nv (mem, new_addr);
12987 if (TARGET_DEBUG_ADDR)
12988 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12989 }
12990
12991 /* Now create the move. */
12992 if (store_p)
12993 emit_insn (gen_rtx_SET (mem, reg));
12994 else
12995 emit_insn (gen_rtx_SET (reg, mem));
12996
12997 return;
12998 }
12999
13000 /* Convert reloads involving 64-bit gprs and misaligned offset
13001 addressing, or multiple 32-bit gprs and offsets that are too large,
13002 to use indirect addressing. */
13003
13004 void
13005 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
13006 {
13007 int regno = true_regnum (reg);
13008 enum reg_class rclass;
13009 rtx addr;
13010 rtx scratch_or_premodify = scratch;
13011
13012 if (TARGET_DEBUG_ADDR)
13013 {
13014 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
13015 store_p ? "store" : "load");
13016 fprintf (stderr, "reg:\n");
13017 debug_rtx (reg);
13018 fprintf (stderr, "mem:\n");
13019 debug_rtx (mem);
13020 fprintf (stderr, "scratch:\n");
13021 debug_rtx (scratch);
13022 }
13023
13024 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
13025 gcc_assert (MEM_P (mem));
13026 rclass = REGNO_REG_CLASS (regno);
13027 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
13028 addr = XEXP (mem, 0);
13029
13030 if (GET_CODE (addr) == PRE_MODIFY)
13031 {
13032 gcc_assert (REG_P (XEXP (addr, 0))
13033 && GET_CODE (XEXP (addr, 1)) == PLUS
13034 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
13035 scratch_or_premodify = XEXP (addr, 0);
13036 addr = XEXP (addr, 1);
13037 }
13038 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
13039
13040 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
13041
13042 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
13043
13044 /* Now create the move. */
13045 if (store_p)
13046 emit_insn (gen_rtx_SET (mem, reg));
13047 else
13048 emit_insn (gen_rtx_SET (reg, mem));
13049
13050 return;
13051 }
13052
13053 /* Given an rtx X being reloaded into a reg required to be
13054 in class CLASS, return the class of reg to actually use.
13055 In general this is just CLASS; but on some machines
13056 in some cases it is preferable to use a more restrictive class.
13057
13058 On the RS/6000, we have to return NO_REGS when we want to reload a
13059 floating-point CONST_DOUBLE to force it to be copied to memory.
13060
13061 We also don't want to reload integer values into floating-point
13062 registers if we can at all help it. In fact, this can
13063 cause reload to die, if it tries to generate a reload of CTR
13064 into a FP register and discovers it doesn't have the memory location
13065 required.
13066
13067 ??? Would it be a good idea to have reload do the converse, that is
13068 try to reload floating modes into FP registers if possible?
13069 */
13070
13071 static enum reg_class
13072 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
13073 {
13074 machine_mode mode = GET_MODE (x);
13075 bool is_constant = CONSTANT_P (x);
13076
13077 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13078 reload class for it. */
13079 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13080 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
13081 return NO_REGS;
13082
13083 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
13084 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
13085 return NO_REGS;
13086
13087 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13088 the reloading of address expressions using PLUS into floating point
13089 registers. */
13090 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
13091 {
13092 if (is_constant)
13093 {
13094 /* Zero is always allowed in all VSX registers. */
13095 if (x == CONST0_RTX (mode))
13096 return rclass;
13097
13098 /* If this is a vector constant that can be formed with a few Altivec
13099 instructions, we want altivec registers. */
13100 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
13101 return ALTIVEC_REGS;
13102
13103 /* If this is an integer constant that can easily be loaded into
13104 vector registers, allow it. */
13105 if (CONST_INT_P (x))
13106 {
13107 HOST_WIDE_INT value = INTVAL (x);
13108
13109 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13110 2.06 can generate it in the Altivec registers with
13111 VSPLTI<x>. */
13112 if (value == -1)
13113 {
13114 if (TARGET_P8_VECTOR)
13115 return rclass;
13116 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13117 return ALTIVEC_REGS;
13118 else
13119 return NO_REGS;
13120 }
13121
13122 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13123 a sign extend in the Altivec registers. */
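	    /* For example, (const_int 5) splatted into V2DImode can be formed
	       as "xxspltib 34,5" followed by "vextsb2d" on ISA 3.0, avoiding
	       a load from memory.  */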
13124 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
13125 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
13126 return ALTIVEC_REGS;
13127 }
13128
13129 /* Force constant to memory. */
13130 return NO_REGS;
13131 }
13132
13133 /* D-form addressing can easily reload the value. */
13134 if (mode_supports_vmx_dform (mode)
13135 || mode_supports_dq_form (mode))
13136 return rclass;
13137
13138 /* If this is a scalar floating point value and we don't have D-form
13139 addressing, prefer the traditional floating point registers so that we
13140 can use D-form (register+offset) addressing. */
13141 if (rclass == VSX_REGS
13142 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
13143 return FLOAT_REGS;
13144
13145 /* Prefer the Altivec registers if Altivec is handling the vector
13146 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13147 loads. */
13148 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13149 || mode == V1TImode)
13150 return ALTIVEC_REGS;
13151
13152 return rclass;
13153 }
13154
13155 if (is_constant || GET_CODE (x) == PLUS)
13156 {
13157 if (reg_class_subset_p (GENERAL_REGS, rclass))
13158 return GENERAL_REGS;
13159 if (reg_class_subset_p (BASE_REGS, rclass))
13160 return BASE_REGS;
13161 return NO_REGS;
13162 }
13163
13164 /* For the vector pair and vector quad modes, prefer their natural register
13165 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13166 the GPR registers. */
13167 if (rclass == GEN_OR_FLOAT_REGS)
13168 {
13169 if (mode == OOmode)
13170 return VSX_REGS;
13171
13172 if (mode == XOmode)
13173 return FLOAT_REGS;
13174
13175 if (GET_MODE_CLASS (mode) == MODE_INT)
13176 return GENERAL_REGS;
13177 }
13178
13179 return rclass;
13180 }
13181
13182 /* Debug version of rs6000_preferred_reload_class. */
13183 static enum reg_class
13184 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13185 {
13186 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13187
13188 fprintf (stderr,
13189 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13190 "mode = %s, x:\n",
13191 reg_class_names[ret], reg_class_names[rclass],
13192 GET_MODE_NAME (GET_MODE (x)));
13193 debug_rtx (x);
13194
13195 return ret;
13196 }
13197
13198 /* If we are copying between FP or AltiVec registers and anything else, we need
13199 a memory location. The exception is when we are targeting ppc64 and the
13200 move instructions between FPRs and GPRs are available. Also, under VSX, you
13201 can copy vector registers from the FP register set to the Altivec register
13202 set and vice versa. */
13203
13204 static bool
13205 rs6000_secondary_memory_needed (machine_mode mode,
13206 reg_class_t from_class,
13207 reg_class_t to_class)
13208 {
13209 enum rs6000_reg_type from_type, to_type;
13210 bool altivec_p = ((from_class == ALTIVEC_REGS)
13211 || (to_class == ALTIVEC_REGS));
13212
13213 /* If a simple/direct move is available, we don't need secondary memory. */
13214 from_type = reg_class_to_reg_type[(int)from_class];
13215 to_type = reg_class_to_reg_type[(int)to_class];
13216
13217 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13218 (secondary_reload_info *)0, altivec_p))
13219 return false;
13220
13221 /* If we have a floating point or vector register class, we need to use
13222 memory to transfer the data. */
13223 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13224 return true;
13225
13226 return false;
13227 }
13228
13229 /* Debug version of rs6000_secondary_memory_needed. */
13230 static bool
13231 rs6000_debug_secondary_memory_needed (machine_mode mode,
13232 reg_class_t from_class,
13233 reg_class_t to_class)
13234 {
13235 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13236
13237 fprintf (stderr,
13238 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13239 "to_class = %s, mode = %s\n",
13240 ret ? "true" : "false",
13241 reg_class_names[from_class],
13242 reg_class_names[to_class],
13243 GET_MODE_NAME (mode));
13244
13245 return ret;
13246 }
13247
13248 /* Return the register class of a scratch register needed to copy IN into
13249 or out of a register in RCLASS in MODE. If it can be done directly,
13250 NO_REGS is returned. */
13251
13252 static enum reg_class
13253 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13254 rtx in)
13255 {
13256 int regno;
13257
13258 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13259 #if TARGET_MACHO
13260 && MACHOPIC_INDIRECT
13261 #endif
13262 ))
13263 {
13264 /* We cannot copy a symbolic operand directly into anything
13265 other than BASE_REGS for TARGET_ELF. So indicate that a
13266 register from BASE_REGS is needed as an intermediate
13267 register.
13268
13269 On Darwin, pic addresses require a load from memory, which
13270 needs a base register. */
13271 if (rclass != BASE_REGS
13272 && (SYMBOL_REF_P (in)
13273 || GET_CODE (in) == HIGH
13274 || GET_CODE (in) == LABEL_REF
13275 || GET_CODE (in) == CONST))
13276 return BASE_REGS;
13277 }
13278
13279 if (REG_P (in))
13280 {
13281 regno = REGNO (in);
13282 if (!HARD_REGISTER_NUM_P (regno))
13283 {
13284 regno = true_regnum (in);
13285 if (!HARD_REGISTER_NUM_P (regno))
13286 regno = -1;
13287 }
13288 }
13289 else if (SUBREG_P (in))
13290 {
13291 regno = true_regnum (in);
13292 if (!HARD_REGISTER_NUM_P (regno))
13293 regno = -1;
13294 }
13295 else
13296 regno = -1;
13297
13298 /* If we have VSX register moves, prefer moving scalar values between
13299 Altivec registers and GPRs by going via an FPR (and then via memory)
13300 instead of reloading the secondary memory address for Altivec moves. */
13301 if (TARGET_VSX
13302 && GET_MODE_SIZE (mode) < 16
13303 && !mode_supports_vmx_dform (mode)
13304 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13305 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13306 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13307 && (regno >= 0 && INT_REGNO_P (regno)))))
13308 return FLOAT_REGS;
13309
13310 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13311 into anything. */
13312 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13313 || (regno >= 0 && INT_REGNO_P (regno)))
13314 return NO_REGS;
13315
13316 /* Constants, memory, and VSX registers can go into VSX registers (both the
13317 traditional floating point and the altivec registers). */
13318 if (rclass == VSX_REGS
13319 && (regno == -1 || VSX_REGNO_P (regno)))
13320 return NO_REGS;
13321
13322 /* Constants, memory, and FP registers can go into FP registers. */
13323 if ((regno == -1 || FP_REGNO_P (regno))
13324 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13325 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13326
13327 /* Memory and AltiVec registers can go into AltiVec registers. */
13328 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13329 && rclass == ALTIVEC_REGS)
13330 return NO_REGS;
13331
13332 /* We can copy among the CR registers. */
13333 if ((rclass == CR_REGS || rclass == CR0_REGS)
13334 && regno >= 0 && CR_REGNO_P (regno))
13335 return NO_REGS;
13336
13337 /* Otherwise, we need GENERAL_REGS. */
13338 return GENERAL_REGS;
13339 }
13340
13341 /* Debug version of rs6000_secondary_reload_class. */
13342 static enum reg_class
13343 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13344 machine_mode mode, rtx in)
13345 {
13346 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13347 fprintf (stderr,
13348 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13349 "mode = %s, input rtx:\n",
13350 reg_class_names[ret], reg_class_names[rclass],
13351 GET_MODE_NAME (mode));
13352 debug_rtx (in);
13353
13354 return ret;
13355 }
13356
13357 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13358
13359 static bool
13360 rs6000_can_change_mode_class (machine_mode from,
13361 machine_mode to,
13362 reg_class_t rclass)
13363 {
13364 unsigned from_size = GET_MODE_SIZE (from);
13365 unsigned to_size = GET_MODE_SIZE (to);
13366
13367 if (from_size != to_size)
13368 {
13369 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13370
13371 if (reg_classes_intersect_p (xclass, rclass))
13372 {
13373 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13374 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13375 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13376 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13377
13378 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13379 single register under VSX because the scalar part of the register
13380 is in the upper 64 bits, and not the lower 64 bits. Types like
13381 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13382 IEEE floating point can't overlap, and neither can small
13383 values. */
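
	    /* For example, a DImode subreg of a V2DImode VSX register is
	       rejected: the 64-bit scalar lives in the upper half of the
	       register, while subreg semantics would expect it to alias the
	       half holding the low-order bits.  */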
13384
13385 if (to_float128_vector_p && from_float128_vector_p)
13386 return true;
13387
13388 else if (to_float128_vector_p || from_float128_vector_p)
13389 return false;
13390
13391 /* TDmode in floating-mode registers must always go into a register
13392 pair with the most significant word in the even-numbered register
13393 to match ISA requirements. In little-endian mode, this does not
13394 match subreg numbering, so we cannot allow subregs. */
13395 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13396 return false;
13397
13398 /* Allow SD<->DD changes, since SDmode values are stored in
13399 the low half of the DDmode, just like target-independent
13400 code expects. We need to allow at least SD->DD since
13401 rs6000_secondary_memory_needed_mode asks for that change
13402 to be made for SD reloads. */
13403 if ((to == DDmode && from == SDmode)
13404 || (to == SDmode && from == DDmode))
13405 return true;
13406
13407 if (from_size < 8 || to_size < 8)
13408 return false;
13409
13410 if (from_size == 8 && (8 * to_nregs) != to_size)
13411 return false;
13412
13413 if (to_size == 8 && (8 * from_nregs) != from_size)
13414 return false;
13415
13416 return true;
13417 }
13418 else
13419 return true;
13420 }
13421
13422 /* Since the VSX register set includes traditional floating point registers
13423 and altivec registers, just check for the size being different instead of
13424 trying to check whether the modes are vector modes. Otherwise it won't
13425 allow say DF and DI to change classes. For types like TFmode and TDmode
13426 that take 2 64-bit registers, rather than a single 128-bit register, don't
13427 allow subregs of those types to other 128 bit types. */
13428 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13429 {
13430 unsigned num_regs = (from_size + 15) / 16;
13431 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13432 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13433 return false;
13434
13435 return (from_size == 8 || from_size == 16);
13436 }
13437
13438 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13439 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13440 return false;
13441
13442 return true;
13443 }
13444
13445 /* Debug version of rs6000_can_change_mode_class. */
13446 static bool
13447 rs6000_debug_can_change_mode_class (machine_mode from,
13448 machine_mode to,
13449 reg_class_t rclass)
13450 {
13451 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13452
13453 fprintf (stderr,
13454 "rs6000_can_change_mode_class, return %s, from = %s, "
13455 "to = %s, rclass = %s\n",
13456 ret ? "true" : "false",
13457 GET_MODE_NAME (from), GET_MODE_NAME (to),
13458 reg_class_names[rclass]);
13459
13460 return ret;
13461 }
13462 \f
13463 /* Return a string to do a move operation of 128 bits of data. */
13464
13465 const char *
13466 rs6000_output_move_128bit (rtx operands[])
13467 {
13468 rtx dest = operands[0];
13469 rtx src = operands[1];
13470 machine_mode mode = GET_MODE (dest);
13471 int dest_regno;
13472 int src_regno;
13473 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13474 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13475
13476 if (REG_P (dest))
13477 {
13478 dest_regno = REGNO (dest);
13479 dest_gpr_p = INT_REGNO_P (dest_regno);
13480 dest_fp_p = FP_REGNO_P (dest_regno);
13481 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13482 dest_vsx_p = dest_fp_p | dest_vmx_p;
13483 }
13484 else
13485 {
13486 dest_regno = -1;
13487 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13488 }
13489
13490 if (REG_P (src))
13491 {
13492 src_regno = REGNO (src);
13493 src_gpr_p = INT_REGNO_P (src_regno);
13494 src_fp_p = FP_REGNO_P (src_regno);
13495 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13496 src_vsx_p = src_fp_p | src_vmx_p;
13497 }
13498 else
13499 {
13500 src_regno = -1;
13501 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13502 }
13503
13504 /* Register moves. */
13505 if (dest_regno >= 0 && src_regno >= 0)
13506 {
13507 if (dest_gpr_p)
13508 {
13509 if (src_gpr_p)
13510 return "#";
13511
13512 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13513 return (WORDS_BIG_ENDIAN
13514 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13515 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13516
13517 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13518 return "#";
13519 }
13520
13521 else if (TARGET_VSX && dest_vsx_p)
13522 {
13523 if (src_vsx_p)
13524 return "xxlor %x0,%x1,%x1";
13525
13526 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13527 return (WORDS_BIG_ENDIAN
13528 ? "mtvsrdd %x0,%1,%L1"
13529 : "mtvsrdd %x0,%L1,%1");
13530
13531 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13532 return "#";
13533 }
13534
13535 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13536 return "vor %0,%1,%1";
13537
13538 else if (dest_fp_p && src_fp_p)
13539 return "#";
13540 }
13541
13542 /* Loads. */
13543 else if (dest_regno >= 0 && MEM_P (src))
13544 {
13545 if (dest_gpr_p)
13546 {
13547 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13548 return "lq %0,%1";
13549 else
13550 return "#";
13551 }
13552
13553 else if (TARGET_ALTIVEC && dest_vmx_p
13554 && altivec_indexed_or_indirect_operand (src, mode))
13555 return "lvx %0,%y1";
13556
13557 else if (TARGET_VSX && dest_vsx_p)
13558 {
13559 if (mode_supports_dq_form (mode)
13560 && quad_address_p (XEXP (src, 0), mode, true))
13561 return "lxv %x0,%1";
13562
13563 else if (TARGET_P9_VECTOR)
13564 return "lxvx %x0,%y1";
13565
13566 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13567 return "lxvw4x %x0,%y1";
13568
13569 else
13570 return "lxvd2x %x0,%y1";
13571 }
13572
13573 else if (TARGET_ALTIVEC && dest_vmx_p)
13574 return "lvx %0,%y1";
13575
13576 else if (dest_fp_p)
13577 return "#";
13578 }
13579
13580 /* Stores. */
13581 else if (src_regno >= 0 && MEM_P (dest))
13582 {
13583 if (src_gpr_p)
13584 {
13585 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13586 return "stq %1,%0";
13587 else
13588 return "#";
13589 }
13590
13591 else if (TARGET_ALTIVEC && src_vmx_p
13592 && altivec_indexed_or_indirect_operand (dest, mode))
13593 return "stvx %1,%y0";
13594
13595 else if (TARGET_VSX && src_vsx_p)
13596 {
13597 if (mode_supports_dq_form (mode)
13598 && quad_address_p (XEXP (dest, 0), mode, true))
13599 return "stxv %x1,%0";
13600
13601 else if (TARGET_P9_VECTOR)
13602 return "stxvx %x1,%y0";
13603
13604 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13605 return "stxvw4x %x1,%y0";
13606
13607 else
13608 return "stxvd2x %x1,%y0";
13609 }
13610
13611 else if (TARGET_ALTIVEC && src_vmx_p)
13612 return "stvx %1,%y0";
13613
13614 else if (src_fp_p)
13615 return "#";
13616 }
13617
13618 /* Constants. */
13619 else if (dest_regno >= 0
13620 && (CONST_INT_P (src)
13621 || CONST_WIDE_INT_P (src)
13622 || CONST_DOUBLE_P (src)
13623 || GET_CODE (src) == CONST_VECTOR))
13624 {
13625 if (dest_gpr_p)
13626 return "#";
13627
13628 else if ((dest_vmx_p && TARGET_ALTIVEC)
13629 || (dest_vsx_p && TARGET_VSX))
13630 return output_vec_const_move (operands);
13631 }
13632
13633 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13634 }
13635
13636 /* Validate a 128-bit move. */
13637 bool
13638 rs6000_move_128bit_ok_p (rtx operands[])
13639 {
13640 machine_mode mode = GET_MODE (operands[0]);
13641 return (gpc_reg_operand (operands[0], mode)
13642 || gpc_reg_operand (operands[1], mode));
13643 }
13644
13645 /* Return true if a 128-bit move needs to be split. */
13646 bool
13647 rs6000_split_128bit_ok_p (rtx operands[])
13648 {
13649 if (!reload_completed)
13650 return false;
13651
13652 if (!gpr_or_gpr_p (operands[0], operands[1]))
13653 return false;
13654
13655 if (quad_load_store_p (operands[0], operands[1]))
13656 return false;
13657
13658 return true;
13659 }
13660
13661 \f
13662 /* Given a comparison operation, return the bit number in CCR to test. We
13663 know this is a valid comparison.
13664
13665 SCC_P is 1 if this is for an scc. That means that %D will have been
13666 used instead of %C, so the bits will be in different places.
13667
13668 Return -1 if OP isn't a valid comparison for some reason. */
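
   For example, a GT comparison in field cr2 has base_bit = 4 * 2 = 8 and
   tests bit 9; for an scc GE the answer is the unordered position,
   base_bit + 3 = 11.  */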
13669
13670 int
13671 ccr_bit (rtx op, int scc_p)
13672 {
13673 enum rtx_code code = GET_CODE (op);
13674 machine_mode cc_mode;
13675 int cc_regnum;
13676 int base_bit;
13677 rtx reg;
13678
13679 if (!COMPARISON_P (op))
13680 return -1;
13681
13682 reg = XEXP (op, 0);
13683
13684 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13685 return -1;
13686
13687 cc_mode = GET_MODE (reg);
13688 cc_regnum = REGNO (reg);
13689 base_bit = 4 * (cc_regnum - CR0_REGNO);
13690
13691 validate_condition_mode (code, cc_mode);
13692
13693 /* When generating a sCOND operation, only positive conditions are
13694 allowed. */
13695 if (scc_p)
13696 switch (code)
13697 {
13698 case EQ:
13699 case GT:
13700 case LT:
13701 case UNORDERED:
13702 case GTU:
13703 case LTU:
13704 break;
13705 default:
13706 return -1;
13707 }
13708
13709 switch (code)
13710 {
13711 case NE:
13712 return scc_p ? base_bit + 3 : base_bit + 2;
13713 case EQ:
13714 return base_bit + 2;
13715 case GT: case GTU: case UNLE:
13716 return base_bit + 1;
13717 case LT: case LTU: case UNGE:
13718 return base_bit;
13719 case ORDERED: case UNORDERED:
13720 return base_bit + 3;
13721
13722 case GE: case GEU:
13723 /* If scc, we will have done a cror to put the bit in the
13724 unordered position. So test that bit. For integer, this is ! LT
13725 unless this is an scc insn. */
13726 return scc_p ? base_bit + 3 : base_bit;
13727
13728 case LE: case LEU:
13729 return scc_p ? base_bit + 3 : base_bit + 1;
13730
13731 default:
13732 return -1;
13733 }
13734 }
13735 \f
13736 /* Return the GOT register. */
13737
13738 rtx
13739 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13740 {
13741 /* The second flow pass currently (June 1999) can't update
13742 regs_ever_live without disturbing other parts of the compiler, so
13743 update it here to make the prolog/epilogue code happy. */
13744 if (!can_create_pseudo_p ()
13745 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13746 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13747
13748 crtl->uses_pic_offset_table = 1;
13749
13750 return pic_offset_table_rtx;
13751 }
13752 \f
13753 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13754
13755 /* Write out a function code label. */
13756
13757 void
13758 rs6000_output_function_entry (FILE *file, const char *fname)
13759 {
13760 if (fname[0] != '.')
13761 {
13762 switch (DEFAULT_ABI)
13763 {
13764 default:
13765 gcc_unreachable ();
13766
13767 case ABI_AIX:
13768 if (DOT_SYMBOLS)
13769 putc ('.', file);
13770 else
13771 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13772 break;
13773
13774 case ABI_ELFv2:
13775 case ABI_V4:
13776 case ABI_DARWIN:
13777 break;
13778 }
13779 }
13780
13781 RS6000_OUTPUT_BASENAME (file, fname);
13782 }
13783
13784 /* Print an operand. Recognize special options, documented below. */
13785
13786 #if TARGET_ELF
13787 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13788 only introduced by the linker, when applying the sda21
13789 relocation. */
13790 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13791 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13792 #else
13793 #define SMALL_DATA_RELOC "sda21"
13794 #define SMALL_DATA_REG 0
13795 #endif
13796
13797 void
13798 print_operand (FILE *file, rtx x, int code)
13799 {
13800 int i;
13801 unsigned HOST_WIDE_INT uval;
13802
13803 switch (code)
13804 {
13805 /* %a is output_address. */
13806
13807 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13808 output_operand. */
13809
13810 case 'A':
13811 /* Write the MMA accumulator number associated with VSX register X. */
13812 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13813 output_operand_lossage ("invalid %%A value");
13814 else
13815 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13816 return;
13817
13818 case 'D':
13819 /* Like 'J' but get to the GT bit only. */
13820 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13821 {
13822 output_operand_lossage ("invalid %%D value");
13823 return;
13824 }
13825
13826 /* Bit 1 is GT bit. */
13827 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13828
13829 /* Add one for shift count in rlinm for scc. */
13830 fprintf (file, "%d", i + 1);
13831 return;
13832
13833 case 'e':
13834 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13835 if (! INT_P (x))
13836 {
13837 output_operand_lossage ("invalid %%e value");
13838 return;
13839 }
13840
13841 uval = INTVAL (x);
13842 if ((uval & 0xffff) == 0 && uval != 0)
13843 putc ('s', file);
13844 return;
13845
13846 case 'E':
13847 /* X is a CR register. Print the number of the EQ bit of the CR. */
13848 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13849 output_operand_lossage ("invalid %%E value");
13850 else
13851 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13852 return;
13853
13854 case 'f':
13855 /* X is a CR register. Print the shift count needed to move it
13856 to the high-order four bits. */
13857 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13858 output_operand_lossage ("invalid %%f value");
13859 else
13860 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13861 return;
13862
13863 case 'F':
13864 /* Similar, but print the count for the rotate in the opposite
13865 direction. */
13866 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13867 output_operand_lossage ("invalid %%F value");
13868 else
13869 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13870 return;
13871
13872 case 'G':
13873 /* X is a constant integer. If it is negative, print "m",
13874 otherwise print "z". This is to make an aze or ame insn. */
13875 if (!CONST_INT_P (x))
13876 output_operand_lossage ("invalid %%G value");
13877 else if (INTVAL (x) >= 0)
13878 putc ('z', file);
13879 else
13880 putc ('m', file);
13881 return;
13882
13883 case 'h':
13884 /* If constant, output low-order five bits. Otherwise, write
13885 normally. */
13886 if (INT_P (x))
13887 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13888 else
13889 print_operand (file, x, 0);
13890 return;
13891
13892 case 'H':
13893 /* If constant, output low-order six bits. Otherwise, write
13894 normally. */
13895 if (INT_P (x))
13896 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13897 else
13898 print_operand (file, x, 0);
13899 return;
13900
13901 case 'I':
13902 /* Print `i' if this is a constant, else nothing. */
13903 if (INT_P (x))
13904 putc ('i', file);
13905 return;
13906
13907 case 'j':
13908 /* Write the bit number in CCR for jump. */
13909 i = ccr_bit (x, 0);
13910 if (i == -1)
13911 output_operand_lossage ("invalid %%j code");
13912 else
13913 fprintf (file, "%d", i);
13914 return;
13915
13916 case 'J':
13917 /* Similar, but add one for shift count in rlinm for scc and pass
13918 scc flag to `ccr_bit'. */
13919 i = ccr_bit (x, 1);
13920 if (i == -1)
13921 output_operand_lossage ("invalid %%J code");
13922 else
13923 /* If we want bit 31, write a shift count of zero, not 32. */
13924 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13925 return;
13926
13927 case 'k':
13928 /* X must be a constant. Write the 1's complement of the
13929 constant. */
13930 if (! INT_P (x))
13931 output_operand_lossage ("invalid %%k value");
13932 else
13933 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13934 return;
13935
13936 case 'K':
13937 /* X must be a symbolic constant on ELF. Write an
13938 expression suitable for an 'addi' that adds in the low 16
13939 bits of the MEM. */
13940 if (GET_CODE (x) == CONST)
13941 {
13942 if (GET_CODE (XEXP (x, 0)) != PLUS
13943 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13944 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13945 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13946 output_operand_lossage ("invalid %%K value");
13947 }
13948 print_operand_address (file, x);
13949 fputs ("@l", file);
13950 return;
13951
13952 /* %l is output_asm_label. */
13953
13954 case 'L':
13955 /* Write second word of DImode or DFmode reference. Works on register
13956 or non-indexed memory only. */
13957 if (REG_P (x))
13958 fputs (reg_names[REGNO (x) + 1], file);
13959 else if (MEM_P (x))
13960 {
13961 machine_mode mode = GET_MODE (x);
13962 /* Handle possible auto-increment. Since it is pre-increment and
13963 we have already done it, we can just use an offset of word. */
13964 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13965 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13966 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13967 UNITS_PER_WORD));
13968 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13969 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13970 UNITS_PER_WORD));
13971 else
13972 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13973 UNITS_PER_WORD),
13974 0));
13975
13976 if (small_data_operand (x, GET_MODE (x)))
13977 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13978 reg_names[SMALL_DATA_REG]);
13979 }
13980 return;
13981
13982 case 'N': /* Unused */
13983 /* Write the number of elements in the vector times 4. */
13984 if (GET_CODE (x) != PARALLEL)
13985 output_operand_lossage ("invalid %%N value");
13986 else
13987 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13988 return;
13989
13990 case 'O': /* Unused */
13991 /* Similar, but subtract 1 first. */
13992 if (GET_CODE (x) != PARALLEL)
13993 output_operand_lossage ("invalid %%O value");
13994 else
13995 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13996 return;
13997
13998 case 'p':
13999 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14000 if (! INT_P (x)
14001 || INTVAL (x) < 0
14002 || (i = exact_log2 (INTVAL (x))) < 0)
14003 output_operand_lossage ("invalid %%p value");
14004 else
14005 fprintf (file, "%d", i);
14006 return;
14007
14008 case 'P':
14009 /* The operand must be an indirect memory reference. The result
14010 is the register name. */
14011 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
14012 || REGNO (XEXP (x, 0)) >= 32)
14013 output_operand_lossage ("invalid %%P value");
14014 else
14015 fputs (reg_names[REGNO (XEXP (x, 0))], file);
14016 return;
14017
14018 case 'q':
14019 /* This outputs the logical code corresponding to a boolean
14020 expression. The expression may have one or both operands
14021 negated (if one, only the first one). For condition register
14022 logical operations, it will also treat the negated
14023 CR codes as NOTs, but not handle NOTs of them. */
14024 {
14025 const char *const *t = 0;
14026 const char *s;
14027 enum rtx_code code = GET_CODE (x);
14028 static const char * const tbl[3][3] = {
14029 { "and", "andc", "nor" },
14030 { "or", "orc", "nand" },
14031 { "xor", "eqv", "xor" } };
14032
14033 if (code == AND)
14034 t = tbl[0];
14035 else if (code == IOR)
14036 t = tbl[1];
14037 else if (code == XOR)
14038 t = tbl[2];
14039 else
14040 output_operand_lossage ("invalid %%q value");
14041
14042 if (GET_CODE (XEXP (x, 0)) != NOT)
14043 s = t[0];
14044 else
14045 {
14046 if (GET_CODE (XEXP (x, 1)) == NOT)
14047 s = t[2];
14048 else
14049 s = t[1];
14050 }
14051
14052 fputs (s, file);
14053 }
14054 return;
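	  /* Illustrative mapping (not from the original sources): in the
	     AND row, (and a b) prints "and", (and (not a) b) prints "andc",
	     and (and (not a) (not b)) prints "nor", since ~a & ~b is
	     ~(a | b) by De Morgan's law.  */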
14055
14056 case 'Q':
14057 if (! TARGET_MFCRF)
14058 return;
14059 fputc (',', file);
14060 /* FALLTHRU */
14061
14062 case 'R':
14063 /* X is a CR register. Print the mask for `mtcrf'. */
14064 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14065 output_operand_lossage ("invalid %%R value");
14066 else
14067 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
14068 return;
14069
14070 case 's':
14071 /* Low 5 bits of 32 - value */
14072 if (! INT_P (x))
14073 output_operand_lossage ("invalid %%s value");
14074 else
14075 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
14076 return;
14077
14078 case 't':
14079 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14080 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14081 {
14082 output_operand_lossage ("invalid %%t value");
14083 return;
14084 }
14085
14086 /* Bit 3 is OV bit. */
14087 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
14088
14089 /* If we want bit 31, write a shift count of zero, not 32. */
14090 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14091 return;
14092
14093 case 'T':
14094 /* Print the symbolic name of a branch target register. */
14095 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14096 x = XVECEXP (x, 0, 0);
14097 if (!REG_P (x) || (REGNO (x) != LR_REGNO
14098 && REGNO (x) != CTR_REGNO))
14099 output_operand_lossage ("invalid %%T value");
14100 else if (REGNO (x) == LR_REGNO)
14101 fputs ("lr", file);
14102 else
14103 fputs ("ctr", file);
14104 return;
14105
14106 case 'u':
14107 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14108 for use in unsigned operand. */
14109 if (! INT_P (x))
14110 {
14111 output_operand_lossage ("invalid %%u value");
14112 return;
14113 }
14114
14115 uval = INTVAL (x);
14116 if ((uval & 0xffff) == 0)
14117 uval >>= 16;
14118
14119 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
14120 return;
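	  /* Example (illustrative): 0x12340000 prints as 0x1234 (the high
	     halfword, because the low halfword is zero), while 0x1234
	     prints as 0x1234 directly (the low halfword).  */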
14121
14122 case 'v':
14123 /* High-order 16 bits of constant for use in signed operand. */
14124 if (! INT_P (x))
14125 output_operand_lossage ("invalid %%v value");
14126 else
14127 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
14128 (INTVAL (x) >> 16) & 0xffff);
14129 return;
14130
14131 case 'U':
14132 /* Print `u' if this has an auto-increment or auto-decrement. */
14133 if (MEM_P (x)
14134 && (GET_CODE (XEXP (x, 0)) == PRE_INC
14135 || GET_CODE (XEXP (x, 0)) == PRE_DEC
14136 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
14137 putc ('u', file);
14138 return;
14139
14140 case 'V':
14141 /* Print the trap code for this operand. */
14142 switch (GET_CODE (x))
14143 {
14144 case EQ:
14145 fputs ("eq", file); /* 4 */
14146 break;
14147 case NE:
14148 fputs ("ne", file); /* 24 */
14149 break;
14150 case LT:
14151 fputs ("lt", file); /* 16 */
14152 break;
14153 case LE:
14154 fputs ("le", file); /* 20 */
14155 break;
14156 case GT:
14157 fputs ("gt", file); /* 8 */
14158 break;
14159 case GE:
14160 fputs ("ge", file); /* 12 */
14161 break;
14162 case LTU:
14163 fputs ("llt", file); /* 2 */
14164 break;
14165 case LEU:
14166 fputs ("lle", file); /* 6 */
14167 break;
14168 case GTU:
14169 fputs ("lgt", file); /* 1 */
14170 break;
14171 case GEU:
14172 fputs ("lge", file); /* 5 */
14173 break;
14174 default:
14175 output_operand_lossage ("invalid %%V value");
14176 }
14177 break;
14178
14179 case 'w':
14180 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14181 normally. */
14182 if (INT_P (x))
14183 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
14184 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
14185 else
14186 print_operand (file, x, 0);
14187 return;
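	  /* The xor/subtract trick sign-extends the low halfword.  Worked
	     example (illustrative): 0xffff gives (0x7fff - 0x8000) = -1,
	     while 0x7fff gives (0xffff - 0x8000) = 32767.  */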
14188
14189 case 'x':
14190 /* X is a FPR or Altivec register used in a VSX context. */
14191 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14192 output_operand_lossage ("invalid %%x value");
14193 else
14194 {
14195 int reg = REGNO (x);
14196 int vsx_reg = (FP_REGNO_P (reg)
14197 ? reg - 32
14198 : reg - FIRST_ALTIVEC_REGNO + 32);
14199
14200 #ifdef TARGET_REGNAMES
14201 if (TARGET_REGNAMES)
14202 fprintf (file, "%%vs%d", vsx_reg);
14203 else
14204 #endif
14205 fprintf (file, "%d", vsx_reg);
14206 }
14207 return;
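	  /* Illustrative numbering: FPR 5 prints as 5 (or "%vs5" under
	     -mregnames), while Altivec register 5 prints as 37, because the
	     64 VSX registers overlay the 32 FPRs followed by the 32 Altivec
	     registers.  */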
14208
14209 case 'X':
14210 if (MEM_P (x)
14211 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14212 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14213 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14214 putc ('x', file);
14215 return;
14216
14217 case 'Y':
14218 /* Like 'L', for third word of TImode/PTImode */
14219 if (REG_P (x))
14220 fputs (reg_names[REGNO (x) + 2], file);
14221 else if (MEM_P (x))
14222 {
14223 machine_mode mode = GET_MODE (x);
14224 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14225 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14226 output_address (mode, plus_constant (Pmode,
14227 XEXP (XEXP (x, 0), 0), 8));
14228 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14229 output_address (mode, plus_constant (Pmode,
14230 XEXP (XEXP (x, 0), 0), 8));
14231 else
14232 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14233 if (small_data_operand (x, GET_MODE (x)))
14234 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14235 reg_names[SMALL_DATA_REG]);
14236 }
14237 return;
14238
14239 case 'z':
14240 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14241 x = XVECEXP (x, 0, 1);
14242 /* X is a SYMBOL_REF. Write out the name preceded by a
14243 period and without any trailing data in brackets. Used for function
14244 names. If we are configured for System V (or the embedded ABI) on
14245 the PowerPC, do not emit the period, since those systems do not use
14246 TOCs and the like. */
14247 if (!SYMBOL_REF_P (x))
14248 {
14249 output_operand_lossage ("invalid %%z value");
14250 return;
14251 }
14252
14253 /* For macho, check to see if we need a stub. */
14254 if (TARGET_MACHO)
14255 {
14256 const char *name = XSTR (x, 0);
14257 #if TARGET_MACHO
14258 if (darwin_symbol_stubs
14259 && MACHOPIC_INDIRECT
14260 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14261 name = machopic_indirection_name (x, /*stub_p=*/true);
14262 #endif
14263 assemble_name (file, name);
14264 }
14265 else if (!DOT_SYMBOLS)
14266 assemble_name (file, XSTR (x, 0));
14267 else
14268 rs6000_output_function_entry (file, XSTR (x, 0));
14269 return;
14270
14271 case 'Z':
14272 /* Like 'L', for last word of TImode/PTImode. */
14273 if (REG_P (x))
14274 fputs (reg_names[REGNO (x) + 3], file);
14275 else if (MEM_P (x))
14276 {
14277 machine_mode mode = GET_MODE (x);
14278 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14279 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14280 output_address (mode, plus_constant (Pmode,
14281 XEXP (XEXP (x, 0), 0), 12));
14282 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14283 output_address (mode, plus_constant (Pmode,
14284 XEXP (XEXP (x, 0), 0), 12));
14285 else
14286 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14287 if (small_data_operand (x, GET_MODE (x)))
14288 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14289 reg_names[SMALL_DATA_REG]);
14290 }
14291 return;
14292
14293 /* Print AltiVec memory operand. */
14294 case 'y':
14295 {
14296 rtx tmp;
14297
14298 gcc_assert (MEM_P (x));
14299
14300 tmp = XEXP (x, 0);
14301
14302 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14303 && GET_CODE (tmp) == AND
14304 && CONST_INT_P (XEXP (tmp, 1))
14305 && INTVAL (XEXP (tmp, 1)) == -16)
14306 tmp = XEXP (tmp, 0);
14307 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14308 && GET_CODE (tmp) == PRE_MODIFY)
14309 tmp = XEXP (tmp, 1);
14310 if (REG_P (tmp))
14311 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14312 else
14313 {
14314 if (GET_CODE (tmp) != PLUS
14315 || !REG_P (XEXP (tmp, 0))
14316 || !REG_P (XEXP (tmp, 1)))
14317 {
14318 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14319 break;
14320 }
14321
14322 if (REGNO (XEXP (tmp, 0)) == 0)
14323 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14324 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14325 else
14326 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14327 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14328 }
14329 break;
14330 }
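	  /* Illustrative outputs for %y (not from the original sources):
	     (reg 9) prints "0,9" and (plus (reg 9) (reg 10)) prints "9,10";
	     if the first register is r0 the operands are swapped, since r0
	     in the RA slot of an indexed access reads as the literal 0.  */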
14331
14332 case 0:
14333 if (REG_P (x))
14334 fprintf (file, "%s", reg_names[REGNO (x)]);
14335 else if (MEM_P (x))
14336 {
14337 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14338 know the width from the mode. */
14339 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14340 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14341 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14342 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14343 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14344 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14345 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14346 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14347 else
14348 output_address (GET_MODE (x), XEXP (x, 0));
14349 }
14350 else if (toc_relative_expr_p (x, false,
14351 &tocrel_base_oac, &tocrel_offset_oac))
14352 /* This hack along with a corresponding hack in
14353 rs6000_output_addr_const_extra arranges to output addends
14354 where the assembler expects to find them. eg.
14355 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14356 without this hack would be output as "x@toc+4". We
14357 want "x+4@toc". */
14358 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14359 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14360 output_addr_const (file, XVECEXP (x, 0, 0));
14361 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14362 output_addr_const (file, XVECEXP (x, 0, 1));
14363 else
14364 output_addr_const (file, x);
14365 return;
14366
14367 case '&':
14368 if (const char *name = get_some_local_dynamic_name ())
14369 assemble_name (file, name);
14370 else
14371 output_operand_lossage ("'%%&' used without any "
14372 "local dynamic TLS references");
14373 return;
14374
14375 default:
14376 output_operand_lossage ("invalid %%xn code");
14377 }
14378 }
14379 \f
14380 /* Print the address of an operand. */
14381
14382 void
14383 print_operand_address (FILE *file, rtx x)
14384 {
14385 if (REG_P (x))
14386 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14387
14388 /* Is it a PC-relative address? */
14389 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14390 {
14391 HOST_WIDE_INT offset;
14392
14393 if (GET_CODE (x) == CONST)
14394 x = XEXP (x, 0);
14395
14396 if (GET_CODE (x) == PLUS)
14397 {
14398 offset = INTVAL (XEXP (x, 1));
14399 x = XEXP (x, 0);
14400 }
14401 else
14402 offset = 0;
14403
14404 output_addr_const (file, x);
14405
14406 if (offset)
14407 fprintf (file, "%+" PRId64, offset);
14408
14409 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14410 fprintf (file, "@got");
14411
14412 fprintf (file, "@pcrel");
14413 }
14414 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14415 || GET_CODE (x) == LABEL_REF)
14416 {
14417 output_addr_const (file, x);
14418 if (small_data_operand (x, GET_MODE (x)))
14419 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14420 reg_names[SMALL_DATA_REG]);
14421 else
14422 gcc_assert (!TARGET_TOC);
14423 }
14424 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14425 && REG_P (XEXP (x, 1)))
14426 {
14427 if (REGNO (XEXP (x, 0)) == 0)
14428 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14429 reg_names[ REGNO (XEXP (x, 0)) ]);
14430 else
14431 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14432 reg_names[ REGNO (XEXP (x, 1)) ]);
14433 }
14434 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14435 && CONST_INT_P (XEXP (x, 1)))
14436 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14437 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14438 #if TARGET_MACHO
14439 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14440 && CONSTANT_P (XEXP (x, 1)))
14441 {
14442 fprintf (file, "lo16(");
14443 output_addr_const (file, XEXP (x, 1));
14444 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14445 }
14446 #endif
14447 #if TARGET_ELF
14448 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14449 && CONSTANT_P (XEXP (x, 1)))
14450 {
14451 output_addr_const (file, XEXP (x, 1));
14452 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14453 }
14454 #endif
14455 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14456 {
14457 /* This hack along with a corresponding hack in
14458 rs6000_output_addr_const_extra arranges to output addends
14459 where the assembler expects to find them. eg.
14460 (lo_sum (reg 9)
14461 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14462 without this hack would be output as "x@toc+8@l(9)". We
14463 want "x+8@toc@l(9)". */
14464 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14465 if (GET_CODE (x) == LO_SUM)
14466 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14467 else
14468 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14469 }
14470 else
14471 output_addr_const (file, x);
14472 }
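/* Illustrative examples of the address forms handled above (not from the
   original sources):
     (reg 9)                      -> "0(9)"
     (plus (reg 9) (const_int 8)) -> "8(9)"
     (plus (reg 9) (reg 10))      -> "9,10"
     pc-relative symbol           -> "sym@pcrel" or "sym@got@pcrel"
     (lo_sum (reg 9) sym) on ELF  -> "sym@l(9)"  */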
14473 \f
14474 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14475
14476 bool
14477 rs6000_output_addr_const_extra (FILE *file, rtx x)
14478 {
14479 if (GET_CODE (x) == UNSPEC)
14480 switch (XINT (x, 1))
14481 {
14482 case UNSPEC_TOCREL:
14483 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14484 && REG_P (XVECEXP (x, 0, 1))
14485 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14486 output_addr_const (file, XVECEXP (x, 0, 0));
14487 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14488 {
14489 if (INTVAL (tocrel_offset_oac) >= 0)
14490 fprintf (file, "+");
14491 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14492 }
14493 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14494 {
14495 putc ('-', file);
14496 assemble_name (file, toc_label_name);
14497 need_toc_init = 1;
14498 }
14499 else if (TARGET_ELF)
14500 fputs ("@toc", file);
14501 return true;
14502
14503 #if TARGET_MACHO
14504 case UNSPEC_MACHOPIC_OFFSET:
14505 output_addr_const (file, XVECEXP (x, 0, 0));
14506 putc ('-', file);
14507 machopic_output_function_base_name (file);
14508 return true;
14509 #endif
14510 }
14511 return false;
14512 }
14513 \f
14514 /* Target hook for assembling integer objects. The PowerPC version has
14515 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14516 is defined. It also needs to handle DI-mode objects on 64-bit
14517 targets. */
14518
14519 static bool
14520 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14521 {
14522 #ifdef RELOCATABLE_NEEDS_FIXUP
14523 /* Special handling for SI values. */
14524 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14525 {
14526 static int recurse = 0;
14527
14528 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14529 the .fixup section. Since the TOC section is already relocated, we
14530 don't need to mark it here. We used to skip the text section, but it
14531 should never be valid for relocated addresses to be placed in the text
14532 section. */
14533 if (DEFAULT_ABI == ABI_V4
14534 && (TARGET_RELOCATABLE || flag_pic > 1)
14535 && in_section != toc_section
14536 && !recurse
14537 && !CONST_SCALAR_INT_P (x)
14538 && CONSTANT_P (x))
14539 {
14540 char buf[256];
14541
14542 recurse = 1;
14543 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14544 fixuplabelno++;
14545 ASM_OUTPUT_LABEL (asm_out_file, buf);
14546 fprintf (asm_out_file, "\t.long\t(");
14547 output_addr_const (asm_out_file, x);
14548 fprintf (asm_out_file, ")@fixup\n");
14549 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14550 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14551 fprintf (asm_out_file, "\t.long\t");
14552 assemble_name (asm_out_file, buf);
14553 fprintf (asm_out_file, "\n\t.previous\n");
14554 recurse = 0;
14555 return true;
14556 }
14557 /* Remove initial .'s to turn a -mcall-aixdesc function
14558 address into the address of the descriptor, not the function
14559 itself. */
14560 else if (SYMBOL_REF_P (x)
14561 && XSTR (x, 0)[0] == '.'
14562 && DEFAULT_ABI == ABI_AIX)
14563 {
14564 const char *name = XSTR (x, 0);
14565 while (*name == '.')
14566 name++;
14567
14568 fprintf (asm_out_file, "\t.long\t%s\n", name);
14569 return true;
14570 }
14571 }
14572 #endif /* RELOCATABLE_NEEDS_FIXUP */
14573 return default_assemble_integer (x, size, aligned_p);
14574 }
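/* For -mrelocatable, the fixup path above emits roughly the following for
   a relocated address (illustrative; exact label and directive spelling
   depends on the target configuration):
       .LCP0:
	       .long (sym)@fixup
	       .section ".fixup","aw"
	       .align 2
	       .long .LCP0
	       .previous
   so the startup code can find and relocate the word at .LCP0.  */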
14575
14576 /* Return a template string for assembly to emit when making an
14577 external call. FUNOP is the call mem argument operand number. */
14578
14579 static const char *
14580 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14581 {
14582 /* -Wformat-overflow workaround, without which gcc thinks that %u
14583 might produce 10 digits. */
14584 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14585
14586 char arg[12];
14587 arg[0] = 0;
14588 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14589 {
14590 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14591 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14592 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14593 sprintf (arg, "(%%&@tlsld)");
14594 }
14595
14596 /* The magic 32768 offset here corresponds to the offset of
14597 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14598 char z[11];
14599 sprintf (z, "%%z%u%s", funop,
14600 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14601 ? "+32768" : ""));
14602
14603 static char str[32]; /* 1 spare */
14604 if (rs6000_pcrel_p ())
14605 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14606 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14607 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14608 sibcall ? "" : "\n\tnop");
14609 else if (DEFAULT_ABI == ABI_V4)
14610 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14611 flag_pic ? "@plt" : "");
14612 #if TARGET_MACHO
14613	/* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14614 else if (DEFAULT_ABI == ABI_DARWIN)
14615 {
14616 /* The cookie is in operand func+2. */
14617 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14618 int cookie = INTVAL (operands[funop + 2]);
14619 if (cookie & CALL_LONG)
14620 {
14621 tree funname = get_identifier (XSTR (operands[funop], 0));
14622 tree labelname = get_prev_label (funname);
14623 gcc_checking_assert (labelname && !sibcall);
14624
14625 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14626 instruction will reach 'foo', otherwise link as 'bl L42'".
14627 "L42" should be a 'branch island', that will do a far jump to
14628 'foo'. Branch islands are generated in
14629 macho_branch_islands(). */
14630 sprintf (str, "jbsr %%z%u,%.10s", funop,
14631 IDENTIFIER_POINTER (labelname));
14632 }
14633 else
14634 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14635 after the call. */
14636 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14637 }
14638 #endif
14639 else
14640 gcc_unreachable ();
14641 return str;
14642 }
14643
14644 const char *
14645 rs6000_call_template (rtx *operands, unsigned int funop)
14646 {
14647 return rs6000_call_template_1 (operands, funop, false);
14648 }
14649
14650 const char *
14651 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14652 {
14653 return rs6000_call_template_1 (operands, funop, true);
14654 }
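/* Illustrative templates produced above (for funop == 0): with
   pc-relative addressing, "bl %z0@notoc"; on AIX/ELFv2, "bl %z0\n\tnop"
   so the linker has a slot in which to restore the TOC pointer; on
   secure-PLT SysV with -fPIC, "bl %z0+32768@plt".  */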
14655
14656 /* As above, for indirect calls. */
14657
14658 static const char *
14659 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14660 bool sibcall)
14661 {
14662 /* -Wformat-overflow workaround, without which gcc thinks that %u
14663 might produce 10 digits. Note that -Wformat-overflow will not
14664 currently warn here for str[], so do not rely on a warning to
14665 ensure str[] is correctly sized. */
14666 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14667
14668 /* Currently, funop is either 0 or 1. The maximum string is always
14669 a !speculate 64-bit __tls_get_addr call.
14670
14671 ABI_ELFv2, pcrel:
14672 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14673 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14674 . 9 crset 2\n\t
14675 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14676 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14677 . 8 beq%T1l-
14678 .---
14679 .142
14680
14681 ABI_AIX:
14682 . 9 ld 2,%3\n\t
14683 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14684 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14685 . 9 crset 2\n\t
14686 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14687 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14688 . 10 beq%T1l-\n\t
14689 . 10 ld 2,%4(1)
14690 .---
14691 .151
14692
14693 ABI_ELFv2:
14694 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14695 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14696 . 9 crset 2\n\t
14697 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14698 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14699 . 10 beq%T1l-\n\t
14700 . 10 ld 2,%3(1)
14701 .---
14702 .142
14703
14704 ABI_V4:
14705 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14706 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14707 . 9 crset 2\n\t
14708 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14709 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14710 . 8 beq%T1l-
14711 .---
14712 .141 */
14713 static char str[160]; /* 8 spare */
14714 char *s = str;
14715 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14716
14717 if (DEFAULT_ABI == ABI_AIX)
14718 s += sprintf (s,
14719 "l%s 2,%%%u\n\t",
14720 ptrload, funop + 3);
14721
14722 /* We don't need the extra code to stop indirect call speculation if
14723 calling via LR. */
14724 bool speculate = (TARGET_MACHO
14725 || rs6000_speculate_indirect_jumps
14726 || (REG_P (operands[funop])
14727 && REGNO (operands[funop]) == LR_REGNO));
14728
14729 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14730 {
14731 const char *rel64 = TARGET_64BIT ? "64" : "";
14732 char tls[29];
14733 tls[0] = 0;
14734 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14735 {
14736 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14737 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14738 rel64, funop + 1);
14739 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14740 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14741 rel64);
14742 }
14743
14744 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14745 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14746 && flag_pic == 2 ? "+32768" : "");
14747 if (!speculate)
14748 {
14749 s += sprintf (s,
14750 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14751 tls, rel64, notoc, funop, addend);
14752 s += sprintf (s, "crset 2\n\t");
14753 }
14754 s += sprintf (s,
14755 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14756 tls, rel64, notoc, funop, addend);
14757 }
14758 else if (!speculate)
14759 s += sprintf (s, "crset 2\n\t");
14760
14761 if (rs6000_pcrel_p ())
14762 {
14763 if (speculate)
14764 sprintf (s, "b%%T%ul", funop);
14765 else
14766 sprintf (s, "beq%%T%ul-", funop);
14767 }
14768 else if (DEFAULT_ABI == ABI_AIX)
14769 {
14770 if (speculate)
14771 sprintf (s,
14772 "b%%T%ul\n\t"
14773 "l%s 2,%%%u(1)",
14774 funop, ptrload, funop + 4);
14775 else
14776 sprintf (s,
14777 "beq%%T%ul-\n\t"
14778 "l%s 2,%%%u(1)",
14779 funop, ptrload, funop + 4);
14780 }
14781 else if (DEFAULT_ABI == ABI_ELFv2)
14782 {
14783 if (speculate)
14784 sprintf (s,
14785 "b%%T%ul\n\t"
14786 "l%s 2,%%%u(1)",
14787 funop, ptrload, funop + 3);
14788 else
14789 sprintf (s,
14790 "beq%%T%ul-\n\t"
14791 "l%s 2,%%%u(1)",
14792 funop, ptrload, funop + 3);
14793 }
14794 else
14795 {
14796 if (speculate)
14797 sprintf (s,
14798 "b%%T%u%s",
14799 funop, sibcall ? "" : "l");
14800 else
14801 sprintf (s,
14802 "beq%%T%u%s-%s",
14803 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14804 }
14805 return str;
14806 }
14807
14808 const char *
14809 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14810 {
14811 return rs6000_indirect_call_template_1 (operands, funop, false);
14812 }
14813
14814 const char *
14815 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14816 {
14817 return rs6000_indirect_call_template_1 (operands, funop, true);
14818 }
14819
14820 #if HAVE_AS_PLTSEQ
14821 /* Output indirect call insns. WHICH identifies the type of sequence. */
14822 const char *
14823 rs6000_pltseq_template (rtx *operands, int which)
14824 {
14825 const char *rel64 = TARGET_64BIT ? "64" : "";
14826 char tls[30];
14827 tls[0] = 0;
14828 if (GET_CODE (operands[3]) == UNSPEC)
14829 {
14830 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14831 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14832 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14833 off, rel64);
14834 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14835 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14836 off, rel64);
14837 }
14838
14839 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14840 static char str[96]; /* 10 spare */
14841 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14842 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14843 && flag_pic == 2 ? "+32768" : "");
14844 switch (which)
14845 {
14846 case RS6000_PLTSEQ_TOCSAVE:
14847 sprintf (str,
14848 "st%s\n\t"
14849 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14850 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14851 tls, rel64);
14852 break;
14853 case RS6000_PLTSEQ_PLT16_HA:
14854 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14855 sprintf (str,
14856 "lis %%0,0\n\t"
14857 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14858 tls, off, rel64);
14859 else
14860 sprintf (str,
14861 "addis %%0,%%1,0\n\t"
14862 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14863 tls, off, rel64, addend);
14864 break;
14865 case RS6000_PLTSEQ_PLT16_LO:
14866 sprintf (str,
14867 "l%s %%0,0(%%1)\n\t"
14868 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14869 TARGET_64BIT ? "d" : "wz",
14870 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14871 break;
14872 case RS6000_PLTSEQ_MTCTR:
14873 sprintf (str,
14874 "mtctr %%1\n\t"
14875 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14876 tls, rel64, addend);
14877 break;
14878 case RS6000_PLTSEQ_PLT_PCREL34:
14879 sprintf (str,
14880 "pl%s %%0,0(0),1\n\t"
14881 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14882 TARGET_64BIT ? "d" : "wz",
14883 tls, rel64);
14884 break;
14885 default:
14886 gcc_unreachable ();
14887 }
14888 return str;
14889 }
14890 #endif
14891 \f
14892 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14893 /* Emit an assembler directive to set symbol visibility for DECL to
14894 VISIBILITY_TYPE. */
14895
14896 static void
14897 rs6000_assemble_visibility (tree decl, int vis)
14898 {
14899 if (TARGET_XCOFF)
14900 return;
14901
14902 /* Functions need to have their entry point symbol visibility set as
14903 well as their descriptor symbol visibility. */
14904 if (DEFAULT_ABI == ABI_AIX
14905 && DOT_SYMBOLS
14906 && TREE_CODE (decl) == FUNCTION_DECL)
14907 {
14908 static const char * const visibility_types[] = {
14909 NULL, "protected", "hidden", "internal"
14910 };
14911
14912 const char *name, *type;
14913
14914 name = ((* targetm.strip_name_encoding)
14915 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14916 type = visibility_types[vis];
14917
14918 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14919 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14920 }
14921 else
14922 default_assemble_visibility (decl, vis);
14923 }
14924 #endif
14925 \f
14926 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
14927 entry. If RECORD_P is true and the target supports named sections,
14928 the location of the NOPs will be recorded in a special object section
14929 called "__patchable_function_entries". This routine may be called
14930 twice per function to put NOPs before and after the function
14931 entry. */
14932
14933 void
14934 rs6000_print_patchable_function_entry (FILE *file,
14935 unsigned HOST_WIDE_INT patch_area_size,
14936 bool record_p)
14937 {
14938 unsigned int flags = SECTION_WRITE | SECTION_RELRO;
14939	  /* When an .opd section is emitted, the function symbol goes into the
14940	     .opd section while default_print_patchable_function_entry_1 emits
14941	     the patchable area into the function section.  Don't use
14942	     SECTION_LINK_ORDER in that case.  */
14943 if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
14944 && HAVE_GAS_SECTION_LINK_ORDER)
14945 flags |= SECTION_LINK_ORDER;
14946 default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
14947 flags);
14948 }
14949 \f
14950 enum rtx_code
14951 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14952 {
14953 /* Reversal of FP compares takes care -- an ordered compare
14954 becomes an unordered compare and vice versa. */
14955 if (mode == CCFPmode
14956 && (!flag_finite_math_only
14957 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14958 || code == UNEQ || code == LTGT))
14959 return reverse_condition_maybe_unordered (code);
14960 else
14961 return reverse_condition (code);
14962 }
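/* Example (illustrative): reversing GT in CCFPmode yields UNLE rather
   than LE, so a NaN operand still takes the "not greater than" path
   after the branch sense is flipped.  */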
14963
14964 /* Generate a compare for CODE. Return a brand-new rtx that
14965 represents the result of the compare. */
14966
14967 static rtx
14968 rs6000_generate_compare (rtx cmp, machine_mode mode)
14969 {
14970 machine_mode comp_mode;
14971 rtx compare_result;
14972 enum rtx_code code = GET_CODE (cmp);
14973 rtx op0 = XEXP (cmp, 0);
14974 rtx op1 = XEXP (cmp, 1);
14975
14976 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14977 comp_mode = CCmode;
14978 else if (FLOAT_MODE_P (mode))
14979 comp_mode = CCFPmode;
14980 else if (code == GTU || code == LTU
14981 || code == GEU || code == LEU)
14982 comp_mode = CCUNSmode;
14983 else if ((code == EQ || code == NE)
14984 && unsigned_reg_p (op0)
14985 && (unsigned_reg_p (op1)
14986 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
14987 /* These are unsigned values, perhaps there will be a later
14988 ordering compare that can be shared with this one. */
14989 comp_mode = CCUNSmode;
14990 else
14991 comp_mode = CCmode;
14992
14993 /* If we have an unsigned compare, make sure we don't have a signed value as
14994 an immediate. */
14995 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14996 && INTVAL (op1) < 0)
14997 {
14998 op0 = copy_rtx_if_shared (op0);
14999 op1 = force_reg (GET_MODE (op0), op1);
15000 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
15001 }
15002
15003 /* First, the compare. */
15004 compare_result = gen_reg_rtx (comp_mode);
15005
15006 /* IEEE 128-bit support in VSX registers when we do not have hardware
15007 support. */
15008 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
15009 {
15010 rtx libfunc = NULL_RTX;
15011 bool check_nan = false;
15012 rtx dest;
15013
15014 switch (code)
15015 {
15016 case EQ:
15017 case NE:
15018 libfunc = optab_libfunc (eq_optab, mode);
15019 break;
15020
15021 case GT:
15022 case GE:
15023 libfunc = optab_libfunc (ge_optab, mode);
15024 break;
15025
15026 case LT:
15027 case LE:
15028 libfunc = optab_libfunc (le_optab, mode);
15029 break;
15030
15031 case UNORDERED:
15032 case ORDERED:
15033 libfunc = optab_libfunc (unord_optab, mode);
15034 code = (code == UNORDERED) ? NE : EQ;
15035 break;
15036
15037 case UNGE:
15038 case UNGT:
15039 check_nan = true;
15040 libfunc = optab_libfunc (ge_optab, mode);
15041 code = (code == UNGE) ? GE : GT;
15042 break;
15043
15044 case UNLE:
15045 case UNLT:
15046 check_nan = true;
15047 libfunc = optab_libfunc (le_optab, mode);
15048 code = (code == UNLE) ? LE : LT;
15049 break;
15050
15051 case UNEQ:
15052 case LTGT:
15053 check_nan = true;
15054 libfunc = optab_libfunc (eq_optab, mode);
15055	  code = (code == UNEQ) ? EQ : NE;
15056 break;
15057
15058 default:
15059 gcc_unreachable ();
15060 }
15061
15062 gcc_assert (libfunc);
15063
15064 if (!check_nan)
15065 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15066 SImode, op0, mode, op1, mode);
15067
15068 /* The library signals an exception for signalling NaNs, so we need to
15069 handle isgreater, etc. by first checking isordered. */
15070 else
15071 {
15072 rtx ne_rtx, normal_dest, unord_dest;
15073 rtx unord_func = optab_libfunc (unord_optab, mode);
15074 rtx join_label = gen_label_rtx ();
15075 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
15076 rtx unord_cmp = gen_reg_rtx (comp_mode);
15077
15078
15079 /* Test for either value being a NaN. */
15080 gcc_assert (unord_func);
15081 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
15082 SImode, op0, mode, op1, mode);
15083
15084	  /* Set value (1) if either value is a NaN, and jump to the join
15085 label. */
15086 dest = gen_reg_rtx (SImode);
15087 emit_move_insn (dest, const1_rtx);
15088 emit_insn (gen_rtx_SET (unord_cmp,
15089 gen_rtx_COMPARE (comp_mode, unord_dest,
15090 const0_rtx)));
15091
15092 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
15093 emit_jump_insn (gen_rtx_SET (pc_rtx,
15094 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
15095 join_ref,
15096 pc_rtx)));
15097
15098 /* Do the normal comparison, knowing that the values are not
15099 NaNs. */
15100 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15101 SImode, op0, mode, op1, mode);
15102
15103 emit_insn (gen_cstoresi4 (dest,
15104 gen_rtx_fmt_ee (code, SImode, normal_dest,
15105 const0_rtx),
15106 normal_dest, const0_rtx));
15107
15108	  /* Join NaN and non-NaN paths.  Compare dest against 0.  */
15109 emit_label (join_label);
15110 code = NE;
15111 }
15112
15113 emit_insn (gen_rtx_SET (compare_result,
15114 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
15115 }
15116
15117 else
15118 {
15119 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15120 CLOBBERs to match cmptf_internal2 pattern. */
15121 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
15122 && FLOAT128_IBM_P (GET_MODE (op0))
15123 && TARGET_HARD_FLOAT)
15124 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15125 gen_rtvec (10,
15126 gen_rtx_SET (compare_result,
15127 gen_rtx_COMPARE (comp_mode, op0, op1)),
15128 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15129 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15130 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15131 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15132 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15133 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15134 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15135 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15136 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15137 else if (GET_CODE (op1) == UNSPEC
15138 && XINT (op1, 1) == UNSPEC_SP_TEST)
15139 {
15140 rtx op1b = XVECEXP (op1, 0, 0);
15141 comp_mode = CCEQmode;
15142 compare_result = gen_reg_rtx (CCEQmode);
15143 if (TARGET_64BIT)
15144 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15145 else
15146 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15147 }
15148 else
15149 emit_insn (gen_rtx_SET (compare_result,
15150 gen_rtx_COMPARE (comp_mode, op0, op1)));
15151 }
15152
15153 validate_condition_mode (code, GET_MODE (compare_result));
15154
15155 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15156 }
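/* Sketch of the !TARGET_FLOAT128_HW sequence generated above for UNGE,
   assuming the conventional libgcc names for KFmode (__unordkf2 and
   __gekf2; illustrative, not a literal dump):
       dest = 1;
       if (__unordkf2 (a, b) != 0) goto join;   // unordered: result true
       dest = (__gekf2 (a, b) >= 0);            // ordered: plain >=
     join:
       ;; compare dest against 0; the caller then tests NE.  */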
15157
15158 \f
15159 /* Return the diagnostic message string if the binary operation OP is
15160 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15161
15162 static const char*
15163 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15164 const_tree type1,
15165 const_tree type2)
15166 {
15167 machine_mode mode1 = TYPE_MODE (type1);
15168 machine_mode mode2 = TYPE_MODE (type2);
15169
15170 /* For complex modes, use the inner type. */
15171 if (COMPLEX_MODE_P (mode1))
15172 mode1 = GET_MODE_INNER (mode1);
15173
15174 if (COMPLEX_MODE_P (mode2))
15175 mode2 = GET_MODE_INNER (mode2);
15176
15177 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15178 double to intermix unless -mfloat128-convert. */
15179 if (mode1 == mode2)
15180 return NULL;
15181
15182 if (!TARGET_FLOAT128_CVT)
15183 {
15184 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15185 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15186 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15187 "point types");
15188 }
15189
15190 return NULL;
15191 }
15192
15193 \f
15194 /* Expand floating point conversion to/from __float128 and __ibm128. */
15195
15196 void
15197 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15198 {
15199 machine_mode dest_mode = GET_MODE (dest);
15200 machine_mode src_mode = GET_MODE (src);
15201 convert_optab cvt = unknown_optab;
15202 bool do_move = false;
15203 rtx libfunc = NULL_RTX;
15204 rtx dest2;
15205 typedef rtx (*rtx_2func_t) (rtx, rtx);
15206 rtx_2func_t hw_convert = (rtx_2func_t)0;
15207 size_t kf_or_tf;
15208
15209 struct hw_conv_t {
15210 rtx_2func_t from_df;
15211 rtx_2func_t from_sf;
15212 rtx_2func_t from_si_sign;
15213 rtx_2func_t from_si_uns;
15214 rtx_2func_t from_di_sign;
15215 rtx_2func_t from_di_uns;
15216 rtx_2func_t to_df;
15217 rtx_2func_t to_sf;
15218 rtx_2func_t to_si_sign;
15219 rtx_2func_t to_si_uns;
15220 rtx_2func_t to_di_sign;
15221 rtx_2func_t to_di_uns;
15222 } hw_conversions[2] = {
15223	    /* conversions to/from KFmode */
15224 {
15225 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15226 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15227 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15228 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15229 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15230 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15231 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15232 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15233 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15234 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15235 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15236 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15237 },
15238
15239	    /* conversions to/from TFmode */
15240 {
15241 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15242 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15243 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15244 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15245 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15246 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15247 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15248 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15249 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15250 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15251 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15252 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15253 },
15254 };
15255
15256 if (dest_mode == src_mode)
15257 gcc_unreachable ();
15258
15259 /* Eliminate memory operations. */
15260 if (MEM_P (src))
15261 src = force_reg (src_mode, src);
15262
15263 if (MEM_P (dest))
15264 {
15265 rtx tmp = gen_reg_rtx (dest_mode);
15266 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15267 rs6000_emit_move (dest, tmp, dest_mode);
15268 return;
15269 }
15270
15271 /* Convert to IEEE 128-bit floating point. */
15272 if (FLOAT128_IEEE_P (dest_mode))
15273 {
15274 if (dest_mode == KFmode)
15275 kf_or_tf = 0;
15276 else if (dest_mode == TFmode)
15277 kf_or_tf = 1;
15278 else
15279 gcc_unreachable ();
15280
15281 switch (src_mode)
15282 {
15283 case E_DFmode:
15284 cvt = sext_optab;
15285 hw_convert = hw_conversions[kf_or_tf].from_df;
15286 break;
15287
15288 case E_SFmode:
15289 cvt = sext_optab;
15290 hw_convert = hw_conversions[kf_or_tf].from_sf;
15291 break;
15292
15293 case E_KFmode:
15294 case E_IFmode:
15295 case E_TFmode:
15296 if (FLOAT128_IBM_P (src_mode))
15297 cvt = sext_optab;
15298 else
15299 do_move = true;
15300 break;
15301
15302 case E_SImode:
15303 if (unsigned_p)
15304 {
15305 cvt = ufloat_optab;
15306 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15307 }
15308 else
15309 {
15310 cvt = sfloat_optab;
15311 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15312 }
15313 break;
15314
15315 case E_DImode:
15316 if (unsigned_p)
15317 {
15318 cvt = ufloat_optab;
15319 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15320 }
15321 else
15322 {
15323 cvt = sfloat_optab;
15324 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15325 }
15326 break;
15327
15328 default:
15329 gcc_unreachable ();
15330 }
15331 }
15332
15333 /* Convert from IEEE 128-bit floating point. */
15334 else if (FLOAT128_IEEE_P (src_mode))
15335 {
15336 if (src_mode == KFmode)
15337 kf_or_tf = 0;
15338 else if (src_mode == TFmode)
15339 kf_or_tf = 1;
15340 else
15341 gcc_unreachable ();
15342
15343 switch (dest_mode)
15344 {
15345 case E_DFmode:
15346 cvt = trunc_optab;
15347 hw_convert = hw_conversions[kf_or_tf].to_df;
15348 break;
15349
15350 case E_SFmode:
15351 cvt = trunc_optab;
15352 hw_convert = hw_conversions[kf_or_tf].to_sf;
15353 break;
15354
15355 case E_KFmode:
15356 case E_IFmode:
15357 case E_TFmode:
15358 if (FLOAT128_IBM_P (dest_mode))
15359 cvt = trunc_optab;
15360 else
15361 do_move = true;
15362 break;
15363
15364 case E_SImode:
15365 if (unsigned_p)
15366 {
15367 cvt = ufix_optab;
15368 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15369 }
15370 else
15371 {
15372 cvt = sfix_optab;
15373 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15374 }
15375 break;
15376
15377 case E_DImode:
15378 if (unsigned_p)
15379 {
15380 cvt = ufix_optab;
15381 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15382 }
15383 else
15384 {
15385 cvt = sfix_optab;
15386 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15387 }
15388 break;
15389
15390 default:
15391 gcc_unreachable ();
15392 }
15393 }
15394
15395 /* Both IBM format. */
15396 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15397 do_move = true;
15398
15399 else
15400 gcc_unreachable ();
15401
15402 /* Handle conversion between TFmode/KFmode/IFmode. */
15403 if (do_move)
15404 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15405
15406 /* Handle conversion if we have hardware support. */
15407 else if (TARGET_FLOAT128_HW && hw_convert)
15408 emit_insn ((hw_convert) (dest, src));
15409
15410 /* Call an external function to do the conversion. */
15411 else if (cvt != unknown_optab)
15412 {
15413 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15414 gcc_assert (libfunc != NULL_RTX);
15415
15416 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15417 src, src_mode);
15418
15419 gcc_assert (dest2 != NULL_RTX);
15420 if (!rtx_equal_p (dest, dest2))
15421 emit_move_insn (dest, dest2);
15422 }
15423
15424 else
15425 gcc_unreachable ();
15426
15427 return;
15428 }
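/* Illustrative flow: a DFmode -> KFmode conversion uses
   gen_extenddfkf2_hw when TARGET_FLOAT128_HW is set, and otherwise falls
   back to the sext_optab libcall (conventionally __extenddfkf2 in
   libgcc).  */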
15429
15430 \f
15431 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15432 can be used as that dest register. Return the dest register. */
15433
15434 rtx
15435 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15436 {
15437 if (op2 == const0_rtx)
15438 return op1;
15439
15440 if (GET_CODE (scratch) == SCRATCH)
15441 scratch = gen_reg_rtx (mode);
15442
15443 if (logical_operand (op2, mode))
15444 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15445 else
15446 emit_insn (gen_rtx_SET (scratch,
15447 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15448
15449 return scratch;
15450 }
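/* Illustrative expansion: for (a == b) with B a logical constant, this
   emits an XOR such as "xori scratch,a,b", and the caller then compares
   SCRATCH against zero; for other constants a subtraction is emitted
   instead, since a - b == 0 exactly when a == b.  */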
15451
15452 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15453 requires this. The result is mode MODE. */
15454 rtx
15455 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15456 {
15457 rtx cond[2];
15458 int n = 0;
15459 if (code == LTGT || code == LE || code == UNLT)
15460 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15461 if (code == LTGT || code == GE || code == UNGT)
15462 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15463 if (code == LE || code == GE || code == UNEQ)
15464 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15465 if (code == UNLT || code == UNGT || code == UNEQ)
15466 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15467
15468 gcc_assert (n == 2);
15469
15470 rtx cc = gen_reg_rtx (CCEQmode);
15471 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15472 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15473
15474 return cc;
15475 }
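/* Example (illustrative): for code LE this ORs the "lt" and "eq" bits of
   the FP compare result with a single cror, since "less than or equal"
   is not a directly testable CR bit.  */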
15476
15477 void
15478 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15479 {
15480 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15481 rtx_code cond_code = GET_CODE (condition_rtx);
15482
15483 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15484 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15485 ;
15486 else if (cond_code == NE
15487 || cond_code == GE || cond_code == LE
15488 || cond_code == GEU || cond_code == LEU
15489 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15490 {
15491 rtx not_result = gen_reg_rtx (CCEQmode);
15492 rtx not_op, rev_cond_rtx;
15493 machine_mode cc_mode;
15494
15495 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15496
15497 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15498 SImode, XEXP (condition_rtx, 0), const0_rtx);
15499 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15500 emit_insn (gen_rtx_SET (not_result, not_op));
15501 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15502 }
15503
15504 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15505 if (op_mode == VOIDmode)
15506 op_mode = GET_MODE (XEXP (operands[1], 1));
15507
15508 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15509 {
15510 PUT_MODE (condition_rtx, DImode);
15511 convert_move (operands[0], condition_rtx, 0);
15512 }
15513 else
15514 {
15515 PUT_MODE (condition_rtx, SImode);
15516 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15517 }
15518 }
15519
15520	/* Emit a conditional branch: test the comparison in OPERANDS[0], in mode MODE, and branch to the label in OPERANDS[3].  */
15521
15522 void
15523 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15524 {
15525 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15526 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15527 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15528 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15529 }
15530
15531 /* Return the string to output a conditional branch to LABEL, which is
15532 the operand template of the label, or NULL if the branch is really a
15533 conditional return.
15534
15535 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15536 condition code register and its mode specifies what kind of
15537 comparison we made.
15538
15539 REVERSED is nonzero if we should reverse the sense of the comparison.
15540
15541 INSN is the insn. */
15542
15543 char *
15544 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15545 {
15546 static char string[64];
15547 enum rtx_code code = GET_CODE (op);
15548 rtx cc_reg = XEXP (op, 0);
15549 machine_mode mode = GET_MODE (cc_reg);
15550 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15551 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15552 int really_reversed = reversed ^ need_longbranch;
15553 char *s = string;
15554 const char *ccode;
15555 const char *pred;
15556 rtx note;
15557
15558 validate_condition_mode (code, mode);
15559
15560 /* Work out which way this really branches. We could use
15561 reverse_condition_maybe_unordered here always but this
15562 makes the resulting assembler clearer. */
15563 if (really_reversed)
15564 {
15565 /* Reversal of FP compares takes care -- an ordered compare
15566 becomes an unordered compare and vice versa. */
15567 if (mode == CCFPmode)
15568 code = reverse_condition_maybe_unordered (code);
15569 else
15570 code = reverse_condition (code);
15571 }
15572
15573 switch (code)
15574 {
15575 /* Not all of these are actually distinct opcodes, but
15576 we distinguish them for clarity of the resulting assembler. */
15577 case NE: case LTGT:
15578 ccode = "ne"; break;
15579 case EQ: case UNEQ:
15580 ccode = "eq"; break;
15581 case GE: case GEU:
15582 ccode = "ge"; break;
15583 case GT: case GTU: case UNGT:
15584 ccode = "gt"; break;
15585 case LE: case LEU:
15586 ccode = "le"; break;
15587 case LT: case LTU: case UNLT:
15588 ccode = "lt"; break;
15589 case UNORDERED: ccode = "un"; break;
15590 case ORDERED: ccode = "nu"; break;
15591 case UNGE: ccode = "nl"; break;
15592 case UNLE: ccode = "ng"; break;
15593 default:
15594 gcc_unreachable ();
15595 }
15596
15597 /* Maybe we have a guess as to how likely the branch is. */
15598 pred = "";
15599 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15600 if (note != NULL_RTX)
15601 {
15602 /* PROB is the difference from 50%. */
15603 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15604 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15605
15606 /* Only hint for highly probable/improbable branches on newer cpus when
15607 we have real profile data, as static prediction overrides processor
15608 dynamic prediction. For older cpus we may as well always hint, but
15609 assume not taken for branches that are very close to 50% as a
15610 mispredicted taken branch is more expensive than a
15611 mispredicted not-taken branch. */
15612 if (rs6000_always_hint
15613 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15614 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15615 && br_prob_note_reliable_p (note)))
15616 {
15617 if (abs (prob) > REG_BR_PROB_BASE / 20
15618 && ((prob > 0) ^ need_longbranch))
15619 pred = "+";
15620 else
15621 pred = "-";
15622 }
15623 }
15624
15625 if (label == NULL)
15626 s += sprintf (s, "b%slr%s ", ccode, pred);
15627 else
15628 s += sprintf (s, "b%s%s ", ccode, pred);
15629
15630 /* We need to escape any '%' characters in the reg_names string.
15631 Assume they'd only be the first character.... */
15632 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15633 *s++ = '%';
15634 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15635
15636 if (label != NULL)
15637 {
15638 /* If the branch distance was too far, we may have to use an
15639 unconditional branch to go the distance. */
15640 if (need_longbranch)
15641 s += sprintf (s, ",$+8\n\tb %s", label);
15642 else
15643 s += sprintf (s, ",%s", label);
15644 }
15645
15646 return string;
15647 }
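/* Illustrative outputs (not from the original sources): a likely-taken
   equality branch on cr0 prints as "beq+ 0,L42"; when the target is out
   of conditional-branch range, the sense is inverted and a long branch
   is used: "bne 0,$+8\n\tb L42".  */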
15648
15649 /* Return insn for VSX or Altivec comparisons. */
15650
15651 static rtx
15652 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15653 {
15654 rtx mask;
15655 machine_mode mode = GET_MODE (op0);
15656
15657 switch (code)
15658 {
15659 default:
15660 break;
15661
15662 case GE:
15663 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15664 return NULL_RTX;
15665 /* FALLTHRU */
15666
15667 case EQ:
15668 case GT:
15669 case GTU:
15670 case ORDERED:
15671 case UNORDERED:
15672 case UNEQ:
15673 case LTGT:
15674 mask = gen_reg_rtx (mode);
15675 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15676 return mask;
15677 }
15678
15679 return NULL_RTX;
15680 }
15681
15682 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15683 DMODE is expected destination mode. This is a recursive function. */
15684
15685 static rtx
15686 rs6000_emit_vector_compare (enum rtx_code rcode,
15687 rtx op0, rtx op1,
15688 machine_mode dmode)
15689 {
15690 rtx mask;
15691 bool swap_operands = false;
15692 bool try_again = false;
15693
15694 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15695 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15696
15697 /* See if the comparison works as is. */
15698 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15699 if (mask)
15700 return mask;
15701
15702 switch (rcode)
15703 {
15704 case LT:
15705 rcode = GT;
15706 swap_operands = true;
15707 try_again = true;
15708 break;
15709 case LTU:
15710 rcode = GTU;
15711 swap_operands = true;
15712 try_again = true;
15713 break;
15714 case NE:
15715 case UNLE:
15716 case UNLT:
15717 case UNGE:
15718 case UNGT:
15719 /* Invert condition and try again.
15720 e.g., A != B becomes ~(A==B). */
15721 {
15722 enum rtx_code rev_code;
15723 enum insn_code nor_code;
15724 rtx mask2;
15725
15726 rev_code = reverse_condition_maybe_unordered (rcode);
15727 if (rev_code == UNKNOWN)
15728 return NULL_RTX;
15729
15730 nor_code = optab_handler (one_cmpl_optab, dmode);
15731 if (nor_code == CODE_FOR_nothing)
15732 return NULL_RTX;
15733
15734 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15735 if (!mask2)
15736 return NULL_RTX;
15737
15738 mask = gen_reg_rtx (dmode);
15739 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15740 return mask;
15741 }
15742 break;
15743 case GE:
15744 case GEU:
15745 case LE:
15746 case LEU:
15747 /* Try GT/GTU/LT/LTU OR EQ */
15748 {
15749 rtx c_rtx, eq_rtx;
15750 enum insn_code ior_code;
15751 enum rtx_code new_code;
15752
15753 switch (rcode)
15754 {
15755 case GE:
15756 new_code = GT;
15757 break;
15758
15759 case GEU:
15760 new_code = GTU;
15761 break;
15762
15763 case LE:
15764 new_code = LT;
15765 break;
15766
15767 case LEU:
15768 new_code = LTU;
15769 break;
15770
15771 default:
15772 gcc_unreachable ();
15773 }
15774
15775 ior_code = optab_handler (ior_optab, dmode);
15776 if (ior_code == CODE_FOR_nothing)
15777 return NULL_RTX;
15778
15779 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15780 if (!c_rtx)
15781 return NULL_RTX;
15782
15783 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15784 if (!eq_rtx)
15785 return NULL_RTX;
15786
15787 mask = gen_reg_rtx (dmode);
15788 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15789 return mask;
15790 }
15791 break;
15792 default:
15793 return NULL_RTX;
15794 }
15795
15796 if (try_again)
15797 {
15798 if (swap_operands)
15799 std::swap (op0, op1);
15800
15801 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15802 if (mask)
15803 return mask;
15804 }
15805
15806 /* You only get two chances. */
15807 return NULL_RTX;
15808 }
15809
15810 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15811 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15812 operands for the relation operation COND. */
15813
15814 int
15815 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15816 rtx cond, rtx cc_op0, rtx cc_op1)
15817 {
15818 machine_mode dest_mode = GET_MODE (dest);
15819 machine_mode mask_mode = GET_MODE (cc_op0);
15820 enum rtx_code rcode = GET_CODE (cond);
15821 rtx mask;
15822 bool invert_move = false;
15823
15824 if (VECTOR_UNIT_NONE_P (dest_mode))
15825 return 0;
15826
15827 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15828 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15829
15830 switch (rcode)
15831 {
15832 /* Swap operands if we can, and fall back to doing the operation as
15833 specified, and doing a NOR to invert the test. */
15834 case NE:
15835 case UNLE:
15836 case UNLT:
15837 case UNGE:
15838 case UNGT:
15839 /* Invert condition and try again.
15840 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15841 invert_move = true;
15842 rcode = reverse_condition_maybe_unordered (rcode);
15843 if (rcode == UNKNOWN)
15844 return 0;
15845 break;
15846
15847 case GE:
15848 case LE:
15849 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15850 {
15851 /* Invert condition to avoid compound test. */
15852 invert_move = true;
15853 rcode = reverse_condition (rcode);
15854 }
15855 break;
15856
15857 case GTU:
15858 case GEU:
15859 case LTU:
15860 case LEU:
15861
15862 /* Invert condition to avoid compound test if necessary. */
15863 if (rcode == GEU || rcode == LEU)
15864 {
15865 invert_move = true;
15866 rcode = reverse_condition (rcode);
15867 }
15868 break;
15869
15870 default:
15871 break;
15872 }
15873
15874 /* Get the vector mask for the given relational operations. */
15875 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15876
15877 if (!mask)
15878 return 0;
15879
15880 if (mask_mode != dest_mode)
15881 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
15882
15883 if (invert_move)
15884 std::swap (op_true, op_false);
15885
15886 /* The compare mask is -1/0 per element; exploit constant -1/0 true/false operands. */
15887 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15888 && (GET_CODE (op_true) == CONST_VECTOR
15889 || GET_CODE (op_false) == CONST_VECTOR))
15890 {
15891 rtx constant_0 = CONST0_RTX (dest_mode);
15892 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15893
15894 if (op_true == constant_m1 && op_false == constant_0)
15895 {
15896 emit_move_insn (dest, mask);
15897 return 1;
15898 }
15899
15900 else if (op_true == constant_0 && op_false == constant_m1)
15901 {
15902 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15903 return 1;
15904 }
15905
15906 /* If we can't use the vector comparison directly, perhaps we can use
15907 the mask for the true or false fields, instead of loading up a
15908 constant. */
15909 if (op_true == constant_m1)
15910 op_true = mask;
15911
15912 if (op_false == constant_0)
15913 op_false = mask;
15914 }
15915
15916 if (!REG_P (op_true) && !SUBREG_P (op_true))
15917 op_true = force_reg (dest_mode, op_true);
15918
15919 if (!REG_P (op_false) && !SUBREG_P (op_false))
15920 op_false = force_reg (dest_mode, op_false);
15921
15922 rtx tmp = gen_rtx_IOR (dest_mode,
15923 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
15924 op_false),
15925 gen_rtx_AND (dest_mode, mask, op_true));
15926 emit_insn (gen_rtx_SET (dest, tmp));
15927 return 1;
15928 }
15929
15930 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
15931 maximum or minimum with "C" semantics.
15932
15933 Unless you use -ffast-math, you can't use these instructions to replace
15934 forms that implicitly reverse the comparison, because the comparison
15935 might generate a NaN or signed zero.
15936
15937 I.e. the following can always be replaced:
15938 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15939 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15940 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15941 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15942
15943 The following can be replaced only if -ffast-math is used:
15944 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15945 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15946 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15947 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15948
15949 Move TRUE_COND to DEST if OP applied to the operands of the last comparison
15950 is nonzero/true, FALSE_COND if it is zero/false.
15951
15952 Return false if we can't generate the appropriate minimum or maximum, and
15953 true if we did emit the minimum or maximum. */
15954
15955 static bool
15956 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15957 {
15958 enum rtx_code code = GET_CODE (op);
15959 rtx op0 = XEXP (op, 0);
15960 rtx op1 = XEXP (op, 1);
15961 machine_mode compare_mode = GET_MODE (op0);
15962 machine_mode result_mode = GET_MODE (dest);
15963 bool max_p = false;
15964
15965 if (result_mode != compare_mode)
15966 return false;
15967
15968 if (code == GE || code == GT)
15969 max_p = true;
15970 else if (code == LE || code == LT)
15971 max_p = false;
15972 else
15973 return false;
15974
15975 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15976 ;
15977
15978 /* Only when NaNs and signed-zeros are not in effect, smax could be
15979 used for `op0 < op1 ? op1 : op0`, and smin could be used for
15980 `op0 > op1 ? op1 : op0`. */
15981 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15982 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15983 max_p = !max_p;
15984
15985 else
15986 return false;
15987
15988 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15989 return true;
15990 }
15991
15992 /* Possibly emit a floating point conditional move by generating a compare that
15993 sets a mask instruction and a XXSEL select instruction.
15994
15995 Move TRUE_COND to DEST if OP applied to the operands of the last comparison
15996 is nonzero/true, FALSE_COND if it is zero/false.
15997
15998 Return false if the operation cannot be generated, and true if we could
15999 generate the instruction. */
16000
16001 static bool
16002 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16003 {
16004 enum rtx_code code = GET_CODE (op);
16005 rtx op0 = XEXP (op, 0);
16006 rtx op1 = XEXP (op, 1);
16007 machine_mode compare_mode = GET_MODE (op0);
16008 machine_mode result_mode = GET_MODE (dest);
16009 rtx compare_rtx;
16010 rtx cmove_rtx;
16011 rtx clobber_rtx;
16012
16013 if (!can_create_pseudo_p ())
16014 return false;
16015
16016 /* We allow the comparison to be in either SFmode or DFmode and the
16017 true/false values to be in either SFmode or DFmode. I.e. we allow:
16018
16019 float a, b;
16020 double c, d, r;
16021
16022 r = (a == b) ? c : d;
16023
16024 and:
16025
16026 double a, b;
16027 float c, d, r;
16028
16029 r = (a == b) ? c : d;
16030
16031 but we don't allow intermixing the IEEE 128-bit floating point types with
16032 the 32/64-bit scalar types. */
16033
16034 if (!(compare_mode == result_mode
16035 || (compare_mode == SFmode && result_mode == DFmode)
16036 || (compare_mode == DFmode && result_mode == SFmode)))
16037 return false;
16038
16039 switch (code)
16040 {
16041 case EQ:
16042 case GE:
16043 case GT:
16044 break;
16045
16046 case NE:
16047 case LT:
16048 case LE:
16049 code = swap_condition (code);
16050 std::swap (op0, op1);
16051 break;
16052
16053 default:
16054 return false;
16055 }
16056
16057 /* Generate: [(parallel [(set (dest)
16058 (if_then_else (op (cmp1) (cmp2))
16059 (true)
16060 (false)))
16061 (clobber (scratch))])]. */
16062
16063 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
16064 cmove_rtx = gen_rtx_SET (dest,
16065 gen_rtx_IF_THEN_ELSE (result_mode,
16066 compare_rtx,
16067 true_cond,
16068 false_cond));
16069
16070 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
16071 emit_insn (gen_rtx_PARALLEL (VOIDmode,
16072 gen_rtvec (2, cmove_rtx, clobber_rtx)));
16073
16074 return true;
16075 }
16076
16077 /* Helper function to return true if the target has instructions to do a
16078 compare and set mask instruction that can be used with XXSEL to implement a
16079 conditional move. It is assumed that such a target also supports
16080 "C" minimum and maximum instructions. */
16081
16082 static bool
16083 have_compare_and_set_mask (machine_mode mode)
16084 {
16085 switch (mode)
16086 {
16087 case E_SFmode:
16088 case E_DFmode:
16089 return TARGET_P9_MINMAX;
16090
16091 case E_KFmode:
16092 case E_TFmode:
16093 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16094
16095 default:
16096 break;
16097 }
16098
16099 return false;
16100 }
16101
16102 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
16103 operands of the last comparison is nonzero/true, FALSE_COND if it
16104 is zero/false. Return false if the hardware has no such operation. */
16105
16106 bool
16107 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16108 {
16109 enum rtx_code code = GET_CODE (op);
16110 rtx op0 = XEXP (op, 0);
16111 rtx op1 = XEXP (op, 1);
16112 machine_mode compare_mode = GET_MODE (op0);
16113 machine_mode result_mode = GET_MODE (dest);
16114 rtx temp;
16115 bool is_against_zero;
16116
16117 /* These modes should always match. */
16118 if (GET_MODE (op1) != compare_mode
16119 /* In the isel case, however, we can use a compare immediate, so
16120 op1 may be a small constant. */
16121 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16122 return false;
16123 if (GET_MODE (true_cond) != result_mode)
16124 return false;
16125 if (GET_MODE (false_cond) != result_mode)
16126 return false;
16127
16128 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16129 instructions. */
16130 if (have_compare_and_set_mask (compare_mode)
16131 && have_compare_and_set_mask (result_mode))
16132 {
16133 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16134 return true;
16135
16136 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16137 return true;
16138 }
16139
16140 /* Don't allow using floating point comparisons for integer results for
16141 now. */
16142 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16143 return false;
16144
16145 /* First, work out if the hardware can do this at all, or
16146 if it's too slow.... */
16147 if (!FLOAT_MODE_P (compare_mode))
16148 {
16149 if (TARGET_ISEL)
16150 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16151 return false;
16152 }
16153
16154 is_against_zero = op1 == CONST0_RTX (compare_mode);
16155
16156 /* A floating-point subtract might overflow, underflow, or produce
16157 an inexact result, thus changing the floating-point flags, so it
16158 can't be generated if we care about that. It's safe if one side
16159 of the construct is zero, since then no subtract will be
16160 generated. */
16161 if (SCALAR_FLOAT_MODE_P (compare_mode)
16162 && flag_trapping_math && ! is_against_zero)
16163 return false;
16164
16165 /* Eliminate half of the comparisons by switching operands, this
16166 makes the remaining code simpler. */
16167 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16168 || code == LTGT || code == LT || code == UNLE)
16169 {
16170 code = reverse_condition_maybe_unordered (code);
16171 temp = true_cond;
16172 true_cond = false_cond;
16173 false_cond = temp;
16174 }
16175
16176 /* UNEQ and LTGT take four instructions for a comparison with zero,
16177 so it'll probably be faster to use a branch here too. */
16178 if (code == UNEQ && HONOR_NANS (compare_mode))
16179 return false;
16180
16181 /* We're going to try to implement comparisons by performing
16182 a subtract, then comparing against zero. Unfortunately,
16183 Inf - Inf is NaN which is not zero, and so if we don't
16184 know that the operand is finite and the comparison
16185 would treat EQ differently from UNORDERED, we can't do it. */
16186 if (HONOR_INFINITIES (compare_mode)
16187 && code != GT && code != UNGE
16188 && (!CONST_DOUBLE_P (op1)
16189 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16190 /* Constructs of the form (a OP b ? a : b) are safe. */
16191 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16192 || (! rtx_equal_p (op0, true_cond)
16193 && ! rtx_equal_p (op1, true_cond))))
16194 return false;
16195
16196 /* At this point we know we can use fsel. */
16197
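/* The fsel instruction computes FRT = (FRA >= 0.0) ? FRC : FRB, which is
   why everything below is reduced to a GE comparison against zero
   (illustrative summary of the ISA semantics).  */
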
16198 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16199 is no fsel instruction. */
16200 if (compare_mode != SFmode && compare_mode != DFmode)
16201 return false;
16202
16203 /* Reduce the comparison to a comparison against zero. */
16204 if (! is_against_zero)
16205 {
16206 temp = gen_reg_rtx (compare_mode);
16207 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16208 op0 = temp;
16209 op1 = CONST0_RTX (compare_mode);
16210 }
16211
16212 /* If we don't care about NaNs we can reduce some of the comparisons
16213 down to faster ones. */
16214 if (! HONOR_NANS (compare_mode))
16215 switch (code)
16216 {
16217 case GT:
16218 code = LE;
16219 temp = true_cond;
16220 true_cond = false_cond;
16221 false_cond = temp;
16222 break;
16223 case UNGE:
16224 code = GE;
16225 break;
16226 case UNEQ:
16227 code = EQ;
16228 break;
16229 default:
16230 break;
16231 }
16232
16233 /* Now, reduce everything down to a GE. */
16234 switch (code)
16235 {
16236 case GE:
16237 break;
16238
16239 case LE:
16240 temp = gen_reg_rtx (compare_mode);
16241 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16242 op0 = temp;
16243 break;
16244
16245 case ORDERED:
16246 temp = gen_reg_rtx (compare_mode);
16247 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16248 op0 = temp;
16249 break;
16250
16251 case EQ:
16252 temp = gen_reg_rtx (compare_mode);
16253 emit_insn (gen_rtx_SET (temp,
16254 gen_rtx_NEG (compare_mode,
16255 gen_rtx_ABS (compare_mode, op0))));
16256 op0 = temp;
16257 break;
16258
16259 case UNGE:
16260 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16261 temp = gen_reg_rtx (result_mode);
16262 emit_insn (gen_rtx_SET (temp,
16263 gen_rtx_IF_THEN_ELSE (result_mode,
16264 gen_rtx_GE (VOIDmode,
16265 op0, op1),
16266 true_cond, false_cond)));
16267 false_cond = true_cond;
16268 true_cond = temp;
16269
16270 temp = gen_reg_rtx (compare_mode);
16271 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16272 op0 = temp;
16273 break;
16274
16275 case GT:
16276 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16277 temp = gen_reg_rtx (result_mode);
16278 emit_insn (gen_rtx_SET (temp,
16279 gen_rtx_IF_THEN_ELSE (result_mode,
16280 gen_rtx_GE (VOIDmode,
16281 op0, op1),
16282 true_cond, false_cond)));
16283 true_cond = false_cond;
16284 false_cond = temp;
16285
16286 temp = gen_reg_rtx (compare_mode);
16287 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16288 op0 = temp;
16289 break;
16290
16291 default:
16292 gcc_unreachable ();
16293 }
16294
16295 emit_insn (gen_rtx_SET (dest,
16296 gen_rtx_IF_THEN_ELSE (result_mode,
16297 gen_rtx_GE (VOIDmode,
16298 op0, op1),
16299 true_cond, false_cond)));
16300 return true;
16301 }
16302
16303 /* Same as above, but for ints (isel). */
16304
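/* isel selects between two GPRs on a condition-register bit:
   "isel rT,rA,rB,BC" gives rT = CR[BC] ? rA : rB, so a compare must set
   the CR field first, e.g. (illustrative)
     cmpw cr0,rX,rY
     isel rD,rA,rB,0        ; rD = (rX < rY) ? rA : rB, bit 0 being LT.  */
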
16305 bool
16306 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16307 {
16308 rtx condition_rtx, cr;
16309 machine_mode mode = GET_MODE (dest);
16310 enum rtx_code cond_code;
16311 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16312 bool signedp;
16313
16314 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16315 return false;
16316
16317 /* We still have to do the compare, because isel doesn't do a
16318 compare, it just looks at the CRx bits set by a previous compare
16319 instruction. */
16320 condition_rtx = rs6000_generate_compare (op, mode);
16321 cond_code = GET_CODE (condition_rtx);
16322 cr = XEXP (condition_rtx, 0);
16323 signedp = GET_MODE (cr) == CCmode;
16324
16325 isel_func = (mode == SImode
16326 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
16327 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
16328
16329 switch (cond_code)
16330 {
16331 case LT: case GT: case LTU: case GTU: case EQ:
16332 /* isel handles these directly. */
16333 break;
16334
16335 default:
16336 /* We need to swap the sense of the comparison. */
16337 {
16338 std::swap (false_cond, true_cond);
16339 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16340 }
16341 break;
16342 }
16343
16344 false_cond = force_reg (mode, false_cond);
16345 if (true_cond != const0_rtx)
16346 true_cond = force_reg (mode, true_cond);
16347
16348 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16349
16350 return true;
16351 }
16352
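/* Emit the minimum or maximum operation CODE (SMIN/SMAX/UMIN/UMAX) of OP0
   and OP1 into DEST, using a direct min/max instruction when the mode has
   one and a conditional move otherwise.  */
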
16353 void
16354 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16355 {
16356 machine_mode mode = GET_MODE (op0);
16357 enum rtx_code c;
16358 rtx target;
16359
16360 /* VSX/altivec have direct min/max insns. */
16361 if ((code == SMAX || code == SMIN)
16362 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16363 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16364 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16365 {
16366 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16367 return;
16368 }
16369
16370 if (code == SMAX || code == SMIN)
16371 c = GE;
16372 else
16373 c = GEU;
16374
16375 if (code == SMAX || code == UMAX)
16376 target = emit_conditional_move (dest, { c, op0, op1, mode },
16377 op0, op1, mode, 0);
16378 else
16379 target = emit_conditional_move (dest, { c, op0, op1, mode },
16380 op1, op0, mode, 0);
16381 gcc_assert (target);
16382 if (target != dest)
16383 emit_move_insn (dest, target);
16384 }
16385
16386 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16387 COND is true. Mark the jump as unlikely to be taken. */
16388
16389 static void
16390 emit_unlikely_jump (rtx cond, rtx label)
16391 {
16392 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16393 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16394 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16395 }
16396
16397 /* A subroutine of the atomic operation splitters. Emit a load-locked
16398 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16399 the zero_extend operation. */
16400
16401 static void
16402 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16403 {
16404 rtx (*fn) (rtx, rtx) = NULL;
16405
16406 switch (mode)
16407 {
16408 case E_QImode:
16409 fn = gen_load_lockedqi;
16410 break;
16411 case E_HImode:
16412 fn = gen_load_lockedhi;
16413 break;
16414 case E_SImode:
16415 if (GET_MODE (mem) == QImode)
16416 fn = gen_load_lockedqi_si;
16417 else if (GET_MODE (mem) == HImode)
16418 fn = gen_load_lockedhi_si;
16419 else
16420 fn = gen_load_lockedsi;
16421 break;
16422 case E_DImode:
16423 fn = gen_load_lockeddi;
16424 break;
16425 case E_TImode:
16426 fn = gen_load_lockedti;
16427 break;
16428 default:
16429 gcc_unreachable ();
16430 }
16431 emit_insn (fn (reg, mem));
16432 }
16433
16434 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16435 instruction in MODE. */
16436
16437 static void
16438 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16439 {
16440 rtx (*fn) (rtx, rtx, rtx) = NULL;
16441
16442 switch (mode)
16443 {
16444 case E_QImode:
16445 fn = gen_store_conditionalqi;
16446 break;
16447 case E_HImode:
16448 fn = gen_store_conditionalhi;
16449 break;
16450 case E_SImode:
16451 fn = gen_store_conditionalsi;
16452 break;
16453 case E_DImode:
16454 fn = gen_store_conditionaldi;
16455 break;
16456 case E_TImode:
16457 fn = gen_store_conditionalti;
16458 break;
16459 default:
16460 gcc_unreachable ();
16461 }
16462
16463 /* Emit sync before stwcx. to address PPC405 Erratum. */
16464 if (PPC405_ERRATUM77)
16465 emit_insn (gen_hwsync ());
16466
16467 emit_insn (fn (res, mem, val));
16468 }
16469
16470 /* Expand barriers before and after a load_locked/store_cond sequence. */
16471
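/* Illustrative summary of the mapping implemented below: before the
   load_locked/store_cond loop, RELEASE and ACQ_REL emit lwsync and SEQ_CST
   emits hwsync; after the loop, ACQUIRE, ACQ_REL and SEQ_CST emit isync.  */
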
16472 static rtx
16473 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16474 {
16475 rtx addr = XEXP (mem, 0);
16476
16477 if (!legitimate_indirect_address_p (addr, reload_completed)
16478 && !legitimate_indexed_address_p (addr, reload_completed))
16479 {
16480 addr = force_reg (Pmode, addr);
16481 mem = replace_equiv_address_nv (mem, addr);
16482 }
16483
16484 switch (model)
16485 {
16486 case MEMMODEL_RELAXED:
16487 case MEMMODEL_CONSUME:
16488 case MEMMODEL_ACQUIRE:
16489 break;
16490 case MEMMODEL_RELEASE:
16491 case MEMMODEL_ACQ_REL:
16492 emit_insn (gen_lwsync ());
16493 break;
16494 case MEMMODEL_SEQ_CST:
16495 emit_insn (gen_hwsync ());
16496 break;
16497 default:
16498 gcc_unreachable ();
16499 }
16500 return mem;
16501 }
16502
16503 static void
16504 rs6000_post_atomic_barrier (enum memmodel model)
16505 {
16506 switch (model)
16507 {
16508 case MEMMODEL_RELAXED:
16509 case MEMMODEL_CONSUME:
16510 case MEMMODEL_RELEASE:
16511 break;
16512 case MEMMODEL_ACQUIRE:
16513 case MEMMODEL_ACQ_REL:
16514 case MEMMODEL_SEQ_CST:
16515 emit_insn (gen_isync ());
16516 break;
16517 default:
16518 gcc_unreachable ();
16519 }
16520 }
16521
16522 /* A subroutine of the various atomic expanders. For sub-word operations,
16523 we must adjust things to operate on SImode. Given the original MEM,
16524 return a new aligned memory. Also build and return the quantities by
16525 which to shift and mask. */
16526
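/* Illustrative sketch of the values computed for a QImode access at byte
   address A:
     mem   = SImode MEM at (A & -4)
     shift = 8 * (A & 3), XORed with 0x18 on big-endian
     mask  = 0xff << shift
   (HImode is analogous, with shift mask 0x10 and field mask 0xffff).  */
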
16527 static rtx
16528 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16529 {
16530 rtx addr, align, shift, mask, mem;
16531 HOST_WIDE_INT shift_mask;
16532 machine_mode mode = GET_MODE (orig_mem);
16533
16534 /* For smaller modes, we have to implement this via SImode. */
16535 shift_mask = (mode == QImode ? 0x18 : 0x10);
16536
16537 addr = XEXP (orig_mem, 0);
16538 addr = force_reg (GET_MODE (addr), addr);
16539
16540 /* Aligned memory containing the subword. Generate a new MEM. We
16541 do not want any of the existing MEM_ATTR data, as we're now
16542 accessing memory outside the original object. */
16543 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16544 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16545 mem = gen_rtx_MEM (SImode, align);
16546 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16547 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16548 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16549
16550 /* Shift amount for subword relative to aligned word. */
16551 shift = gen_reg_rtx (SImode);
16552 addr = gen_lowpart (SImode, addr);
16553 rtx tmp = gen_reg_rtx (SImode);
16554 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16555 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16556 if (BYTES_BIG_ENDIAN)
16557 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16558 shift, 1, OPTAB_LIB_WIDEN);
16559 *pshift = shift;
16560
16561 /* Mask for insertion. */
16562 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16563 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16564 *pmask = mask;
16565
16566 return mem;
16567 }
16568
16569 /* A subroutine of the various atomic expanders. For sub-word operands,
16570 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
16571
16572 static rtx
16573 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16574 {
16575 rtx x;
16576
16577 x = gen_reg_rtx (SImode);
16578 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16579 gen_rtx_NOT (SImode, mask),
16580 oldval)));
16581
16582 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16583
16584 return x;
16585 }
16586
16587 /* A subroutine of the various atomic expanders. For sub-word operands,
16588 extract WIDE to NARROW via SHIFT. */
16589
16590 static void
16591 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16592 {
16593 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16594 wide, 1, OPTAB_LIB_WIDEN);
16595 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16596 }
16597
16598 /* Expand an atomic compare and swap operation. */
16599
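/* The emitted sequence is, in outline (SImode strong CAS; illustrative
   pseudo-assembly, memory-model barriers omitted):
   1: lwarx  r,0,mem          ; load-locked
      cmpw   r,oldval
      bne-   2f               ; values differ: fail
      stwcx. newval,0,mem     ; store-conditional
      bne-   1b               ; reservation lost: retry (strong CAS only)
   2:                         ; CR0 ends up EQ on success, NE on failure
*/
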
16600 void
16601 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16602 {
16603 rtx boolval, retval, mem, oldval, newval, cond;
16604 rtx label1, label2, x, mask, shift;
16605 machine_mode mode, orig_mode;
16606 enum memmodel mod_s, mod_f;
16607 bool is_weak;
16608
16609 boolval = operands[0];
16610 retval = operands[1];
16611 mem = operands[2];
16612 oldval = operands[3];
16613 newval = operands[4];
16614 is_weak = (INTVAL (operands[5]) != 0);
16615 mod_s = memmodel_base (INTVAL (operands[6]));
16616 mod_f = memmodel_base (INTVAL (operands[7]));
16617 orig_mode = mode = GET_MODE (mem);
16618
16619 mask = shift = NULL_RTX;
16620 if (mode == QImode || mode == HImode)
16621 {
16622 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16623 lwarx and shift/mask operations. With power8, we need to do the
16624 comparison in SImode, but the store is still done in QI/HImode. */
16625 oldval = convert_modes (SImode, mode, oldval, 1);
16626
16627 if (!TARGET_SYNC_HI_QI)
16628 {
16629 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16630
16631 /* Shift and mask OLDVAL into position within the word. */
16632 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16633 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16634
16635 /* Shift and mask NEWVAL into position within the word. */
16636 newval = convert_modes (SImode, mode, newval, 1);
16637 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16638 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16639 }
16640
16641 /* Prepare to adjust the return value. */
16642 retval = gen_reg_rtx (SImode);
16643 mode = SImode;
16644 }
16645 else if (reg_overlap_mentioned_p (retval, oldval))
16646 oldval = copy_to_reg (oldval);
16647
16648 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16649 oldval = copy_to_mode_reg (mode, oldval);
16650
16651 if (reg_overlap_mentioned_p (retval, newval))
16652 newval = copy_to_reg (newval);
16653
16654 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16655
16656 label1 = NULL_RTX;
16657 if (!is_weak)
16658 {
16659 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16660 emit_label (XEXP (label1, 0));
16661 }
16662 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16663
16664 emit_load_locked (mode, retval, mem);
16665
16666 x = retval;
16667 if (mask)
16668 x = expand_simple_binop (SImode, AND, retval, mask,
16669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16670
16671 cond = gen_reg_rtx (CCmode);
16672 /* If we have TImode, synthesize a comparison. */
16673 if (mode != TImode)
16674 x = gen_rtx_COMPARE (CCmode, x, oldval);
16675 else
16676 {
16677 rtx xor1_result = gen_reg_rtx (DImode);
16678 rtx xor2_result = gen_reg_rtx (DImode);
16679 rtx or_result = gen_reg_rtx (DImode);
16680 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16681 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16682 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16683 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16684
16685 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16686 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16687 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16688 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16689 }
16690
16691 emit_insn (gen_rtx_SET (cond, x));
16692
16693 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16694 emit_unlikely_jump (x, label2);
16695
16696 x = newval;
16697 if (mask)
16698 x = rs6000_mask_atomic_subword (retval, newval, mask);
16699
16700 emit_store_conditional (orig_mode, cond, mem, x);
16701
16702 if (!is_weak)
16703 {
16704 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16705 emit_unlikely_jump (x, label1);
16706 }
16707
16708 if (!is_mm_relaxed (mod_f))
16709 emit_label (XEXP (label2, 0));
16710
16711 rs6000_post_atomic_barrier (mod_s);
16712
16713 if (is_mm_relaxed (mod_f))
16714 emit_label (XEXP (label2, 0));
16715
16716 if (shift)
16717 rs6000_finish_atomic_subword (operands[1], retval, shift);
16718 else if (mode != GET_MODE (operands[1]))
16719 convert_move (operands[1], retval, 1);
16720
16721 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16722 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16723 emit_insn (gen_rtx_SET (boolval, x));
16724 }
16725
16726 /* Expand an atomic exchange operation. */
16727
16728 void
16729 rs6000_expand_atomic_exchange (rtx operands[])
16730 {
16731 rtx retval, mem, val, cond;
16732 machine_mode mode;
16733 enum memmodel model;
16734 rtx label, x, mask, shift;
16735
16736 retval = operands[0];
16737 mem = operands[1];
16738 val = operands[2];
16739 model = memmodel_base (INTVAL (operands[3]));
16740 mode = GET_MODE (mem);
16741
16742 mask = shift = NULL_RTX;
16743 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16744 {
16745 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16746
16747 /* Shift and mask VAL into position within the word. */
16748 val = convert_modes (SImode, mode, val, 1);
16749 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16750 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16751
16752 /* Prepare to adjust the return value. */
16753 retval = gen_reg_rtx (SImode);
16754 mode = SImode;
16755 }
16756
16757 mem = rs6000_pre_atomic_barrier (mem, model);
16758
16759 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16760 emit_label (XEXP (label, 0));
16761
16762 emit_load_locked (mode, retval, mem);
16763
16764 x = val;
16765 if (mask)
16766 x = rs6000_mask_atomic_subword (retval, val, mask);
16767
16768 cond = gen_reg_rtx (CCmode);
16769 emit_store_conditional (mode, cond, mem, x);
16770
16771 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16772 emit_unlikely_jump (x, label);
16773
16774 rs6000_post_atomic_barrier (model);
16775
16776 if (shift)
16777 rs6000_finish_atomic_subword (operands[0], retval, shift);
16778 }
16779
16780 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16781 to perform. MEM is the memory on which to operate. VAL is the second
16782 operand of the binary operator. BEFORE and AFTER are optional locations to
16783 return the value of MEM either before or after the operation. MODEL_RTX
16784 is a CONST_INT containing the memory model to use. */
16785
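/* In outline, this emits a load-locked/store-conditional loop (illustrative
   pseudo-assembly for SImode, barriers omitted):
   1: lwarx  before,0,mem
      <after = before CODE val>     ; e.g. add after,before,val
      stwcx. after,0,mem
      bne-   1b                     ; reservation lost: retry
*/
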
16786 void
16787 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16788 rtx orig_before, rtx orig_after, rtx model_rtx)
16789 {
16790 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16791 machine_mode mode = GET_MODE (mem);
16792 machine_mode store_mode = mode;
16793 rtx label, x, cond, mask, shift;
16794 rtx before = orig_before, after = orig_after;
16795
16796 mask = shift = NULL_RTX;
16797 /* On power8, we want to do the operation in SImode. On previous systems,
16798 do the operation on the containing word and shift/mask to get the proper
16799 byte or halfword. */
16800 if (mode == QImode || mode == HImode)
16801 {
16802 if (TARGET_SYNC_HI_QI)
16803 {
16804 val = convert_modes (SImode, mode, val, 1);
16805
16806 /* Prepare to adjust the return value. */
16807 before = gen_reg_rtx (SImode);
16808 if (after)
16809 after = gen_reg_rtx (SImode);
16810 mode = SImode;
16811 }
16812 else
16813 {
16814 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16815
16816 /* Shift and mask VAL into position within the word. */
16817 val = convert_modes (SImode, mode, val, 1);
16818 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16819 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16820
16821 switch (code)
16822 {
16823 case IOR:
16824 case XOR:
16825 /* We've already zero-extended VAL. That is sufficient to
16826 make certain that it does not affect other bits. */
16827 mask = NULL;
16828 break;
16829
16830 case AND:
16831 /* If we make certain that all of the other bits in VAL are
16832 set, that will be sufficient to not affect other bits. */
16833 x = gen_rtx_NOT (SImode, mask);
16834 x = gen_rtx_IOR (SImode, x, val);
16835 emit_insn (gen_rtx_SET (val, x));
16836 mask = NULL;
16837 break;
16838
16839 case NOT:
16840 case PLUS:
16841 case MINUS:
16842 /* These will all affect bits outside the field and need
16843 adjustment via MASK within the loop. */
16844 break;
16845
16846 default:
16847 gcc_unreachable ();
16848 }
16849
16850 /* Prepare to adjust the return value. */
16851 before = gen_reg_rtx (SImode);
16852 if (after)
16853 after = gen_reg_rtx (SImode);
16854 store_mode = mode = SImode;
16855 }
16856 }
16857
16858 mem = rs6000_pre_atomic_barrier (mem, model);
16859
16860 label = gen_label_rtx ();
16861 emit_label (label);
16862 label = gen_rtx_LABEL_REF (VOIDmode, label);
16863
16864 if (before == NULL_RTX)
16865 before = gen_reg_rtx (mode);
16866
16867 emit_load_locked (mode, before, mem);
16868
16869 if (code == NOT)
16870 {
16871 x = expand_simple_binop (mode, AND, before, val,
16872 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16873 after = expand_simple_unop (mode, NOT, x, after, 1);
16874 }
16875 else
16876 {
16877 after = expand_simple_binop (mode, code, before, val,
16878 after, 1, OPTAB_LIB_WIDEN);
16879 }
16880
16881 x = after;
16882 if (mask)
16883 {
16884 x = expand_simple_binop (SImode, AND, after, mask,
16885 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16886 x = rs6000_mask_atomic_subword (before, x, mask);
16887 }
16888 else if (store_mode != mode)
16889 x = convert_modes (store_mode, mode, x, 1);
16890
16891 cond = gen_reg_rtx (CCmode);
16892 emit_store_conditional (store_mode, cond, mem, x);
16893
16894 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16895 emit_unlikely_jump (x, label);
16896
16897 rs6000_post_atomic_barrier (model);
16898
16899 if (shift)
16900 {
16901 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16902 then do the calculations in an SImode register. */
16903 if (orig_before)
16904 rs6000_finish_atomic_subword (orig_before, before, shift);
16905 if (orig_after)
16906 rs6000_finish_atomic_subword (orig_after, after, shift);
16907 }
16908 else if (store_mode != mode)
16909 {
16910 /* QImode/HImode on machines with lbarx/lharx where we do the native
16911 operation and then do the calculations in an SImode register. */
16912 if (orig_before)
16913 convert_move (orig_before, before, 1);
16914 if (orig_after)
16915 convert_move (orig_after, after, 1);
16916 }
16917 else if (orig_after && after != orig_after)
16918 emit_move_insn (orig_after, after);
16919 }
16920
16921 static GTY(()) alias_set_type TOC_alias_set = -1;
16922
16923 alias_set_type
16924 get_TOC_alias_set (void)
16925 {
16926 if (TOC_alias_set == -1)
16927 TOC_alias_set = new_alias_set ();
16928 return TOC_alias_set;
16929 }
16930
16931 /* The mode the ABI uses for a word. This is not the same as word_mode
16932 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16933
16934 static scalar_int_mode
16935 rs6000_abi_word_mode (void)
16936 {
16937 return TARGET_32BIT ? SImode : DImode;
16938 }
16939
16940 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16941 static char *
16942 rs6000_offload_options (void)
16943 {
16944 if (TARGET_64BIT)
16945 return xstrdup ("-foffload-abi=lp64");
16946 else
16947 return xstrdup ("-foffload-abi=ilp32");
16948 }
16949
16950 \f
16951 /* A quick summary of the various types of 'constant-pool tables'
16952 under PowerPC:
16953
16954 Target Flags Name One table per
16955 AIX (none) AIX TOC object file
16956 AIX -mfull-toc AIX TOC object file
16957 AIX -mminimal-toc AIX minimal TOC translation unit
16958 SVR4/EABI (none) SVR4 SDATA object file
16959 SVR4/EABI -fpic SVR4 pic object file
16960 SVR4/EABI -fPIC SVR4 PIC translation unit
16961 SVR4/EABI -mrelocatable EABI TOC function
16962 SVR4/EABI -maix AIX TOC object file
16963 SVR4/EABI -maix -mminimal-toc
16964 AIX minimal TOC translation unit
16965
16966 Name Reg. Set by entries contains:
16967 made by addrs? fp? sum?
16968
16969 AIX TOC 2 crt0 as Y option option
16970 AIX minimal TOC 30 prolog gcc Y Y option
16971 SVR4 SDATA 13 crt0 gcc N Y N
16972 SVR4 pic 30 prolog ld Y not yet N
16973 SVR4 PIC 30 prolog gcc Y option option
16974 EABI TOC 30 prolog gcc Y option option
16975
16976 */
16977
16978 /* Hash functions for the hash table. */
16979
16980 static unsigned
16981 rs6000_hash_constant (rtx k)
16982 {
16983 enum rtx_code code = GET_CODE (k);
16984 machine_mode mode = GET_MODE (k);
16985 unsigned result = (code << 3) ^ mode;
16986 const char *format;
16987 int flen, fidx;
16988
16989 format = GET_RTX_FORMAT (code);
16990 flen = strlen (format);
16991 fidx = 0;
16992
16993 switch (code)
16994 {
16995 case LABEL_REF:
16996 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16997
16998 case CONST_WIDE_INT:
16999 {
17000 int i;
17001 flen = CONST_WIDE_INT_NUNITS (k);
17002 for (i = 0; i < flen; i++)
17003 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
17004 return result;
17005 }
17006
17007 case CONST_DOUBLE:
17008 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
17009
17010 case CODE_LABEL:
17011 fidx = 3;
17012 break;
17013
17014 default:
17015 break;
17016 }
17017
17018 for (; fidx < flen; fidx++)
17019 switch (format[fidx])
17020 {
17021 case 's':
17022 {
17023 unsigned i, len;
17024 const char *str = XSTR (k, fidx);
17025 len = strlen (str);
17026 result = result * 613 + len;
17027 for (i = 0; i < len; i++)
17028 result = result * 613 + (unsigned) str[i];
17029 break;
17030 }
17031 case 'u':
17032 case 'e':
17033 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
17034 break;
17035 case 'i':
17036 case 'n':
17037 result = result * 613 + (unsigned) XINT (k, fidx);
17038 break;
17039 case 'w':
17040 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
17041 result = result * 613 + (unsigned) XWINT (k, fidx);
17042 else
17043 {
17044 size_t i;
17045 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
17046 result = result * 613 + (unsigned) (XWINT (k, fidx)
17047 >> CHAR_BIT * i);
17048 }
17049 break;
17050 case '0':
17051 break;
17052 default:
17053 gcc_unreachable ();
17054 }
17055
17056 return result;
17057 }
17058
17059 hashval_t
17060 toc_hasher::hash (toc_hash_struct *thc)
17061 {
17062 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
17063 }
17064
17065 /* Compare H1 and H2 for equivalence. */
17066
17067 bool
17068 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
17069 {
17070 rtx r1 = h1->key;
17071 rtx r2 = h2->key;
17072
17073 if (h1->key_mode != h2->key_mode)
17074 return 0;
17075
17076 return rtx_equal_p (r1, r2);
17077 }
17078
17079 /* These are the names given by the C++ front-end to vtables, and
17080 vtable-like objects. Ideally, this logic should not be here;
17081 instead, there should be some programmatic way of inquiring as
17082 to whether or not an object is a vtable. */
17083
17084 #define VTABLE_NAME_P(NAME) \
17085 (startswith (name, "_vt.") \
17086 || startswith (name, "_ZTV") \
17087 || startswith (name, "_ZTT") \
17088 || startswith (name, "_ZTI") \
17089 || startswith (name, "_ZTC"))
17090
17091 #ifdef NO_DOLLAR_IN_LABEL
17092 /* Return a GGC-allocated character string translating dollar signs in
17093 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17094
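/* E.g. "get$text" becomes "get_text"; a name with a leading '$' is
   returned unchanged, since then q == name (illustrative).  */
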
17095 const char *
17096 rs6000_xcoff_strip_dollar (const char *name)
17097 {
17098 char *strip, *p;
17099 const char *q;
17100 size_t len;
17101
17102 q = (const char *) strchr (name, '$');
17103
17104 if (q == 0 || q == name)
17105 return name;
17106
17107 len = strlen (name);
17108 strip = XALLOCAVEC (char, len + 1);
17109 strcpy (strip, name);
17110 p = strip + (q - name);
17111 while (p)
17112 {
17113 *p = '_';
17114 p = strchr (p + 1, '$');
17115 }
17116
17117 return ggc_alloc_string (strip, len);
17118 }
17119 #endif
17120
17121 void
17122 rs6000_output_symbol_ref (FILE *file, rtx x)
17123 {
17124 const char *name = XSTR (x, 0);
17125
17126 /* Currently C++ toc references to vtables can be emitted before it
17127 is decided whether the vtable is public or private. If this is
17128 the case, then the linker will eventually complain that there is
17129 a reference to an unknown section. Thus, for vtables only,
17130 we emit the TOC reference to reference the identifier and not the
17131 symbol. */
17132 if (VTABLE_NAME_P (name))
17133 {
17134 RS6000_OUTPUT_BASENAME (file, name);
17135 }
17136 else
17137 assemble_name (file, name);
17138 }
17139
17140 /* Output a TOC entry. We derive the entry name from what is being
17141 written. */
17142
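/* For example, the DFmode constant 1.0 on 64-bit AIX would come out
   roughly as (illustrative, modulo the internal label emitted with it):
     .tc FD_3ff00000_0[TC],0x3ff0000000000000
*/
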
17143 void
17144 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17145 {
17146 char buf[256];
17147 const char *name = buf;
17148 rtx base = x;
17149 HOST_WIDE_INT offset = 0;
17150
17151 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17152
17153 /* When the linker won't eliminate them, don't output duplicate
17154 TOC entries (this happens on AIX if there is any kind of TOC,
17155 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17156 CODE_LABELs. */
17157 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17158 {
17159 struct toc_hash_struct *h;
17160
17161 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17162 time because GGC is not initialized at that point. */
17163 if (toc_hash_table == NULL)
17164 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17165
17166 h = ggc_alloc<toc_hash_struct> ();
17167 h->key = x;
17168 h->key_mode = mode;
17169 h->labelno = labelno;
17170
17171 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17172 if (*found == NULL)
17173 *found = h;
17174 else /* This is indeed a duplicate.
17175 Set this label equal to that label. */
17176 {
17177 fputs ("\t.set ", file);
17178 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17179 fprintf (file, "%d,", labelno);
17180 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17181 fprintf (file, "%d\n", ((*found)->labelno));
17182
17183 #ifdef HAVE_AS_TLS
17184 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17185 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17186 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17187 {
17188 fputs ("\t.set ", file);
17189 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17190 fprintf (file, "%d,", labelno);
17191 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17192 fprintf (file, "%d\n", ((*found)->labelno));
17193 }
17194 #endif
17195 return;
17196 }
17197 }
17198
17199 /* If we're going to put a double constant in the TOC, make sure it's
17200 aligned properly when strict alignment is on. */
17201 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17202 && STRICT_ALIGNMENT
17203 && GET_MODE_BITSIZE (mode) >= 64
17204 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
17205 ASM_OUTPUT_ALIGN (file, 3);
17207
17208 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17209
17210 /* Handle FP constants specially. Note that if we have a minimal
17211 TOC, things we put here aren't actually in the TOC, so we can allow
17212 FP constants. */
17213 if (CONST_DOUBLE_P (x)
17214 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17215 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17216 {
17217 long k[4];
17218
17219 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17220 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17221 else
17222 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17223
17224 if (TARGET_64BIT)
17225 {
17226 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17227 fputs (DOUBLE_INT_ASM_OP, file);
17228 else
17229 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17230 k[0] & 0xffffffff, k[1] & 0xffffffff,
17231 k[2] & 0xffffffff, k[3] & 0xffffffff);
17232 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17233 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17234 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17235 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17236 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17237 return;
17238 }
17239 else
17240 {
17241 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17242 fputs ("\t.long ", file);
17243 else
17244 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17245 k[0] & 0xffffffff, k[1] & 0xffffffff,
17246 k[2] & 0xffffffff, k[3] & 0xffffffff);
17247 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17248 k[0] & 0xffffffff, k[1] & 0xffffffff,
17249 k[2] & 0xffffffff, k[3] & 0xffffffff);
17250 return;
17251 }
17252 }
17253 else if (CONST_DOUBLE_P (x)
17254 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17255 {
17256 long k[2];
17257
17258 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17259 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17260 else
17261 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17262
17263 if (TARGET_64BIT)
17264 {
17265 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17266 fputs (DOUBLE_INT_ASM_OP, file);
17267 else
17268 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17269 k[0] & 0xffffffff, k[1] & 0xffffffff);
17270 fprintf (file, "0x%lx%08lx\n",
17271 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17272 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17273 return;
17274 }
17275 else
17276 {
17277 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17278 fputs ("\t.long ", file);
17279 else
17280 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17281 k[0] & 0xffffffff, k[1] & 0xffffffff);
17282 fprintf (file, "0x%lx,0x%lx\n",
17283 k[0] & 0xffffffff, k[1] & 0xffffffff);
17284 return;
17285 }
17286 }
17287 else if (CONST_DOUBLE_P (x)
17288 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17289 {
17290 long l;
17291
17292 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17293 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17294 else
17295 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17296
17297 if (TARGET_64BIT)
17298 {
17299 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17300 fputs (DOUBLE_INT_ASM_OP, file);
17301 else
17302 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17303 if (WORDS_BIG_ENDIAN)
17304 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17305 else
17306 fprintf (file, "0x%lx\n", l & 0xffffffff);
17307 return;
17308 }
17309 else
17310 {
17311 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17312 fputs ("\t.long ", file);
17313 else
17314 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17315 fprintf (file, "0x%lx\n", l & 0xffffffff);
17316 return;
17317 }
17318 }
17319 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17320 {
17321 unsigned HOST_WIDE_INT low;
17322 HOST_WIDE_INT high;
17323
17324 low = INTVAL (x) & 0xffffffff;
17325 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17326
17327 /* TOC entries are always Pmode-sized, so when big-endian
17328 smaller integer constants in the TOC need to be padded.
17329 (This is still a win over putting the constants in
17330 a separate constant pool, because then we'd have
17331 to have both a TOC entry _and_ the actual constant.)
17332
17333 For a 32-bit target, CONST_INT values are loaded and shifted
17334 entirely within `low' and can be stored in one TOC entry. */
17335
17336 /* It would be easy to make this work, but it doesn't now. */
17337 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17338
17339 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17340 {
17341 low |= high << 32;
17342 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17343 high = (HOST_WIDE_INT) low >> 32;
17344 low &= 0xffffffff;
17345 }
17346
17347 if (TARGET_64BIT)
17348 {
17349 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17350 fputs (DOUBLE_INT_ASM_OP, file);
17351 else
17352 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17353 (long) high & 0xffffffff, (long) low & 0xffffffff);
17354 fprintf (file, "0x%lx%08lx\n",
17355 (long) high & 0xffffffff, (long) low & 0xffffffff);
17356 return;
17357 }
17358 else
17359 {
17360 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17361 {
17362 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17363 fputs ("\t.long ", file);
17364 else
17365 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17366 (long) high & 0xffffffff, (long) low & 0xffffffff);
17367 fprintf (file, "0x%lx,0x%lx\n",
17368 (long) high & 0xffffffff, (long) low & 0xffffffff);
17369 }
17370 else
17371 {
17372 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17373 fputs ("\t.long ", file);
17374 else
17375 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17376 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17377 }
17378 return;
17379 }
17380 }
17381
17382 if (GET_CODE (x) == CONST)
17383 {
17384 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17385 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17386
17387 base = XEXP (XEXP (x, 0), 0);
17388 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17389 }
17390
17391 switch (GET_CODE (base))
17392 {
17393 case SYMBOL_REF:
17394 name = XSTR (base, 0);
17395 break;
17396
17397 case LABEL_REF:
17398 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17399 CODE_LABEL_NUMBER (XEXP (base, 0)));
17400 break;
17401
17402 case CODE_LABEL:
17403 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17404 break;
17405
17406 default:
17407 gcc_unreachable ();
17408 }
17409
17410 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17411 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17412 else
17413 {
17414 fputs ("\t.tc ", file);
17415 RS6000_OUTPUT_BASENAME (file, name);
17416
17417 if (offset < 0)
17418 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17419 else if (offset)
17420 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17421
17422 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17423 after other TOC symbols, reducing overflow of small TOC access
17424 to [TC] symbols. */
17425 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17426 ? "[TE]," : "[TC],", file);
17427 }
17428
17429 /* Currently C++ toc references to vtables can be emitted before it
17430 is decided whether the vtable is public or private. If this is
17431 the case, then the linker will eventually complain that there is
17432 a TOC reference to an unknown section. Thus, for vtables only,
17433 we emit the TOC reference to reference the symbol and not the
17434 section. */
17435 if (VTABLE_NAME_P (name))
17436 {
17437 RS6000_OUTPUT_BASENAME (file, name);
17438 if (offset < 0)
17439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17440 else if (offset > 0)
17441 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17442 }
17443 else
17444 output_addr_const (file, x);
17445
17446 #if HAVE_AS_TLS
17447 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17448 {
17449 switch (SYMBOL_REF_TLS_MODEL (base))
17450 {
17451 case 0:
17452 break;
17453 case TLS_MODEL_LOCAL_EXEC:
17454 fputs ("@le", file);
17455 break;
17456 case TLS_MODEL_INITIAL_EXEC:
17457 fputs ("@ie", file);
17458 break;
17459 /* Use global-dynamic for local-dynamic. */
17460 case TLS_MODEL_GLOBAL_DYNAMIC:
17461 case TLS_MODEL_LOCAL_DYNAMIC:
17462 putc ('\n', file);
17463 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17464 fputs ("\t.tc .", file);
17465 RS6000_OUTPUT_BASENAME (file, name);
17466 fputs ("[TC],", file);
17467 output_addr_const (file, x);
17468 fputs ("@m", file);
17469 break;
17470 default:
17471 gcc_unreachable ();
17472 }
17473 }
17474 #endif
17475
17476 putc ('\n', file);
17477 }
17478 \f
17479 /* Output an assembler pseudo-op to write an ASCII string of N characters
17480 starting at P to FILE.
17481
17482 On the RS/6000, we have to do this using the .byte operation and
17483 write out special characters outside the quoted string.
17484 Also, the assembler is broken; very long strings are truncated,
17485 so we must artificially break them up early. */
17486
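/* For instance, the three input bytes "Hi\n" come out roughly as
   (illustrative):
     .byte "Hi"
     .byte 10
*/
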
17487 void
17488 output_ascii (FILE *file, const char *p, int n)
17489 {
17490 char c;
17491 int i, count_string;
17492 const char *for_string = "\t.byte \"";
17493 const char *for_decimal = "\t.byte ";
17494 const char *to_close = NULL;
17495
17496 count_string = 0;
17497 for (i = 0; i < n; i++)
17498 {
17499 c = *p++;
17500 if (c >= ' ' && c < 0177)
17501 {
17502 if (for_string)
17503 fputs (for_string, file);
17504 putc (c, file);
17505
17506 /* Write two quotes to get one. */
17507 if (c == '"')
17508 {
17509 putc (c, file);
17510 ++count_string;
17511 }
17512
17513 for_string = NULL;
17514 for_decimal = "\"\n\t.byte ";
17515 to_close = "\"\n";
17516 ++count_string;
17517
17518 if (count_string >= 512)
17519 {
17520 fputs (to_close, file);
17521
17522 for_string = "\t.byte \"";
17523 for_decimal = "\t.byte ";
17524 to_close = NULL;
17525 count_string = 0;
17526 }
17527 }
17528 else
17529 {
17530 if (for_decimal)
17531 fputs (for_decimal, file);
17532 fprintf (file, "%d", c);
17533
17534 for_string = "\n\t.byte \"";
17535 for_decimal = ", ";
17536 to_close = "\n";
17537 count_string = 0;
17538 }
17539 }
17540
17541 /* Now close the string if we have written one. Then end the line. */
17542 if (to_close)
17543 fputs (to_close, file);
17544 }
17545 \f
17546 /* Generate a unique section name for FILENAME for a section type
17547 represented by SECTION_DESC. Output goes into BUF.
17548
17549 SECTION_DESC can be any string, as long as it is different for each
17550 possible section type.
17551
17552 We name the section in the same manner as xlc. The name begins with an
17553 underscore followed by the filename (after stripping any leading directory
17554 names) with the last period replaced by the string SECTION_DESC. If
17555 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17556 the name. */
17557
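/* E.g. FILENAME "src/my-file.c" with SECTION_DESC "data" yields
   "_myfiledata": the directory part and non-alphanumeric characters are
   dropped and the last period is replaced by SECTION_DESC (illustrative).  */
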
17558 void
17559 rs6000_gen_section_name (char **buf, const char *filename,
17560 const char *section_desc)
17561 {
17562 const char *q, *after_last_slash, *last_period = 0;
17563 char *p;
17564 int len;
17565
17566 after_last_slash = filename;
17567 for (q = filename; *q; q++)
17568 {
17569 if (*q == '/')
17570 after_last_slash = q + 1;
17571 else if (*q == '.')
17572 last_period = q;
17573 }
17574
17575 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17576 *buf = (char *) xmalloc (len);
17577
17578 p = *buf;
17579 *p++ = '_';
17580
17581 for (q = after_last_slash; *q; q++)
17582 {
17583 if (q == last_period)
17584 {
17585 strcpy (p, section_desc);
17586 p += strlen (section_desc);
17587 break;
17588 }
17589
17590 else if (ISALNUM (*q))
17591 *p++ = *q;
17592 }
17593
17594 if (last_period == 0)
17595 strcpy (p, section_desc);
17596 else
17597 *p = '\0';
17598 }
17599 \f
17600 /* Emit profile function. */
17601
17602 void
17603 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17604 {
17605 /* Non-standard profiling for kernels, which just saves LR then calls
17606 _mcount without worrying about arg saves. The idea is to change
17607 the function prologue as little as possible as it isn't easy to
17608 account for arg save/restore code added just for _mcount. */
17609 if (TARGET_PROFILE_KERNEL)
17610 return;
17611
17612 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17613 {
17614 #ifndef NO_PROFILE_COUNTERS
17615 # define NO_PROFILE_COUNTERS 0
17616 #endif
17617 if (NO_PROFILE_COUNTERS)
17618 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17619 LCT_NORMAL, VOIDmode);
17620 else
17621 {
17622 char buf[30];
17623 const char *label_name;
17624 rtx fun;
17625
17626 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17627 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17628 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17629
17630 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17631 LCT_NORMAL, VOIDmode, fun, Pmode);
17632 }
17633 }
17634 else if (DEFAULT_ABI == ABI_DARWIN)
17635 {
17636 const char *mcount_name = RS6000_MCOUNT;
17637 int caller_addr_regno = LR_REGNO;
17638
17639 /* Be conservative and always set this, at least for now. */
17640 crtl->uses_pic_offset_table = 1;
17641
17642 #if TARGET_MACHO
17643 /* For PIC code, set up a stub and collect the caller's address
17644 from r0, which is where the prologue puts it. */
17645 if (MACHOPIC_INDIRECT
17646 && crtl->uses_pic_offset_table)
17647 caller_addr_regno = 0;
17648 #endif
17649 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17650 LCT_NORMAL, VOIDmode,
17651 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17652 }
17653 }
17654
17655 /* Write function profiler code. */
17656
17657 void
17658 output_function_profiler (FILE *file, int labelno)
17659 {
17660 char buf[100];
17661
17662 switch (DEFAULT_ABI)
17663 {
17664 default:
17665 gcc_unreachable ();
17666
17667 case ABI_V4:
17668 if (!TARGET_32BIT)
17669 {
17670 warning (0, "no profiling of 64-bit code for this ABI");
17671 return;
17672 }
17673 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17674 fprintf (file, "\tmflr %s\n", reg_names[0]);
17675 if (NO_PROFILE_COUNTERS)
17676 {
17677 asm_fprintf (file, "\tstw %s,4(%s)\n",
17678 reg_names[0], reg_names[1]);
17679 }
17680 else if (TARGET_SECURE_PLT && flag_pic)
17681 {
17682 if (TARGET_LINK_STACK)
17683 {
17684 char name[32];
17685 get_ppc476_thunk_name (name);
17686 asm_fprintf (file, "\tbl %s\n", name);
17687 }
17688 else
17689 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17690 asm_fprintf (file, "\tstw %s,4(%s)\n",
17691 reg_names[0], reg_names[1]);
17692 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17693 asm_fprintf (file, "\taddis %s,%s,",
17694 reg_names[12], reg_names[12]);
17695 assemble_name (file, buf);
17696 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17697 assemble_name (file, buf);
17698 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17699 }
17700 else if (flag_pic == 1)
17701 {
17702 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17703 asm_fprintf (file, "\tstw %s,4(%s)\n",
17704 reg_names[0], reg_names[1]);
17705 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17706 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17707 assemble_name (file, buf);
17708 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17709 }
17710 else if (flag_pic > 1)
17711 {
17712 asm_fprintf (file, "\tstw %s,4(%s)\n",
17713 reg_names[0], reg_names[1]);
17714 /* Now, we need to get the address of the label. */
17715 if (TARGET_LINK_STACK)
17716 {
17717 char name[32];
17718 get_ppc476_thunk_name (name);
17719 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17720 assemble_name (file, buf);
17721 fputs ("-.\n1:", file);
17722 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17723 asm_fprintf (file, "\taddi %s,%s,4\n",
17724 reg_names[11], reg_names[11]);
17725 }
17726 else
17727 {
17728 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17729 assemble_name (file, buf);
17730 fputs ("-.\n1:", file);
17731 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17732 }
17733 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17734 reg_names[0], reg_names[11]);
17735 asm_fprintf (file, "\tadd %s,%s,%s\n",
17736 reg_names[0], reg_names[0], reg_names[11]);
17737 }
17738 else
17739 {
17740 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17741 assemble_name (file, buf);
17742 fputs ("@ha\n", file);
17743 asm_fprintf (file, "\tstw %s,4(%s)\n",
17744 reg_names[0], reg_names[1]);
17745 asm_fprintf (file, "\tla %s,", reg_names[0]);
17746 assemble_name (file, buf);
17747 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17748 }
17749
17750 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17751 fprintf (file, "\tbl %s%s\n",
17752 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17753 break;
17754
17755 case ABI_AIX:
17756 case ABI_ELFv2:
17757 case ABI_DARWIN:
17758 /* Don't do anything, done in output_profile_hook (). */
17759 break;
17760 }
17761 }
17762
17763 \f
17764
17765 /* The following variable holds the last insn that was scheduled. */
17766
17767 static rtx_insn *last_scheduled_insn;
17768
17769 /* The following variable helps to balance the issuing of load and
17770 store instructions. */
17771
17772 static int load_store_pendulum;
17773
17774 /* The following variable helps pair divide insns during scheduling. */
17775 static int divide_cnt;
17776 /* The following variable helps pair and alternate vector and vector load
17777 insns during scheduling. */
17778 static int vec_pairing;
17779
17780
17781 /* Power4 load update and store update instructions are cracked into a
17782 load or store and an integer insn which are executed in the same cycle.
17783 Branches have their own dispatch slot which does not count against the
17784 GCC issue rate, but it changes the program flow so there are no other
17785 instructions to issue in this cycle. */
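/* For example, with MORE == 4 issue slots remaining on a dispatch-group
   target: a microcoded insn leaves 0 slots, a cracked insn leaves 2 (it
   occupies two slots), and any other recognized insn leaves 3, as
   implemented below.  */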
17786
17787 static int
17788 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17789 {
17790 last_scheduled_insn = insn;
17791 if (GET_CODE (PATTERN (insn)) == USE
17792 || GET_CODE (PATTERN (insn)) == CLOBBER)
17793 {
17794 cached_can_issue_more = more;
17795 return cached_can_issue_more;
17796 }
17797
17798 if (insn_terminates_group_p (insn, current_group))
17799 {
17800 cached_can_issue_more = 0;
17801 return cached_can_issue_more;
17802 }
17803
17804 /* If the insn has no reservation but we reach here, return the default. */
17805 if (recog_memoized (insn) < 0)
17806 return more;
17807
17808 if (rs6000_sched_groups)
17809 {
17810 if (is_microcoded_insn (insn))
17811 cached_can_issue_more = 0;
17812 else if (is_cracked_insn (insn))
17813 cached_can_issue_more = more > 2 ? more - 2 : 0;
17814 else
17815 cached_can_issue_more = more - 1;
17816
17817 return cached_can_issue_more;
17818 }
17819
17820 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17821 return 0;
17822
17823 cached_can_issue_more = more - 1;
17824 return cached_can_issue_more;
17825 }
17826
17827 static int
17828 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17829 {
17830 int r = rs6000_variable_issue_1 (insn, more);
17831 if (verbose)
17832 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17833 return r;
17834 }
17835
17836 /* Adjust the cost of a scheduling dependency. Return the new cost of
17837 a dependency of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
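/* For example, in the REG_DEP_TRUE case below, a load that depends on a
   narrower store has its cost raised by 14 cycles on dispatch-group and
   Power9/Power10 tunings, separating the two insns.  */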
17838
17839 static int
17840 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17841 unsigned int)
17842 {
17843 enum attr_type attr_type;
17844
17845 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17846 return cost;
17847
17848 switch (dep_type)
17849 {
17850 case REG_DEP_TRUE:
17851 {
17852 /* Data dependency; DEP_INSN writes a register that INSN reads
17853 some cycles later. */
17854
17855 /* Separate a load from a narrower, dependent store. */
17856 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17857 || rs6000_tune == PROCESSOR_POWER10)
17858 && GET_CODE (PATTERN (insn)) == SET
17859 && GET_CODE (PATTERN (dep_insn)) == SET
17860 && MEM_P (XEXP (PATTERN (insn), 1))
17861 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17862 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17863 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17864 return cost + 14;
17865
17866 attr_type = get_attr_type (insn);
17867
17868 switch (attr_type)
17869 {
17870 case TYPE_JMPREG:
17871 /* Tell the first scheduling pass about the latency between
17872 a mtctr and bctr (and mtlr and br/blr). The first
17873 scheduling pass will not know about this latency since
17874 the mtctr instruction, which has the latency associated
17875 with it, will be generated by reload. */
17876 return 4;
17877 case TYPE_BRANCH:
17878 /* Leave some extra cycles between a compare and its
17879 dependent branch, to inhibit expensive mispredicts. */
17880 if ((rs6000_tune == PROCESSOR_PPC603
17881 || rs6000_tune == PROCESSOR_PPC604
17882 || rs6000_tune == PROCESSOR_PPC604e
17883 || rs6000_tune == PROCESSOR_PPC620
17884 || rs6000_tune == PROCESSOR_PPC630
17885 || rs6000_tune == PROCESSOR_PPC750
17886 || rs6000_tune == PROCESSOR_PPC7400
17887 || rs6000_tune == PROCESSOR_PPC7450
17888 || rs6000_tune == PROCESSOR_PPCE5500
17889 || rs6000_tune == PROCESSOR_PPCE6500
17890 || rs6000_tune == PROCESSOR_POWER4
17891 || rs6000_tune == PROCESSOR_POWER5
17892 || rs6000_tune == PROCESSOR_POWER7
17893 || rs6000_tune == PROCESSOR_POWER8
17894 || rs6000_tune == PROCESSOR_POWER9
17895 || rs6000_tune == PROCESSOR_POWER10
17896 || rs6000_tune == PROCESSOR_CELL)
17897 && recog_memoized (dep_insn)
17898 && (INSN_CODE (dep_insn) >= 0))
17899
17900 switch (get_attr_type (dep_insn))
17901 {
17902 case TYPE_CMP:
17903 case TYPE_FPCOMPARE:
17904 case TYPE_CR_LOGICAL:
17905 return cost + 2;
17906 case TYPE_EXTS:
17907 case TYPE_MUL:
17908 if (get_attr_dot (dep_insn) == DOT_YES)
17909 return cost + 2;
17910 else
17911 break;
17912 case TYPE_SHIFT:
17913 if (get_attr_dot (dep_insn) == DOT_YES
17914 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17915 return cost + 2;
17916 else
17917 break;
17918 default:
17919 break;
17920 }
17921 break;
17922
17923 case TYPE_STORE:
17924 case TYPE_FPSTORE:
17925 if ((rs6000_tune == PROCESSOR_POWER6)
17926 && recog_memoized (dep_insn)
17927 && (INSN_CODE (dep_insn) >= 0))
17928 {
17929
17930 if (GET_CODE (PATTERN (insn)) != SET)
17931 /* If this happens, we have to extend this to schedule
17932 optimally. Return default for now. */
17933 return cost;
17934
17935 /* Adjust the cost for the case where the value written
17936 by a fixed point operation is used as the address
17937 gen value on a store. */
17938 switch (get_attr_type (dep_insn))
17939 {
17940 case TYPE_LOAD:
17941 case TYPE_CNTLZ:
17942 {
17943 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17944 return get_attr_sign_extend (dep_insn)
17945 == SIGN_EXTEND_YES ? 6 : 4;
17946 break;
17947 }
17948 case TYPE_SHIFT:
17949 {
17950 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17951 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17952 6 : 3;
17953 break;
17954 }
17955 case TYPE_INTEGER:
17956 case TYPE_ADD:
17957 case TYPE_LOGICAL:
17958 case TYPE_EXTS:
17959 case TYPE_INSERT:
17960 {
17961 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17962 return 3;
17963 break;
17964 }
17965 case TYPE_STORE:
17966 case TYPE_FPLOAD:
17967 case TYPE_FPSTORE:
17968 {
17969 if (get_attr_update (dep_insn) == UPDATE_YES
17970 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17971 return 3;
17972 break;
17973 }
17974 case TYPE_MUL:
17975 {
17976 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17977 return 17;
17978 break;
17979 }
17980 case TYPE_DIV:
17981 {
17982 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17983 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17984 break;
17985 }
17986 default:
17987 break;
17988 }
17989 }
17990 break;
17991
17992 case TYPE_LOAD:
17993 if ((rs6000_tune == PROCESSOR_POWER6)
17994 && recog_memoized (dep_insn)
17995 && (INSN_CODE (dep_insn) >= 0))
17996 {
17997
17998 /* Adjust the cost for the case where the value written
17999 by a fixed point instruction is used within the address
18000 gen portion of a subsequent load(u)(x). */
18001 switch (get_attr_type (dep_insn))
18002 {
18003 case TYPE_LOAD:
18004 case TYPE_CNTLZ:
18005 {
18006 if (set_to_load_agen (dep_insn, insn))
18007 return get_attr_sign_extend (dep_insn)
18008 == SIGN_EXTEND_YES ? 6 : 4;
18009 break;
18010 }
18011 case TYPE_SHIFT:
18012 {
18013 if (set_to_load_agen (dep_insn, insn))
18014 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18015 6 : 3;
18016 break;
18017 }
18018 case TYPE_INTEGER:
18019 case TYPE_ADD:
18020 case TYPE_LOGICAL:
18021 case TYPE_EXTS:
18022 case TYPE_INSERT:
18023 {
18024 if (set_to_load_agen (dep_insn, insn))
18025 return 3;
18026 break;
18027 }
18028 case TYPE_STORE:
18029 case TYPE_FPLOAD:
18030 case TYPE_FPSTORE:
18031 {
18032 if (get_attr_update (dep_insn) == UPDATE_YES
18033 && set_to_load_agen (dep_insn, insn))
18034 return 3;
18035 break;
18036 }
18037 case TYPE_MUL:
18038 {
18039 if (set_to_load_agen (dep_insn, insn))
18040 return 17;
18041 break;
18042 }
18043 case TYPE_DIV:
18044 {
18045 if (set_to_load_agen (dep_insn, insn))
18046 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18047 break;
18048 }
18049 default:
18050 break;
18051 }
18052 }
18053 break;
18054
18055 default:
18056 break;
18057 }
18058
18059 /* Fall out to return default cost. */
18060 }
18061 break;
18062
18063 case REG_DEP_OUTPUT:
18064 /* Output dependency; DEP_INSN writes a register that INSN writes some
18065 cycles later. */
18066 if ((rs6000_tune == PROCESSOR_POWER6)
18067 && recog_memoized (dep_insn)
18068 && (INSN_CODE (dep_insn) >= 0))
18069 {
18070 attr_type = get_attr_type (insn);
18071
18072 switch (attr_type)
18073 {
18074 case TYPE_FP:
18075 case TYPE_FPSIMPLE:
18076 if (get_attr_type (dep_insn) == TYPE_FP
18077 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
18078 return 1;
18079 break;
18080 default:
18081 break;
18082 }
18083 }
18084 /* Fall through, no cost for output dependency. */
18085 /* FALLTHRU */
18086
18087 case REG_DEP_ANTI:
18088 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18089 cycles later. */
18090 return 0;
18091
18092 default:
18093 gcc_unreachable ();
18094 }
18095
18096 return cost;
18097 }
18098
18099 /* Debug version of rs6000_adjust_cost. */
18100
18101 static int
18102 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18103 int cost, unsigned int dw)
18104 {
18105 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18106
18107 if (ret != cost)
18108 {
18109 const char *dep;
18110
18111 switch (dep_type)
18112 {
18113 default: dep = "unknown dependency"; break;
18114 case REG_DEP_TRUE: dep = "data dependency"; break;
18115 case REG_DEP_OUTPUT: dep = "output dependency"; break;
18116 case REG_DEP_ANTI: dep = "anti dependency"; break;
18117 }
18118
18119 fprintf (stderr,
18120 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18121 "%s, insn:\n", ret, cost, dep);
18122
18123 debug_rtx (insn);
18124 }
18125
18126 return ret;
18127 }
18128
18129 /* Return true if INSN is microcoded.
18130 Return false otherwise. */
18131
18132 static bool
18133 is_microcoded_insn (rtx_insn *insn)
18134 {
18135 if (!insn || !NONDEBUG_INSN_P (insn)
18136 || GET_CODE (PATTERN (insn)) == USE
18137 || GET_CODE (PATTERN (insn)) == CLOBBER)
18138 return false;
18139
18140 if (rs6000_tune == PROCESSOR_CELL)
18141 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18142
18143 if (rs6000_sched_groups
18144 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18145 {
18146 enum attr_type type = get_attr_type (insn);
18147 if ((type == TYPE_LOAD
18148 && get_attr_update (insn) == UPDATE_YES
18149 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18150 || ((type == TYPE_LOAD || type == TYPE_STORE)
18151 && get_attr_update (insn) == UPDATE_YES
18152 && get_attr_indexed (insn) == INDEXED_YES)
18153 || type == TYPE_MFCR)
18154 return true;
18155 }
18156
18157 return false;
18158 }
18159
18160 /* The function returns true if INSN is cracked into 2 instructions
18161 by the processor (and therefore occupies 2 issue slots). */
18162
18163 static bool
18164 is_cracked_insn (rtx_insn *insn)
18165 {
18166 if (!insn || !NONDEBUG_INSN_P (insn)
18167 || GET_CODE (PATTERN (insn)) == USE
18168 || GET_CODE (PATTERN (insn)) == CLOBBER)
18169 return false;
18170
18171 if (rs6000_sched_groups
18172 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18173 {
18174 enum attr_type type = get_attr_type (insn);
18175 if ((type == TYPE_LOAD
18176 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18177 && get_attr_update (insn) == UPDATE_NO)
18178 || (type == TYPE_LOAD
18179 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18180 && get_attr_update (insn) == UPDATE_YES
18181 && get_attr_indexed (insn) == INDEXED_NO)
18182 || (type == TYPE_STORE
18183 && get_attr_update (insn) == UPDATE_YES
18184 && get_attr_indexed (insn) == INDEXED_NO)
18185 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18186 && get_attr_update (insn) == UPDATE_YES)
18187 || (type == TYPE_CR_LOGICAL
18188 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18189 || (type == TYPE_EXTS
18190 && get_attr_dot (insn) == DOT_YES)
18191 || (type == TYPE_SHIFT
18192 && get_attr_dot (insn) == DOT_YES
18193 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18194 || (type == TYPE_MUL
18195 && get_attr_dot (insn) == DOT_YES)
18196 || type == TYPE_DIV
18197 || (type == TYPE_INSERT
18198 && get_attr_size (insn) == SIZE_32))
18199 return true;
18200 }
18201
18202 return false;
18203 }
18204
18205 /* The function returns true if INSN can be issued only from
18206 the branch slot. */
18207
18208 static bool
18209 is_branch_slot_insn (rtx_insn *insn)
18210 {
18211 if (!insn || !NONDEBUG_INSN_P (insn)
18212 || GET_CODE (PATTERN (insn)) == USE
18213 || GET_CODE (PATTERN (insn)) == CLOBBER)
18214 return false;
18215
18216 if (rs6000_sched_groups)
18217 {
18218 enum attr_type type = get_attr_type (insn);
18219 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18220 return true;
18221 return false;
18222 }
18223
18224 return false;
18225 }
18226
18227 /* Return true if OUT_INSN sets a value that is used in the
18228 address generation computation of IN_INSN. */
18229 static bool
18230 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18231 {
18232 rtx out_set, in_set;
18233
18234 /* For performance reasons, only handle the simple case where
18235 both insns are a single_set. */
18236 out_set = single_set (out_insn);
18237 if (out_set)
18238 {
18239 in_set = single_set (in_insn);
18240 if (in_set)
18241 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18242 }
18243
18244 return false;
18245 }
18246
18247 /* Try to determine base/offset/size parts of the given MEM.
18248 Return true if successful, false if any of the values couldn't
18249 be determined.
18250
18251 This function only looks for REG or REG+CONST address forms.
18252 REG+REG address form will return false. */
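/* For example, for a MEM of known size whose address is
   (plus (plus (reg r9) (const_int 16)) (const_int 4)), this returns
   *BASE = r9 and *OFFSET = 20; r9 here is an arbitrary example register.  */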
18253
18254 static bool
18255 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18256 HOST_WIDE_INT *size)
18257 {
18258 rtx addr_rtx;
18259 if (MEM_SIZE_KNOWN_P (mem))
18260 *size = MEM_SIZE (mem);
18261 else
18262 return false;
18263
18264 addr_rtx = (XEXP (mem, 0));
18265 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18266 addr_rtx = XEXP (addr_rtx, 1);
18267
18268 *offset = 0;
18269 while (GET_CODE (addr_rtx) == PLUS
18270 && CONST_INT_P (XEXP (addr_rtx, 1)))
18271 {
18272 *offset += INTVAL (XEXP (addr_rtx, 1));
18273 addr_rtx = XEXP (addr_rtx, 0);
18274 }
18275 if (!REG_P (addr_rtx))
18276 return false;
18277
18278 *base = addr_rtx;
18279 return true;
18280 }
18281
18282 /* If the target storage locations of arguments MEM1 and MEM2 are
18283 adjacent, then return the argument that has the lower address.
18284 Otherwise, return NULL_RTX. */
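/* For example (offsets illustrative): an 8-byte MEM1 at 0(r3) and an
   8-byte MEM2 at 8(r3) are adjacent and MEM1 is returned; with MEM2 at
   16(r3) instead, the result is NULL_RTX.  */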
18285
18286 static rtx
18287 adjacent_mem_locations (rtx mem1, rtx mem2)
18288 {
18289 rtx reg1, reg2;
18290 HOST_WIDE_INT off1, size1, off2, size2;
18291
18292 if (MEM_P (mem1)
18293 && MEM_P (mem2)
18294 && get_memref_parts (mem1, &reg1, &off1, &size1)
18295 && get_memref_parts (mem2, &reg2, &off2, &size2)
18296 && REGNO (reg1) == REGNO (reg2))
18297 {
18298 if (off1 + size1 == off2)
18299 return mem1;
18300 else if (off2 + size2 == off1)
18301 return mem2;
18302 }
18303
18304 return NULL_RTX;
18305 }
18306
18307 /* This function returns true if it can be determined that the two MEM
18308 locations overlap by at least 1 byte based on base reg/offset/size. */
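/* For example, a 4-byte access at offset 0 and an 8-byte access at
   offset 2 from the same base register overlap (0 <= 2 && 0 + 4 > 2);
   the same pair at offsets 0 and 4 does not.  */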
18309
18310 static bool
18311 mem_locations_overlap (rtx mem1, rtx mem2)
18312 {
18313 rtx reg1, reg2;
18314 HOST_WIDE_INT off1, size1, off2, size2;
18315
18316 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18317 && get_memref_parts (mem2, &reg2, &off2, &size2))
18318 return ((REGNO (reg1) == REGNO (reg2))
18319 && (((off1 <= off2) && (off1 + size1 > off2))
18320 || ((off2 <= off1) && (off2 + size2 > off1))));
18321
18322 return false;
18323 }
18324
18325 /* A C statement (sans semicolon) to update the integer scheduling
18326 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18327 INSN earlier, reduce the priority to execute INSN later. Do not
18328 define this macro if you do not need to adjust the scheduling
18329 priorities of insns. */
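/* For example, when rs6000_sched_restricted_insns_priority is 1, an insn
   that must begin a dispatch group is given the maximum priority, so the
   dispatch-slot restriction outweighs critical-path length in
   haifa-sched.cc:ready_sort().  */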
18330
18331 static int
18332 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18333 {
18334 rtx load_mem, str_mem;
18335 /* On machines (like the 750) which have asymmetric integer units,
18336 where one integer unit can do multiply and divides and the other
18337 can't, reduce the priority of multiply/divide so it is scheduled
18338 before other integer operations. */
18339
18340 #if 0
18341 if (! INSN_P (insn))
18342 return priority;
18343
18344 if (GET_CODE (PATTERN (insn)) == USE)
18345 return priority;
18346
18347 switch (rs6000_tune) {
18348 case PROCESSOR_PPC750:
18349 switch (get_attr_type (insn))
18350 {
18351 default:
18352 break;
18353
18354 case TYPE_MUL:
18355 case TYPE_DIV:
18356 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18357 priority, priority);
18358 if (priority >= 0 && priority < 0x01000000)
18359 priority >>= 3;
18360 break;
18361 }
18362 }
18363 #endif
18364
18365 if (insn_must_be_first_in_group (insn)
18366 && reload_completed
18367 && current_sched_info->sched_max_insns_priority
18368 && rs6000_sched_restricted_insns_priority)
18369 {
18370
18371 /* Prioritize insns that can be dispatched only in the first
18372 dispatch slot. */
18373 if (rs6000_sched_restricted_insns_priority == 1)
18374 /* Attach highest priority to insn. This means that in
18375 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18376 precede 'priority' (critical path) considerations. */
18377 return current_sched_info->sched_max_insns_priority;
18378 else if (rs6000_sched_restricted_insns_priority == 2)
18379 /* Increase priority of insn by a minimal amount. This means that in
18380 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18381 considerations precede dispatch-slot restriction considerations. */
18382 return (priority + 1);
18383 }
18384
18385 if (rs6000_tune == PROCESSOR_POWER6
18386 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18387 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18388 /* Attach highest priority to insn if the scheduler has just issued two
18389 stores and this instruction is a load, or two loads and this instruction
18390 is a store. Power6 wants loads and stores scheduled alternately
18391 when possible. */
18392 return current_sched_info->sched_max_insns_priority;
18393
18394 return priority;
18395 }
18396
18397 /* Return true if the instruction is nonpipelined on the Cell. */
18398 static bool
18399 is_nonpipeline_insn (rtx_insn *insn)
18400 {
18401 enum attr_type type;
18402 if (!insn || !NONDEBUG_INSN_P (insn)
18403 || GET_CODE (PATTERN (insn)) == USE
18404 || GET_CODE (PATTERN (insn)) == CLOBBER)
18405 return false;
18406
18407 type = get_attr_type (insn);
18408 if (type == TYPE_MUL
18409 || type == TYPE_DIV
18410 || type == TYPE_SDIV
18411 || type == TYPE_DDIV
18412 || type == TYPE_SSQRT
18413 || type == TYPE_DSQRT
18414 || type == TYPE_MFCR
18415 || type == TYPE_MFCRF
18416 || type == TYPE_MFJMPR)
18417 {
18418 return true;
18419 }
18420 return false;
18421 }
18422
18423
18424 /* Return how many instructions the machine can issue per cycle. */
18425
18426 static int
18427 rs6000_issue_rate (void)
18428 {
18429 /* Unless scheduling for register pressure, use issue rate of 1 for
18430 first scheduling pass to decrease degradation. */
18431 if (!reload_completed && !flag_sched_pressure)
18432 return 1;
18433
18434 switch (rs6000_tune) {
18435 case PROCESSOR_RS64A:
18436 case PROCESSOR_PPC601: /* ? */
18437 case PROCESSOR_PPC7450:
18438 return 3;
18439 case PROCESSOR_PPC440:
18440 case PROCESSOR_PPC603:
18441 case PROCESSOR_PPC750:
18442 case PROCESSOR_PPC7400:
18443 case PROCESSOR_PPC8540:
18444 case PROCESSOR_PPC8548:
18445 case PROCESSOR_CELL:
18446 case PROCESSOR_PPCE300C2:
18447 case PROCESSOR_PPCE300C3:
18448 case PROCESSOR_PPCE500MC:
18449 case PROCESSOR_PPCE500MC64:
18450 case PROCESSOR_PPCE5500:
18451 case PROCESSOR_PPCE6500:
18452 case PROCESSOR_TITAN:
18453 return 2;
18454 case PROCESSOR_PPC476:
18455 case PROCESSOR_PPC604:
18456 case PROCESSOR_PPC604e:
18457 case PROCESSOR_PPC620:
18458 case PROCESSOR_PPC630:
18459 return 4;
18460 case PROCESSOR_POWER4:
18461 case PROCESSOR_POWER5:
18462 case PROCESSOR_POWER6:
18463 case PROCESSOR_POWER7:
18464 return 5;
18465 case PROCESSOR_POWER8:
18466 return 7;
18467 case PROCESSOR_POWER9:
18468 return 6;
18469 case PROCESSOR_POWER10:
18470 return 8;
18471 default:
18472 return 1;
18473 }
18474 }
18475
18476 /* Return how many instructions to look ahead for better insn
18477 scheduling. */
18478
18479 static int
18480 rs6000_use_sched_lookahead (void)
18481 {
18482 switch (rs6000_tune)
18483 {
18484 case PROCESSOR_PPC8540:
18485 case PROCESSOR_PPC8548:
18486 return 4;
18487
18488 case PROCESSOR_CELL:
18489 return (reload_completed ? 8 : 0);
18490
18491 default:
18492 return 0;
18493 }
18494 }
18495
18496 /* We are choosing an insn from the ready queue. Return zero if INSN can be
18497 chosen. */
18498 static int
18499 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18500 {
18501 if (ready_index == 0)
18502 return 0;
18503
18504 if (rs6000_tune != PROCESSOR_CELL)
18505 return 0;
18506
18507 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18508
18509 if (!reload_completed
18510 || is_nonpipeline_insn (insn)
18511 || is_microcoded_insn (insn))
18512 return 1;
18513
18514 return 0;
18515 }
18516
18517 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18518 and return true. */
18519
18520 static bool
18521 find_mem_ref (rtx pat, rtx *mem_ref)
18522 {
18523 const char * fmt;
18524 int i, j;
18525
18526 /* stack_tie does not produce any real memory traffic. */
18527 if (tie_operand (pat, VOIDmode))
18528 return false;
18529
18530 if (MEM_P (pat))
18531 {
18532 *mem_ref = pat;
18533 return true;
18534 }
18535
18536 /* Recursively process the pattern. */
18537 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18538
18539 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18540 {
18541 if (fmt[i] == 'e')
18542 {
18543 if (find_mem_ref (XEXP (pat, i), mem_ref))
18544 return true;
18545 }
18546 else if (fmt[i] == 'E')
18547 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18548 {
18549 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18550 return true;
18551 }
18552 }
18553
18554 return false;
18555 }
18556
18557 /* Determine if PAT is a PATTERN of a load insn. */
18558
18559 static bool
18560 is_load_insn1 (rtx pat, rtx *load_mem)
18561 {
18562 if (!pat)
18563 return false;
18564
18565 if (GET_CODE (pat) == SET)
18566 {
18567 if (REG_P (SET_DEST (pat)))
18568 return find_mem_ref (SET_SRC (pat), load_mem);
18569 else
18570 return false;
18571 }
18572
18573 if (GET_CODE (pat) == PARALLEL)
18574 {
18575 int i;
18576
18577 for (i = 0; i < XVECLEN (pat, 0); i++)
18578 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18579 return true;
18580 }
18581
18582 return false;
18583 }
18584
18585 /* Determine if INSN loads from memory. */
18586
18587 static bool
18588 is_load_insn (rtx insn, rtx *load_mem)
18589 {
18590 if (!insn || !INSN_P (insn))
18591 return false;
18592
18593 if (CALL_P (insn))
18594 return false;
18595
18596 return is_load_insn1 (PATTERN (insn), load_mem);
18597 }
18598
18599 /* Determine if PAT is a PATTERN of a store insn. */
18600
18601 static bool
18602 is_store_insn1 (rtx pat, rtx *str_mem)
18603 {
18604 if (!pat)
18605 return false;
18606
18607 if (GET_CODE (pat) == SET)
18608 {
18609 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18610 return find_mem_ref (SET_DEST (pat), str_mem);
18611 else
18612 return false;
18613 }
18614
18615 if (GET_CODE (pat) == PARALLEL)
18616 {
18617 int i;
18618
18619 for (i = 0; i < XVECLEN (pat, 0); i++)
18620 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18621 return true;
18622 }
18623
18624 return false;
18625 }
18626
18627 /* Determine if INSN stores to memory. */
18628
18629 static bool
18630 is_store_insn (rtx insn, rtx *str_mem)
18631 {
18632 if (!insn || !INSN_P (insn))
18633 return false;
18634
18635 return is_store_insn1 (PATTERN (insn), str_mem);
18636 }
18637
18638 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18639
18640 static bool
18641 is_power9_pairable_vec_type (enum attr_type type)
18642 {
18643 switch (type)
18644 {
18645 case TYPE_VECSIMPLE:
18646 case TYPE_VECCOMPLEX:
18647 case TYPE_VECDIV:
18648 case TYPE_VECCMP:
18649 case TYPE_VECPERM:
18650 case TYPE_VECFLOAT:
18651 case TYPE_VECFDIV:
18652 case TYPE_VECDOUBLE:
18653 return true;
18654 default:
18655 break;
18656 }
18657 return false;
18658 }
18659
18660 /* Returns whether the dependence between INSN and NEXT is considered
18661 costly by the given target. */
18662
18663 static bool
18664 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18665 {
18666 rtx insn;
18667 rtx next;
18668 rtx load_mem, str_mem;
18669
18670 /* If the flag is not enabled, no dependence is considered costly;
18671 allow all dependent insns in the same group.
18672 This is the most aggressive option. */
18673 if (rs6000_sched_costly_dep == no_dep_costly)
18674 return false;
18675
18676 /* If the flag is set to 1, a dependence is always considered costly;
18677 do not allow dependent instructions in the same group.
18678 This is the most conservative option. */
18679 if (rs6000_sched_costly_dep == all_deps_costly)
18680 return true;
18681
18682 insn = DEP_PRO (dep);
18683 next = DEP_CON (dep);
18684
18685 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18686 && is_load_insn (next, &load_mem)
18687 && is_store_insn (insn, &str_mem))
18688 /* Prevent load after store in the same group. */
18689 return true;
18690
18691 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18692 && is_load_insn (next, &load_mem)
18693 && is_store_insn (insn, &str_mem)
18694 && DEP_TYPE (dep) == REG_DEP_TRUE
18695 && mem_locations_overlap (str_mem, load_mem))
18696 /* Prevent load after store in the same group if it is a true
18697 dependence. */
18698 return true;
18699
18700 /* The flag is set to X; dependences with latency >= X are considered costly,
18701 and will not be scheduled in the same group. */
18702 if (rs6000_sched_costly_dep <= max_dep_latency
18703 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18704 return true;
18705
18706 return false;
18707 }
18708
18709 /* Return the next insn after INSN that is found before TAIL is reached,
18710 skipping any "non-active" insns - insns that will not actually occupy
18711 an issue slot. Return NULL_RTX if such an insn is not found. */
18712
18713 static rtx_insn *
18714 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18715 {
18716 if (insn == NULL_RTX || insn == tail)
18717 return NULL;
18718
18719 while (1)
18720 {
18721 insn = NEXT_INSN (insn);
18722 if (insn == NULL_RTX || insn == tail)
18723 return NULL;
18724
18725 if (CALL_P (insn)
18726 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18727 || (NONJUMP_INSN_P (insn)
18728 && GET_CODE (PATTERN (insn)) != USE
18729 && GET_CODE (PATTERN (insn)) != CLOBBER
18730 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18731 break;
18732 }
18733 return insn;
18734 }
18735
18736 /* Move instruction at POS to the end of the READY list. */
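/* For example, with READY = {A, B, C, D} and POS = 1, the result is
   {A, C, D, B}.  The scheduler picks the next insn to issue from the
   end of the ready list, so "end" here means "issued next".  */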
18737
18738 static void
18739 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18740 {
18741 rtx_insn *tmp;
18742 int i;
18743
18744 tmp = ready[pos];
18745 for (i = pos; i < lastpos; i++)
18746 ready[i] = ready[i + 1];
18747 ready[lastpos] = tmp;
18748 }
18749
18750 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18751
18752 static int
18753 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18754 {
18755 /* For Power6, we need to handle some special cases to try and keep the
18756 store queue from overflowing and triggering expensive flushes.
18757
18758 This code monitors how load and store instructions are being issued
18759 and skews the ready list one way or the other to increase the likelihood
18760 that a desired instruction is issued at the proper time.
18761
18762 A couple of things are done. First, we maintain a "load_store_pendulum"
18763 to track the current state of load/store issue.
18764
18765 - If the pendulum is at zero, then no loads or stores have been
18766 issued in the current cycle so we do nothing.
18767
18768 - If the pendulum is 1, then a single load has been issued in this
18769 cycle and we attempt to locate another load in the ready list to
18770 issue with it.
18771
18772 - If the pendulum is -2, then two stores have already been
18773 issued in this cycle, so we increase the priority of the first load
18774 in the ready list to increase its likelihood of being chosen first
18775 in the next cycle.
18776
18777 - If the pendulum is -1, then a single store has been issued in this
18778 cycle and we attempt to locate another store in the ready list to
18779 issue with it, preferring a store to an adjacent memory location to
18780 facilitate store pairing in the store queue.
18781
18782 - If the pendulum is 2, then two loads have already been
18783 issued in this cycle, so we increase the priority of the first store
18784 in the ready list to increase its likelihood of being chosen first
18785 in the next cycle.
18786
18787 - If the pendulum is < -2 or > 2, then do nothing.
18788
18789 Note: This code covers the most common scenarios. There exist
18790 non-load/store instructions which make use of the LSU and which
18791 would need to be accounted for to strictly model the behavior
18792 of the machine. Those instructions are currently unaccounted
18793 for to help minimize compile time overhead of this code.
18794 */
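/* Illustrative trace for one cycle: the pendulum starts at 0; a store
   issues, moving it to -1, so we look for a second (ideally adjacent)
   store to issue with it; a second store moves it to -2, so the first
   load on the ready list gets a priority boost for the next cycle.  */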
18795 int pos;
18796 rtx load_mem, str_mem;
18797
18798 if (is_store_insn (last_scheduled_insn, &str_mem))
18799 /* Issuing a store, swing the load_store_pendulum to the left. */
18800 load_store_pendulum--;
18801 else if (is_load_insn (last_scheduled_insn, &load_mem))
18802 /* Issuing a load, swing the load_store_pendulum to the right. */
18803 load_store_pendulum++;
18804 else
18805 return cached_can_issue_more;
18806
18807 /* If the pendulum is balanced, or there is only one instruction on
18808 the ready list, then all is well, so return. */
18809 if ((load_store_pendulum == 0) || (lastpos <= 0))
18810 return cached_can_issue_more;
18811
18812 if (load_store_pendulum == 1)
18813 {
18814 /* A load has been issued in this cycle. Scan the ready list
18815 for another load to issue with it. */
18816 pos = lastpos;
18817
18818 while (pos >= 0)
18819 {
18820 if (is_load_insn (ready[pos], &load_mem))
18821 {
18822 /* Found a load. Move it to the head of the ready list,
18823 and adjust its priority so that it is more likely to
18824 stay there. */
18825 move_to_end_of_ready (ready, pos, lastpos);
18826
18827 if (!sel_sched_p ()
18828 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18829 INSN_PRIORITY (ready[lastpos])++;
18830 break;
18831 }
18832 pos--;
18833 }
18834 }
18835 else if (load_store_pendulum == -2)
18836 {
18837 /* Two stores have been issued in this cycle. Increase the
18838 priority of the first load in the ready list to favor it for
18839 issuing in the next cycle. */
18840 pos = lastpos;
18841
18842 while (pos >= 0)
18843 {
18844 if (is_load_insn (ready[pos], &load_mem)
18845 && !sel_sched_p ()
18846 && INSN_PRIORITY_KNOWN (ready[pos]))
18847 {
18848 INSN_PRIORITY (ready[pos])++;
18849
18850 /* Adjust the pendulum to account for the fact that a load
18851 was found and increased in priority. This is to prevent
18852 increasing the priority of multiple loads. */
18853 load_store_pendulum--;
18854
18855 break;
18856 }
18857 pos--;
18858 }
18859 }
18860 else if (load_store_pendulum == -1)
18861 {
18862 /* A store has been issued in this cycle. Scan the ready list for
18863 another store to issue with it, preferring a store to an adjacent
18864 memory location. */
18865 int first_store_pos = -1;
18866
18867 pos = lastpos;
18868
18869 while (pos >= 0)
18870 {
18871 if (is_store_insn (ready[pos], &str_mem))
18872 {
18873 rtx str_mem2;
18874 /* Maintain the index of the first store found on the
18875 list. */
18876 if (first_store_pos == -1)
18877 first_store_pos = pos;
18878
18879 if (is_store_insn (last_scheduled_insn, &str_mem2)
18880 && adjacent_mem_locations (str_mem, str_mem2))
18881 {
18882 /* Found an adjacent store. Move it to the head of the
18883 ready list, and adjust its priority so that it is
18884 more likely to stay there. */
18885 move_to_end_of_ready (ready, pos, lastpos);
18886
18887 if (!sel_sched_p ()
18888 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18889 INSN_PRIORITY (ready[lastpos])++;
18890
18891 first_store_pos = -1;
18892
18893 break;
18894 }
18895 }
18896 pos--;
18897 }
18898
18899 if (first_store_pos >= 0)
18900 {
18901 /* An adjacent store wasn't found, but a non-adjacent store was,
18902 so move the non-adjacent store to the front of the ready
18903 list, and adjust its priority so that it is more likely to
18904 stay there. */
18905 move_to_end_of_ready (ready, first_store_pos, lastpos);
18906 if (!sel_sched_p ()
18907 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18908 INSN_PRIORITY (ready[lastpos])++;
18909 }
18910 }
18911 else if (load_store_pendulum == 2)
18912 {
18913 /* Two loads have been issued in this cycle. Increase the priority
18914 of the first store in the ready list to favor it for issuing in
18915 the next cycle. */
18916 pos = lastpos;
18917
18918 while (pos >= 0)
18919 {
18920 if (is_store_insn (ready[pos], &str_mem)
18921 && !sel_sched_p ()
18922 && INSN_PRIORITY_KNOWN (ready[pos]))
18923 {
18924 INSN_PRIORITY (ready[pos])++;
18925
18926 /* Adjust the pendulum to account for the fact that a store
18927 was found and increased in priority. This is to prevent
18928 increasing the priority of multiple stores. */
18929 load_store_pendulum++;
18930
18931 break;
18932 }
18933 pos--;
18934 }
18935 }
18936
18937 return cached_can_issue_more;
18938 }
18939
18940 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18941
18942 static int
18943 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18944 {
18945 int pos;
18946 enum attr_type type, type2;
18947
18948 type = get_attr_type (last_scheduled_insn);
18949
18950 /* Try to issue fixed point divides back-to-back in pairs so they will be
18951 routed to separate execution units and execute in parallel. */
18952 if (type == TYPE_DIV && divide_cnt == 0)
18953 {
18954 /* First divide has been scheduled. */
18955 divide_cnt = 1;
18956
18957 /* Scan the ready list looking for another divide, if found move it
18958 to the end of the list so it is chosen next. */
18959 pos = lastpos;
18960 while (pos >= 0)
18961 {
18962 if (recog_memoized (ready[pos]) >= 0
18963 && get_attr_type (ready[pos]) == TYPE_DIV)
18964 {
18965 move_to_end_of_ready (ready, pos, lastpos);
18966 break;
18967 }
18968 pos--;
18969 }
18970 }
18971 else
18972 {
18973 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18974 divide_cnt = 0;
18975
18976 /* The best dispatch throughput for vector and vector load insns can be
18977 achieved by interleaving a vector and vector load such that they'll
18978 dispatch to the same superslice. If this pairing cannot be achieved
18979 then it is best to pair vector insns together and vector load insns
18980 together.
18981
18982 To aid in this pairing, vec_pairing maintains the current state with
18983 the following values:
18984
18985 0 : Initial state, no vecload/vector pairing has been started.
18986
18987 1 : A vecload or vector insn has been issued and a candidate for
18988 pairing has been found and moved to the end of the ready
18989 list. */
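/* For example, after a vecload issues with vec_pairing == 0, the first
   pairable vector insn found on the ready list is moved to the end so
   it is scheduled next, and vec_pairing becomes 1; once the pair
   completes, the state is reset to 0 below.  */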
18990 if (type == TYPE_VECLOAD)
18991 {
18992 /* Issued a vecload. */
18993 if (vec_pairing == 0)
18994 {
18995 int vecload_pos = -1;
18996 /* We issued a single vecload, look for a vector insn to pair it
18997 with. If one isn't found, try to pair another vecload. */
18998 pos = lastpos;
18999 while (pos >= 0)
19000 {
19001 if (recog_memoized (ready[pos]) >= 0)
19002 {
19003 type2 = get_attr_type (ready[pos]);
19004 if (is_power9_pairable_vec_type (type2))
19005 {
19006 /* Found a vector insn to pair with, move it to the
19007 end of the ready list so it is scheduled next. */
19008 move_to_end_of_ready (ready, pos, lastpos);
19009 vec_pairing = 1;
19010 return cached_can_issue_more;
19011 }
19012 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
19013 /* Remember position of first vecload seen. */
19014 vecload_pos = pos;
19015 }
19016 pos--;
19017 }
19018 if (vecload_pos >= 0)
19019 {
19020 /* Didn't find a vector to pair with but did find a vecload,
19021 move it to the end of the ready list. */
19022 move_to_end_of_ready (ready, vecload_pos, lastpos);
19023 vec_pairing = 1;
19024 return cached_can_issue_more;
19025 }
19026 }
19027 }
19028 else if (is_power9_pairable_vec_type (type))
19029 {
19030 /* Issued a vector operation. */
19031 if (vec_pairing == 0)
19032 {
19033 int vec_pos = -1;
19034 /* We issued a single vector insn, look for a vecload to pair it
19035 with. If one isn't found, try to pair another vector. */
19036 pos = lastpos;
19037 while (pos >= 0)
19038 {
19039 if (recog_memoized (ready[pos]) >= 0)
19040 {
19041 type2 = get_attr_type (ready[pos]);
19042 if (type2 == TYPE_VECLOAD)
19043 {
19044 /* Found a vecload insn to pair with, move it to the
19045 end of the ready list so it is scheduled next. */
19046 move_to_end_of_ready (ready, pos, lastpos);
19047 vec_pairing = 1;
19048 return cached_can_issue_more;
19049 }
19050 else if (is_power9_pairable_vec_type (type2)
19051 && vec_pos == -1)
19052 /* Remember position of first vector insn seen. */
19053 vec_pos = pos;
19054 }
19055 pos--;
19056 }
19057 if (vec_pos >= 0)
19058 {
19059 /* Didn't find a vecload to pair with but did find a vector
19060 insn, move it to the end of the ready list. */
19061 move_to_end_of_ready (ready, vec_pos, lastpos);
19062 vec_pairing = 1;
19063 return cached_can_issue_more;
19064 }
19065 }
19066 }
19067
19068 /* We've either finished a vec/vecload pair, couldn't find an insn to
19069 continue the current pair, or the last insn had nothing to do
19070 with pairing. In any case, reset the state. */
19071 vec_pairing = 0;
19072 }
19073
19074 return cached_can_issue_more;
19075 }
19076
19077 /* Determine if INSN is a store to memory that can be fused with a similar
19078 adjacent store. */
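/* For illustration (registers are examples only): "stw r10,0(r9)"
   followed by "stw r11,4(r9)" qualifies, since both are non-prefixed,
   non-update, non-indexed word-sized integer stores.  */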
19079
19080 static bool
19081 is_fusable_store (rtx_insn *insn, rtx *str_mem)
19082 {
19083 /* Insn must be a non-prefixed base+disp form store. */
19084 if (is_store_insn (insn, str_mem)
19085 && get_attr_prefixed (insn) == PREFIXED_NO
19086 && get_attr_update (insn) == UPDATE_NO
19087 && get_attr_indexed (insn) == INDEXED_NO)
19088 {
19089 /* Further restrictions by mode and size. */
19090 if (!MEM_SIZE_KNOWN_P (*str_mem))
19091 return false;
19092
19093 machine_mode mode = GET_MODE (*str_mem);
19094 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
19095
19096 if (INTEGRAL_MODE_P (mode))
19097 /* Must be word or dword size. */
19098 return (size == 4 || size == 8);
19099 else if (FLOAT_MODE_P (mode))
19100 /* Must be dword size. */
19101 return (size == 8);
19102 }
19103
19104 return false;
19105 }
19106
19107 /* Do Power10 specific reordering of the ready list. */
19108
19109 static int
19110 power10_sched_reorder (rtx_insn **ready, int lastpos)
19111 {
19112 rtx mem1;
19113
19114 /* Do store fusion during sched2 only. */
19115 if (!reload_completed)
19116 return cached_can_issue_more;
19117
19118 /* If the prior insn finished off a store fusion pair then simply
19119 reset the counter and return, nothing more to do. */
19120 if (load_store_pendulum != 0)
19121 {
19122 load_store_pendulum = 0;
19123 return cached_can_issue_more;
19124 }
19125
19126 /* Try to pair certain store insns to adjacent memory locations
19127 so that the hardware will fuse them to a single operation. */
19128 if (TARGET_P10_FUSION && TARGET_P10_FUSION_2STORE
19129 && is_fusable_store (last_scheduled_insn, &mem1))
19130 {
19131
19132 /* A fusable store was just scheduled. Scan the ready list for another
19133 store that it can fuse with. */
19134 int pos = lastpos;
19135 while (pos >= 0)
19136 {
19137 rtx mem2;
19138 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19139 must be ascending only. */
19140 if (is_fusable_store (ready[pos], &mem2)
19141 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19142 && adjacent_mem_locations (mem1, mem2))
19143 || (FLOAT_MODE_P (GET_MODE (mem1))
19144 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19145 {
19146 /* Found a fusable store. Move it to the end of the ready list
19147 so it is scheduled next. */
19148 move_to_end_of_ready (ready, pos, lastpos);
19149
19150 load_store_pendulum = -1;
19151 break;
19152 }
19153 pos--;
19154 }
19155 }
19156
19157 return cached_can_issue_more;
19158 }
19159
19160 /* We are about to begin issuing insns for this clock cycle. */
19161
19162 static int
19163 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19164 rtx_insn **ready ATTRIBUTE_UNUSED,
19165 int *pn_ready ATTRIBUTE_UNUSED,
19166 int clock_var ATTRIBUTE_UNUSED)
19167 {
19168 int n_ready = *pn_ready;
19169
19170 if (sched_verbose)
19171 fprintf (dump, "// rs6000_sched_reorder :\n");
19172
19173 /* Reorder the ready list if the second-to-last ready insn
19174 is a non-pipelined insn. */
19175 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19176 {
19177 if (is_nonpipeline_insn (ready[n_ready - 1])
19178 && (recog_memoized (ready[n_ready - 2]) > 0))
19179 /* Simply swap first two insns. */
19180 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19181 }
19182
19183 if (rs6000_tune == PROCESSOR_POWER6)
19184 load_store_pendulum = 0;
19185
19186 /* Do Power10 dependent reordering. */
19187 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19188 power10_sched_reorder (ready, n_ready - 1);
19189
19190 return rs6000_issue_rate ();
19191 }
19192
19193 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19194
19195 static int
19196 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19197 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19198 {
19199 if (sched_verbose)
19200 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19201
19202 /* Do Power6 dependent reordering if necessary. */
19203 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19204 return power6_sched_reorder2 (ready, *pn_ready - 1);
19205
19206 /* Do Power9 dependent reordering if necessary. */
19207 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19208 && recog_memoized (last_scheduled_insn) >= 0)
19209 return power9_sched_reorder2 (ready, *pn_ready - 1);
19210
19211 /* Do Power10 dependent reordering. */
19212 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19213 return power10_sched_reorder (ready, *pn_ready - 1);
19214
19215 return cached_can_issue_more;
19216 }
19217
19218 /* Return whether the presence of INSN causes a dispatch group termination
19219 of group WHICH_GROUP.
19220
19221 If WHICH_GROUP == current_group, this function will return true if INSN
19222 causes the termination of the current group (i.e, the dispatch group to
19223 which INSN belongs). This means that INSN will be the last insn in the
19224 group it belongs to.
19225
19226 If WHICH_GROUP == previous_group, this function will return true if INSN
19227 causes the termination of the previous group (i.e, the dispatch group that
19228 precedes the group to which INSN belongs). This means that INSN will be
19229 the first insn in the group it belongs to. */
19230
19231 static bool
19232 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19233 {
19234 bool first, last;
19235
19236 if (! insn)
19237 return false;
19238
19239 first = insn_must_be_first_in_group (insn);
19240 last = insn_must_be_last_in_group (insn);
19241
19242 if (first && last)
19243 return true;
19244
19245 if (which_group == current_group)
19246 return last;
19247 else if (which_group == previous_group)
19248 return first;
19249
19250 return false;
19251 }
19252
19253
19254 static bool
19255 insn_must_be_first_in_group (rtx_insn *insn)
19256 {
19257 enum attr_type type;
19258
19259 if (!insn
19260 || NOTE_P (insn)
19261 || DEBUG_INSN_P (insn)
19262 || GET_CODE (PATTERN (insn)) == USE
19263 || GET_CODE (PATTERN (insn)) == CLOBBER)
19264 return false;
19265
19266 switch (rs6000_tune)
19267 {
19268 case PROCESSOR_POWER5:
19269 if (is_cracked_insn (insn))
19270 return true;
19271 /* FALLTHRU */
19272 case PROCESSOR_POWER4:
19273 if (is_microcoded_insn (insn))
19274 return true;
19275
19276 if (!rs6000_sched_groups)
19277 return false;
19278
19279 type = get_attr_type (insn);
19280
19281 switch (type)
19282 {
19283 case TYPE_MFCR:
19284 case TYPE_MFCRF:
19285 case TYPE_MTCR:
19286 case TYPE_CR_LOGICAL:
19287 case TYPE_MTJMPR:
19288 case TYPE_MFJMPR:
19289 case TYPE_DIV:
19290 case TYPE_LOAD_L:
19291 case TYPE_STORE_C:
19292 case TYPE_ISYNC:
19293 case TYPE_SYNC:
19294 return true;
19295 default:
19296 break;
19297 }
19298 break;
19299 case PROCESSOR_POWER6:
19300 type = get_attr_type (insn);
19301
19302 switch (type)
19303 {
19304 case TYPE_EXTS:
19305 case TYPE_CNTLZ:
19306 case TYPE_TRAP:
19307 case TYPE_MUL:
19308 case TYPE_INSERT:
19309 case TYPE_FPCOMPARE:
19310 case TYPE_MFCR:
19311 case TYPE_MTCR:
19312 case TYPE_MFJMPR:
19313 case TYPE_MTJMPR:
19314 case TYPE_ISYNC:
19315 case TYPE_SYNC:
19316 case TYPE_LOAD_L:
19317 case TYPE_STORE_C:
19318 return true;
19319 case TYPE_SHIFT:
19320 if (get_attr_dot (insn) == DOT_NO
19321 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19322 return true;
19323 else
19324 break;
19325 case TYPE_DIV:
19326 if (get_attr_size (insn) == SIZE_32)
19327 return true;
19328 else
19329 break;
19330 case TYPE_LOAD:
19331 case TYPE_STORE:
19332 case TYPE_FPLOAD:
19333 case TYPE_FPSTORE:
19334 if (get_attr_update (insn) == UPDATE_YES)
19335 return true;
19336 else
19337 break;
19338 default:
19339 break;
19340 }
19341 break;
19342 case PROCESSOR_POWER7:
19343 type = get_attr_type (insn);
19344
19345 switch (type)
19346 {
19347 case TYPE_CR_LOGICAL:
19348 case TYPE_MFCR:
19349 case TYPE_MFCRF:
19350 case TYPE_MTCR:
19351 case TYPE_DIV:
19352 case TYPE_ISYNC:
19353 case TYPE_LOAD_L:
19354 case TYPE_STORE_C:
19355 case TYPE_MFJMPR:
19356 case TYPE_MTJMPR:
19357 return true;
19358 case TYPE_MUL:
19359 case TYPE_SHIFT:
19360 case TYPE_EXTS:
19361 if (get_attr_dot (insn) == DOT_YES)
19362 return true;
19363 else
19364 break;
19365 case TYPE_LOAD:
19366 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19367 || get_attr_update (insn) == UPDATE_YES)
19368 return true;
19369 else
19370 break;
19371 case TYPE_STORE:
19372 case TYPE_FPLOAD:
19373 case TYPE_FPSTORE:
19374 if (get_attr_update (insn) == UPDATE_YES)
19375 return true;
19376 else
19377 break;
19378 default:
19379 break;
19380 }
19381 break;
19382 case PROCESSOR_POWER8:
19383 type = get_attr_type (insn);
19384
19385 switch (type)
19386 {
19387 case TYPE_CR_LOGICAL:
19388 case TYPE_MFCR:
19389 case TYPE_MFCRF:
19390 case TYPE_MTCR:
19391 case TYPE_SYNC:
19392 case TYPE_ISYNC:
19393 case TYPE_LOAD_L:
19394 case TYPE_STORE_C:
19395 case TYPE_VECSTORE:
19396 case TYPE_MFJMPR:
19397 case TYPE_MTJMPR:
19398 return true;
19399 case TYPE_SHIFT:
19400 case TYPE_EXTS:
19401 case TYPE_MUL:
19402 if (get_attr_dot (insn) == DOT_YES)
19403 return true;
19404 else
19405 break;
19406 case TYPE_LOAD:
19407 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19408 || get_attr_update (insn) == UPDATE_YES)
19409 return true;
19410 else
19411 break;
19412 case TYPE_STORE:
19413 if (get_attr_update (insn) == UPDATE_YES
19414 && get_attr_indexed (insn) == INDEXED_YES)
19415 return true;
19416 else
19417 break;
19418 default:
19419 break;
19420 }
19421 break;
19422 default:
19423 break;
19424 }
19425
19426 return false;
19427 }
19428
19429 static bool
19430 insn_must_be_last_in_group (rtx_insn *insn)
19431 {
19432 enum attr_type type;
19433
19434 if (!insn
19435 || NOTE_P (insn)
19436 || DEBUG_INSN_P (insn)
19437 || GET_CODE (PATTERN (insn)) == USE
19438 || GET_CODE (PATTERN (insn)) == CLOBBER)
19439 return false;
19440
19441 switch (rs6000_tune) {
19442 case PROCESSOR_POWER4:
19443 case PROCESSOR_POWER5:
19444 if (is_microcoded_insn (insn))
19445 return true;
19446
19447 if (is_branch_slot_insn (insn))
19448 return true;
19449
19450 break;
19451 case PROCESSOR_POWER6:
19452 type = get_attr_type (insn);
19453
19454 switch (type)
19455 {
19456 case TYPE_EXTS:
19457 case TYPE_CNTLZ:
19458 case TYPE_TRAP:
19459 case TYPE_MUL:
19460 case TYPE_FPCOMPARE:
19461 case TYPE_MFCR:
19462 case TYPE_MTCR:
19463 case TYPE_MFJMPR:
19464 case TYPE_MTJMPR:
19465 case TYPE_ISYNC:
19466 case TYPE_SYNC:
19467 case TYPE_LOAD_L:
19468 case TYPE_STORE_C:
19469 return true;
19470 case TYPE_SHIFT:
19471 if (get_attr_dot (insn) == DOT_NO
19472 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19473 return true;
19474 else
19475 break;
19476 case TYPE_DIV:
19477 if (get_attr_size (insn) == SIZE_32)
19478 return true;
19479 else
19480 break;
19481 default:
19482 break;
19483 }
19484 break;
19485 case PROCESSOR_POWER7:
19486 type = get_attr_type (insn);
19487
19488 switch (type)
19489 {
19490 case TYPE_ISYNC:
19491 case TYPE_SYNC:
19492 case TYPE_LOAD_L:
19493 case TYPE_STORE_C:
19494 return true;
19495 case TYPE_LOAD:
19496 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19497 && get_attr_update (insn) == UPDATE_YES)
19498 return true;
19499 else
19500 break;
19501 case TYPE_STORE:
19502 if (get_attr_update (insn) == UPDATE_YES
19503 && get_attr_indexed (insn) == INDEXED_YES)
19504 return true;
19505 else
19506 break;
19507 default:
19508 break;
19509 }
19510 break;
19511 case PROCESSOR_POWER8:
19512 type = get_attr_type (insn);
19513
19514 switch (type)
19515 {
19516 case TYPE_MFCR:
19517 case TYPE_MTCR:
19518 case TYPE_ISYNC:
19519 case TYPE_SYNC:
19520 case TYPE_LOAD_L:
19521 case TYPE_STORE_C:
19522 return true;
19523 case TYPE_LOAD:
19524 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19525 && get_attr_update (insn) == UPDATE_YES)
19526 return true;
19527 else
19528 break;
19529 case TYPE_STORE:
19530 if (get_attr_update (insn) == UPDATE_YES
19531 && get_attr_indexed (insn) == INDEXED_YES)
19532 return true;
19533 else
19534 break;
19535 default:
19536 break;
19537 }
19538 break;
19539 default:
19540 break;
19541 }
19542
19543 return false;
19544 }
19545
19546 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19547 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19548
19549 static bool
19550 is_costly_group (rtx *group_insns, rtx next_insn)
19551 {
19552 int i;
19553 int issue_rate = rs6000_issue_rate ();
19554
19555 for (i = 0; i < issue_rate; i++)
19556 {
19557 sd_iterator_def sd_it;
19558 dep_t dep;
19559 rtx insn = group_insns[i];
19560
19561 if (!insn)
19562 continue;
19563
19564 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19565 {
19566 rtx next = DEP_CON (dep);
19567
19568 if (next == next_insn
19569 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19570 return true;
19571 }
19572 }
19573
19574 return false;
19575 }
19576
19577 /* Utility of the function redefine_groups.
19578 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19579 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19580 to keep it "far" (in a separate group) from GROUP_INSNS, following
19581 one of the following schemes, depending on the value of the flag
19582 -minsert-sched-nops = X:
19583 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19584 in order to force NEXT_INSN into a separate group.
19585 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19586 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19587 insertion (has a group just ended, how many vacant issue slots remain in the
19588 last group, and how many dispatch groups were encountered so far). */
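/* For example, with -minsert-sched-nops=2 and a costly dependence on
   NEXT_INSN, exactly two nops are emitted before it; under scheme (1)
   (sched_finish_regroup_exact), enough nops are emitted to force
   NEXT_INSN into a new dispatch group.  */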
19589
19590 static int
19591 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19592 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19593 int *group_count)
19594 {
19595 rtx nop;
19596 bool force;
19597 int issue_rate = rs6000_issue_rate ();
19598 bool end = *group_end;
19599 int i;
19600
19601 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19602 return can_issue_more;
19603
19604 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19605 return can_issue_more;
19606
19607 force = is_costly_group (group_insns, next_insn);
19608 if (!force)
19609 return can_issue_more;
19610
19611 if (sched_verbose > 6)
19612 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
19613 *group_count, can_issue_more);
19614
19615 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19616 {
19617 if (*group_end)
19618 can_issue_more = 0;
19619
19620 /* Since only a branch can be issued in the last issue_slot, it is
19621 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19622 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19623 in this case the last nop will start a new group and the branch
19624 will be forced to the new group. */
19625 if (can_issue_more && !is_branch_slot_insn (next_insn))
19626 can_issue_more--;
19627
19628 /* Do we have a special group ending nop? */
19629 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19630 || rs6000_tune == PROCESSOR_POWER8)
19631 {
19632 nop = gen_group_ending_nop ();
19633 emit_insn_before (nop, next_insn);
19634 can_issue_more = 0;
19635 }
19636 else
19637 while (can_issue_more > 0)
19638 {
19639 nop = gen_nop ();
19640 emit_insn_before (nop, next_insn);
19641 can_issue_more--;
19642 }
19643
19644 *group_end = true;
19645 return 0;
19646 }
19647
19648 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19649 {
19650 int n_nops = rs6000_sched_insert_nops;
19651
19652 /* Nops can't be issued from the branch slot, so the effective
19653 issue_rate for nops is 'issue_rate - 1'. */
19654 if (can_issue_more == 0)
19655 can_issue_more = issue_rate;
19656 can_issue_more--;
19657 if (can_issue_more == 0)
19658 {
19659 can_issue_more = issue_rate - 1;
19660 (*group_count)++;
19661 end = true;
19662 for (i = 0; i < issue_rate; i++)
19663 {
19664 group_insns[i] = 0;
19665 }
19666 }
19667
19668 while (n_nops > 0)
19669 {
19670 nop = gen_nop ();
19671 emit_insn_before (nop, next_insn);
19672 if (can_issue_more == issue_rate - 1) /* new group begins */
19673 end = false;
19674 can_issue_more--;
19675 if (can_issue_more == 0)
19676 {
19677 can_issue_more = issue_rate - 1;
19678 (*group_count)++;
19679 end = true;
19680 for (i = 0; i < issue_rate; i++)
19681 {
19682 group_insns[i] = 0;
19683 }
19684 }
19685 n_nops--;
19686 }
19687
19688 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19689 can_issue_more++;
19690
19691 /* Is next_insn going to start a new group? */
19692 *group_end
19693 = (end
19694 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19695 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19696 || (can_issue_more < issue_rate &&
19697 insn_terminates_group_p (next_insn, previous_group)));
19698 if (*group_end && end)
19699 (*group_count)--;
19700
19701 if (sched_verbose > 6)
19702 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19703 *group_count, can_issue_more);
19704 return can_issue_more;
19705 }
19706
19707 return can_issue_more;
19708 }
19709
19710 /* This function tries to synch the dispatch groups that the compiler "sees"
19711 with the dispatch groups that the processor dispatcher is expected to
19712 form in practice. It tries to achieve this synchronization by forcing the
19713 estimated processor grouping on the compiler (as opposed to the function
19714 'pad_groups' which tries to force the scheduler's grouping on the processor).
19715
19716 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19717 examines the (estimated) dispatch groups that will be formed by the processor
19718 dispatcher. It marks these group boundaries to reflect the estimated
19719 processor grouping, overriding the grouping that the scheduler had marked.
19720 Depending on the value of the flag '-minsert-sched-nops' this function can
19721 force certain insns into separate groups or force a certain distance between
19722 them by inserting nops, for example, if there exists a "costly dependence"
19723 between the insns.
19724
19725 The function estimates the group boundaries that the processor will form as
19726 follows: It keeps track of how many vacant issue slots are available after
19727 each insn. A subsequent insn will start a new group if one of the following
19728 4 cases applies:
19729 - no more vacant issue slots remain in the current dispatch group.
19730 - only the last issue slot, which is the branch slot, is vacant, but the next
19731 insn is not a branch.
19732 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19733 which means that a cracked insn (which occupies two issue slots) can't be
19734 issued in this group.
19735 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19736 start a new group. */
19737
19738 static int
19739 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19740 rtx_insn *tail)
19741 {
19742 rtx_insn *insn, *next_insn;
19743 int issue_rate;
19744 int can_issue_more;
19745 int slot, i;
19746 bool group_end;
19747 int group_count = 0;
19748 rtx *group_insns;
19749
19750 /* Initialize. */
19751 issue_rate = rs6000_issue_rate ();
19752 group_insns = XALLOCAVEC (rtx, issue_rate);
19753 for (i = 0; i < issue_rate; i++)
19754 {
19755 group_insns[i] = 0;
19756 }
19757 can_issue_more = issue_rate;
19758 slot = 0;
19759 insn = get_next_active_insn (prev_head_insn, tail);
19760 group_end = false;
19761
19762 while (insn != NULL_RTX)
19763 {
19764 slot = (issue_rate - can_issue_more);
19765 group_insns[slot] = insn;
19766 can_issue_more =
19767 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19768 if (insn_terminates_group_p (insn, current_group))
19769 can_issue_more = 0;
19770
19771 next_insn = get_next_active_insn (insn, tail);
19772 if (next_insn == NULL_RTX)
19773 return group_count + 1;
19774
19775 /* Is next_insn going to start a new group? */
19776 group_end
19777 = (can_issue_more == 0
19778 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19779 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19780 || (can_issue_more < issue_rate
19781 && insn_terminates_group_p (next_insn, previous_group)));
19782
19783 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19784 next_insn, &group_end, can_issue_more,
19785 &group_count);
19786
19787 if (group_end)
19788 {
19789 group_count++;
19790 can_issue_more = 0;
19791 for (i = 0; i < issue_rate; i++)
19792 {
19793 group_insns[i] = 0;
19794 }
19795 }
19796
19797 if (GET_MODE (next_insn) == TImode && can_issue_more)
19798 PUT_MODE (next_insn, VOIDmode);
19799 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19800 PUT_MODE (next_insn, TImode);
19801
19802 insn = next_insn;
19803 if (can_issue_more == 0)
19804 can_issue_more = issue_rate;
19805 } /* while */
19806
19807 return group_count;
19808 }
19809
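/* A worked example for redefine_groups (illustrative only, assuming
   issue_rate == 4 and that I3 is a cracked insn):

       I1 I2 | I3 I4 I5 ...

   After I1 and I2 have been placed, can_issue_more == 2, so the
   "can_issue_more <= 2 && is_cracked_insn" test above fires: I3 is
   predicted to start a new dispatch group and is marked with TImode,
   overriding whatever boundary the scheduler had chosen.  */
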
19810 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19811 dispatch group boundaries that the scheduler had marked. Pad with nops
19812 any dispatch groups which have vacant issue slots, in order to force the
19813 scheduler's grouping on the processor dispatcher. The function
19814 returns the number of dispatch groups found. */
19815
19816 static int
19817 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19818 rtx_insn *tail)
19819 {
19820 rtx_insn *insn, *next_insn;
19821 rtx nop;
19822 int issue_rate;
19823 int can_issue_more;
19824 int group_end;
19825 int group_count = 0;
19826
19827 /* Initialize issue_rate. */
19828 issue_rate = rs6000_issue_rate ();
19829 can_issue_more = issue_rate;
19830
19831 insn = get_next_active_insn (prev_head_insn, tail);
19832 next_insn = get_next_active_insn (insn, tail);
19833
19834 while (insn != NULL_RTX)
19835 {
19836 can_issue_more =
19837 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19838
19839 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19840
19841 if (next_insn == NULL_RTX)
19842 break;
19843
19844 if (group_end)
19845 {
19846 /* If the scheduler had marked group termination at this location
19847 (between insn and next_insn), and neither insn nor next_insn will
19848 force group termination, pad the group with nops to force group
19849 termination. */
19850 if (can_issue_more
19851 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19852 && !insn_terminates_group_p (insn, current_group)
19853 && !insn_terminates_group_p (next_insn, previous_group))
19854 {
19855 if (!is_branch_slot_insn (next_insn))
19856 can_issue_more--;
19857
19858 while (can_issue_more)
19859 {
19860 nop = gen_nop ();
19861 emit_insn_before (nop, next_insn);
19862 can_issue_more--;
19863 }
19864 }
19865
19866 can_issue_more = issue_rate;
19867 group_count++;
19868 }
19869
19870 insn = next_insn;
19871 next_insn = get_next_active_insn (insn, tail);
19872 }
19873
19874 return group_count;
19875 }
19876
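/* For instance (a sketch, again assuming issue_rate == 4): if the
   scheduler ended a group after two insns (so next_insn carries TImode)
   and next_insn is not a branch, two slots are vacant; since a nop
   cannot occupy the branch slot, pad_groups emits a single nop, filling
   the last regular slot and forcing the dispatcher to close the group
   where the scheduler did.  */
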
19877 /* We're beginning a new block. Initialize data structures as necessary. */
19878
19879 static void
19880 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19881 int sched_verbose ATTRIBUTE_UNUSED,
19882 int max_ready ATTRIBUTE_UNUSED)
19883 {
19884 last_scheduled_insn = NULL;
19885 load_store_pendulum = 0;
19886 divide_cnt = 0;
19887 vec_pairing = 0;
19888 }
19889
19890 /* The following function is called at the end of scheduling BB.
19891 After reload, it inserts nops to enforce insn group bundling. */
19892
19893 static void
19894 rs6000_sched_finish (FILE *dump, int sched_verbose)
19895 {
19896 int n_groups;
19897
19898 if (sched_verbose)
19899 fprintf (dump, "=== Finishing schedule.\n");
19900
19901 if (reload_completed && rs6000_sched_groups)
19902 {
19903 /* Do not run sched_finish hook when selective scheduling is enabled. */
19904 if (sel_sched_p ())
19905 return;
19906
19907 if (rs6000_sched_insert_nops == sched_finish_none)
19908 return;
19909
19910 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19911 n_groups = pad_groups (dump, sched_verbose,
19912 current_sched_info->prev_head,
19913 current_sched_info->next_tail);
19914 else
19915 n_groups = redefine_groups (dump, sched_verbose,
19916 current_sched_info->prev_head,
19917 current_sched_info->next_tail);
19918
19919 if (sched_verbose >= 6)
19920 {
19921 fprintf (dump, "ngroups = %d\n", n_groups);
19922 print_rtl (dump, current_sched_info->prev_head);
19923 fprintf (dump, "Done finish_sched\n");
19924 }
19925 }
19926 }
19927
19928 struct rs6000_sched_context
19929 {
19930 short cached_can_issue_more;
19931 rtx_insn *last_scheduled_insn;
19932 int load_store_pendulum;
19933 int divide_cnt;
19934 int vec_pairing;
19935 };
19936
19937 typedef struct rs6000_sched_context rs6000_sched_context_def;
19938 typedef rs6000_sched_context_def *rs6000_sched_context_t;
19939
19940 /* Allocate store for new scheduling context. */
19941 static void *
19942 rs6000_alloc_sched_context (void)
19943 {
19944 return xmalloc (sizeof (rs6000_sched_context_def));
19945 }
19946
19947 /* If CLEAN_P is true, initialize _SC with clean data;
19948 otherwise, initialize it from the global context. */
19949 static void
19950 rs6000_init_sched_context (void *_sc, bool clean_p)
19951 {
19952 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19953
19954 if (clean_p)
19955 {
19956 sc->cached_can_issue_more = 0;
19957 sc->last_scheduled_insn = NULL;
19958 sc->load_store_pendulum = 0;
19959 sc->divide_cnt = 0;
19960 sc->vec_pairing = 0;
19961 }
19962 else
19963 {
19964 sc->cached_can_issue_more = cached_can_issue_more;
19965 sc->last_scheduled_insn = last_scheduled_insn;
19966 sc->load_store_pendulum = load_store_pendulum;
19967 sc->divide_cnt = divide_cnt;
19968 sc->vec_pairing = vec_pairing;
19969 }
19970 }
19971
19972 /* Sets the global scheduling context to the one pointed to by _SC. */
19973 static void
19974 rs6000_set_sched_context (void *_sc)
19975 {
19976 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19977
19978 gcc_assert (sc != NULL);
19979
19980 cached_can_issue_more = sc->cached_can_issue_more;
19981 last_scheduled_insn = sc->last_scheduled_insn;
19982 load_store_pendulum = sc->load_store_pendulum;
19983 divide_cnt = sc->divide_cnt;
19984 vec_pairing = sc->vec_pairing;
19985 }
19986
19987 /* Free _SC. */
19988 static void
19989 rs6000_free_sched_context (void *_sc)
19990 {
19991 gcc_assert (_sc != NULL);
19992
19993 free (_sc);
19994 }
19995
19996 static bool
19997 rs6000_sched_can_speculate_insn (rtx_insn *insn)
19998 {
19999 switch (get_attr_type (insn))
20000 {
20001 case TYPE_DIV:
20002 case TYPE_SDIV:
20003 case TYPE_DDIV:
20004 case TYPE_VECDIV:
20005 case TYPE_SSQRT:
20006 case TYPE_DSQRT:
20007 return false;
20008
20009 default:
20010 return true;
20011 }
20012 }
20013 \f
20014 /* Length in units of the trampoline for entering a nested function. */
20015
20016 int
20017 rs6000_trampoline_size (void)
20018 {
20019 int ret = 0;
20020
20021 switch (DEFAULT_ABI)
20022 {
20023 default:
20024 gcc_unreachable ();
20025
20026 case ABI_AIX:
20027 ret = (TARGET_32BIT) ? 12 : 24;
20028 break;
20029
20030 case ABI_ELFv2:
20031 gcc_assert (!TARGET_32BIT);
20032 ret = 32;
20033 break;
20034
20035 case ABI_DARWIN:
20036 case ABI_V4:
20037 ret = (TARGET_32BIT) ? 40 : 48;
20038 break;
20039 }
20040
20041 return ret;
20042 }
20043
20044 /* Emit RTL insns to initialize the variable parts of a trampoline.
20045 FNADDR is an RTX for the address of the function's pure code.
20046 CXT is an RTX for the static chain value for the function. */
20047
20048 static void
20049 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
20050 {
20051 int regsize = (TARGET_32BIT) ? 4 : 8;
20052 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
20053 rtx ctx_reg = force_reg (Pmode, cxt);
20054 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
20055
20056 switch (DEFAULT_ABI)
20057 {
20058 default:
20059 gcc_unreachable ();
20060
20061 /* Under AIX, just build the 3-word function descriptor. */
20062 case ABI_AIX:
20063 {
20064 rtx fnmem, fn_reg, toc_reg;
20065
20066 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
20067 error ("you cannot take the address of a nested function if you use "
20068 "the %qs option", "-mno-pointers-to-nested-functions");
20069
20070 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
20071 fn_reg = gen_reg_rtx (Pmode);
20072 toc_reg = gen_reg_rtx (Pmode);
20073
20074 /* Macro to shorten the code expansions below. */
20075 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20076
20077 m_tramp = replace_equiv_address (m_tramp, addr);
20078
20079 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
20080 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
20081 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
20082 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
20083 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
20084
20085 # undef MEM_PLUS
20086 }
20087 break;
20088
20089 /* Under V.4/ELFv2/eabi/darwin, __trampoline_setup does the real work. */
20090 case ABI_ELFv2:
20091 case ABI_DARWIN:
20092 case ABI_V4:
20093 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
20094 LCT_NORMAL, VOIDmode,
20095 addr, Pmode,
20096 GEN_INT (rs6000_trampoline_size ()), SImode,
20097 fnaddr, Pmode,
20098 ctx_reg, Pmode);
20099 break;
20100 }
20101 }
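
/* Sketch of the AIX descriptor initialized above in 64-bit mode
   (regsize == 8; 24 bytes total, matching rs6000_trampoline_size):
   offset 0 holds the code entry address, offset 8 the TOC value, and
   offset 16 the static chain (illustrative).  */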
20102
20103 \f
20104 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20105 identifier as an argument, so the front end shouldn't look it up. */
20106
20107 static bool
20108 rs6000_attribute_takes_identifier_p (const_tree attr_id)
20109 {
20110 return is_attribute_p ("altivec", attr_id);
20111 }
20112
20113 /* Handle the "altivec" attribute. The attribute may have
20114 arguments as follows:
20115
20116 __attribute__((altivec(vector__)))
20117 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20118 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20119
20120 and may appear more than once (e.g., 'vector bool char') in a
20121 given declaration. */
20122
20123 static tree
20124 rs6000_handle_altivec_attribute (tree *node,
20125 tree name ATTRIBUTE_UNUSED,
20126 tree args,
20127 int flags ATTRIBUTE_UNUSED,
20128 bool *no_add_attrs)
20129 {
20130 tree type = *node, result = NULL_TREE;
20131 machine_mode mode;
20132 int unsigned_p;
20133 char altivec_type
20134 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20135 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20136 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20137 : '?');
20138
20139 while (POINTER_TYPE_P (type)
20140 || TREE_CODE (type) == FUNCTION_TYPE
20141 || TREE_CODE (type) == METHOD_TYPE
20142 || TREE_CODE (type) == ARRAY_TYPE)
20143 type = TREE_TYPE (type);
20144
20145 mode = TYPE_MODE (type);
20146
20147 /* Check for invalid AltiVec type qualifiers. */
20148 if (type == long_double_type_node)
20149 error ("use of %<long double%> in AltiVec types is invalid");
20150 else if (type == boolean_type_node)
20151 error ("use of boolean types in AltiVec types is invalid");
20152 else if (TREE_CODE (type) == COMPLEX_TYPE)
20153 error ("use of %<complex%> in AltiVec types is invalid");
20154 else if (DECIMAL_FLOAT_MODE_P (mode))
20155 error ("use of decimal floating-point types in AltiVec types is invalid");
20156 else if (!TARGET_VSX)
20157 {
20158 if (type == long_unsigned_type_node || type == long_integer_type_node)
20159 {
20160 if (TARGET_64BIT)
20161 error ("use of %<long%> in AltiVec types is invalid for "
20162 "64-bit code without %qs", "-mvsx");
20163 else if (rs6000_warn_altivec_long)
20164 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20165 "use %<int%>");
20166 }
20167 else if (type == long_long_unsigned_type_node
20168 || type == long_long_integer_type_node)
20169 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20170 "-mvsx");
20171 else if (type == double_type_node)
20172 error ("use of %<double%> in AltiVec types is invalid without %qs",
20173 "-mvsx");
20174 }
20175
20176 switch (altivec_type)
20177 {
20178 case 'v':
20179 unsigned_p = TYPE_UNSIGNED (type);
20180 switch (mode)
20181 {
20182 case E_TImode:
20183 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20184 break;
20185 case E_DImode:
20186 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20187 break;
20188 case E_SImode:
20189 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20190 break;
20191 case E_HImode:
20192 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20193 break;
20194 case E_QImode:
20195 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20196 break;
20197 case E_SFmode: result = V4SF_type_node; break;
20198 case E_DFmode: result = V2DF_type_node; break;
20199 /* If the user says 'vector int bool', we may be handed the 'bool'
20200 attribute _before_ the 'vector' attribute, and so select the
20201 proper type in the 'b' case below. */
20202 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20203 case E_V2DImode: case E_V2DFmode:
20204 result = type;
20205 default: break;
20206 }
20207 break;
20208 case 'b':
20209 switch (mode)
20210 {
20211 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20212 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20213 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20214 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20215 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
20216 default: break;
20217 }
20218 break;
20219 case 'p':
20220 switch (mode)
20221 {
20222 case E_V8HImode: result = pixel_V8HI_type_node;
20223 default: break;
20224 }
20225 default: break;
20226 }
20227
20228 /* Propagate qualifiers attached to the element type
20229 onto the vector type. */
20230 if (result && result != type && TYPE_QUALS (type))
20231 result = build_qualified_type (result, TYPE_QUALS (type));
20232
20233 *no_add_attrs = true; /* No need to hang on to the attribute. */
20234
20235 if (result)
20236 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20237
20238 return NULL_TREE;
20239 }
20240
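/* Example mappings implemented above (illustrative):

     'vector unsigned int'  -> unsigned_V4SI_type_node  (E_SImode, 'v')
     'vector bool char'     -> bool_V16QI_type_node     (E_QImode, 'b')
     'vector pixel'         -> pixel_V8HI_type_node     (E_V8HImode, 'p')  */
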
20241 /* AltiVec defines five built-in scalar types that serve as vector
20242 elements; we must teach the compiler how to mangle them. The 128-bit
20243 floating point mangling is target-specific as well. MMA defines
20244 two built-in types to be used as opaque vector types. */
20245
20246 static const char *
20247 rs6000_mangle_type (const_tree type)
20248 {
20249 type = TYPE_MAIN_VARIANT (type);
20250
20251 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20252 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20253 && TREE_CODE (type) != OPAQUE_TYPE)
20254 return NULL;
20255
20256 if (type == bool_char_type_node) return "U6__boolc";
20257 if (type == bool_short_type_node) return "U6__bools";
20258 if (type == pixel_type_node) return "u7__pixel";
20259 if (type == bool_int_type_node) return "U6__booli";
20260 if (type == bool_long_long_type_node) return "U6__boolx";
20261
20262 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20263 return "g";
20264 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20265 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
20266
20267 if (type == vector_pair_type_node)
20268 return "u13__vector_pair";
20269 if (type == vector_quad_type_node)
20270 return "u13__vector_quad";
20271
20272 /* For all other types, use the default mangling. */
20273 return NULL;
20274 }
20275
20276 /* Handle a "longcall" or "shortcall" attribute; arguments as in
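/* For example (illustrative, hypothetical signatures): under the C++
   ABI a function 'void f (__ieee128)' mangles as _Z1fu9__ieee128 via
   the table above, while the IBM double-double form uses "g", giving
   _Z1fg.  */
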
20277 struct attribute_spec.handler. */
20278
20279 static tree
20280 rs6000_handle_longcall_attribute (tree *node, tree name,
20281 tree args ATTRIBUTE_UNUSED,
20282 int flags ATTRIBUTE_UNUSED,
20283 bool *no_add_attrs)
20284 {
20285 if (TREE_CODE (*node) != FUNCTION_TYPE
20286 && TREE_CODE (*node) != FIELD_DECL
20287 && TREE_CODE (*node) != TYPE_DECL)
20288 {
20289 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20290 name);
20291 *no_add_attrs = true;
20292 }
20293
20294 return NULL_TREE;
20295 }
20296
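/* Typical use (an illustrative declaration, not from this file):

     extern void far_away (void) __attribute__ ((longcall));

   Calls to far_away are then made through a register, so the target may
   lie beyond the +/-32MB direct branch range; -mlongcall applies the
   attribute to every function type via
   rs6000_set_default_type_attributes below.  */
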
20297 /* Set longcall attributes on all functions declared when
20298 rs6000_default_long_calls is true. */
20299 static void
20300 rs6000_set_default_type_attributes (tree type)
20301 {
20302 if (rs6000_default_long_calls
20303 && (TREE_CODE (type) == FUNCTION_TYPE
20304 || TREE_CODE (type) == METHOD_TYPE))
20305 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20306 NULL_TREE,
20307 TYPE_ATTRIBUTES (type));
20308
20309 #if TARGET_MACHO
20310 darwin_set_default_type_attributes (type);
20311 #endif
20312 }
20313
20314 /* Return a reference suitable for calling a function with the
20315 longcall attribute. */
20316
20317 static rtx
20318 rs6000_longcall_ref (rtx call_ref, rtx arg)
20319 {
20320 /* System V adds '.' to the internal name, so skip any leading dots. */
20321 const char *call_name = XSTR (call_ref, 0);
20322 if (*call_name == '.')
20323 {
20324 while (*call_name == '.')
20325 call_name++;
20326
20327 tree node = get_identifier (call_name);
20328 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20329 }
20330
20331 if (TARGET_PLTSEQ)
20332 {
20333 rtx base = const0_rtx;
20334 int regno = 12;
20335 if (rs6000_pcrel_p ())
20336 {
20337 rtx reg = gen_rtx_REG (Pmode, regno);
20338 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20339 gen_rtvec (3, base, call_ref, arg),
20340 UNSPECV_PLT_PCREL);
20341 emit_insn (gen_rtx_SET (reg, u));
20342 return reg;
20343 }
20344
20345 if (DEFAULT_ABI == ABI_ELFv2)
20346 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20347 else
20348 {
20349 if (flag_pic)
20350 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20351 regno = 11;
20352 }
20353 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20354 may be used by a function global entry point. For SysV4, r11
20355 is used by __glink_PLTresolve lazy resolver entry. */
20356 rtx reg = gen_rtx_REG (Pmode, regno);
20357 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20358 UNSPEC_PLT16_HA);
20359 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20360 gen_rtvec (3, reg, call_ref, arg),
20361 UNSPECV_PLT16_LO);
20362 emit_insn (gen_rtx_SET (reg, hi));
20363 emit_insn (gen_rtx_SET (reg, lo));
20364 return reg;
20365 }
20366
20367 return force_reg (Pmode, call_ref);
20368 }
20369 \f
20370 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20371 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20372 #endif
20373
20374 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20375 struct attribute_spec.handler. */
20376 static tree
20377 rs6000_handle_struct_attribute (tree *node, tree name,
20378 tree args ATTRIBUTE_UNUSED,
20379 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20380 {
20381 tree *type = NULL;
20382 if (DECL_P (*node))
20383 {
20384 if (TREE_CODE (*node) == TYPE_DECL)
20385 type = &TREE_TYPE (*node);
20386 }
20387 else
20388 type = node;
20389
20390 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20391 || TREE_CODE (*type) == UNION_TYPE)))
20392 {
20393 warning (OPT_Wattributes, "%qE attribute ignored", name);
20394 *no_add_attrs = true;
20395 }
20396
20397 else if ((is_attribute_p ("ms_struct", name)
20398 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20399 || ((is_attribute_p ("gcc_struct", name)
20400 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20401 {
20402 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20403 name);
20404 *no_add_attrs = true;
20405 }
20406
20407 return NULL_TREE;
20408 }
20409
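/* Illustrative use of the attribute handled above:

     struct __attribute__ ((ms_struct)) S { char c; int i : 4; };

   requests the Microsoft bitfield layout that
   rs6000_ms_bitfield_layout_p below reports to the middle-end.  */
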
20410 static bool
20411 rs6000_ms_bitfield_layout_p (const_tree record_type)
20412 {
20413 return ((TARGET_USE_MS_BITFIELD_LAYOUT
20414 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20415 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20416 }
20417 \f
20418 #ifdef USING_ELFOS_H
20419
20420 /* A get_unnamed_section callback, used for switching to toc_section. */
20421
20422 static void
20423 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20424 {
20425 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20426 && TARGET_MINIMAL_TOC)
20427 {
20428 if (!toc_initialized)
20429 {
20430 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20431 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20432 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20433 fprintf (asm_out_file, "\t.tc ");
20434 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20435 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20436 fprintf (asm_out_file, "\n");
20437
20438 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20439 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20440 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20441 fprintf (asm_out_file, " = .+32768\n");
20442 toc_initialized = 1;
20443 }
20444 else
20445 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20446 }
20447 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20448 {
20449 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20450 if (!toc_initialized)
20451 {
20452 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20453 toc_initialized = 1;
20454 }
20455 }
20456 else
20457 {
20458 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20459 if (!toc_initialized)
20460 {
20461 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20462 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20463 fprintf (asm_out_file, " = .+32768\n");
20464 toc_initialized = 1;
20465 }
20466 }
20467 }
20468
20469 /* Implement TARGET_ASM_INIT_SECTIONS. */
20470
20471 static void
20472 rs6000_elf_asm_init_sections (void)
20473 {
20474 toc_section
20475 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20476
20477 sdata2_section
20478 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20479 SDATA2_SECTION_ASM_OP);
20480 }
20481
20482 /* Implement TARGET_SELECT_RTX_SECTION. */
20483
20484 static section *
20485 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20486 unsigned HOST_WIDE_INT align)
20487 {
20488 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20489 return toc_section;
20490 else
20491 return default_elf_select_rtx_section (mode, x, align);
20492 }
20493 \f
20494 /* For a SYMBOL_REF, set generic flags and then perform some
20495 target-specific processing.
20496
20497 When the AIX ABI is requested on a non-AIX system, replace the
20498 function name with the real name (with a leading .) rather than the
20499 function descriptor name. This saves a lot of overriding code to
20500 read the prefixes. */
20501
20502 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20503 static void
20504 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20505 {
20506 default_encode_section_info (decl, rtl, first);
20507
20508 if (first
20509 && TREE_CODE (decl) == FUNCTION_DECL
20510 && !TARGET_AIX
20511 && DEFAULT_ABI == ABI_AIX)
20512 {
20513 rtx sym_ref = XEXP (rtl, 0);
20514 size_t len = strlen (XSTR (sym_ref, 0));
20515 char *str = XALLOCAVEC (char, len + 2);
20516 str[0] = '.';
20517 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20518 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20519 }
20520 }
20521
20522 static inline bool
20523 compare_section_name (const char *section, const char *templ)
20524 {
20525 int len;
20526
20527 len = strlen (templ);
20528 return (strncmp (section, templ, len) == 0
20529 && (section[len] == 0 || section[len] == '.'));
20530 }
20531
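/* e.g. compare_section_name (".sdata.foo", ".sdata") and
   compare_section_name (".sdata", ".sdata") are true, while
   compare_section_name (".sdata2", ".sdata") is false, since '2' is
   neither a NUL nor a '.' (illustrative).  */
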
20532 bool
20533 rs6000_elf_in_small_data_p (const_tree decl)
20534 {
20535 if (rs6000_sdata == SDATA_NONE)
20536 return false;
20537
20538 /* We want to merge strings, so we never consider them small data. */
20539 if (TREE_CODE (decl) == STRING_CST)
20540 return false;
20541
20542 /* Functions are never in the small data area. */
20543 if (TREE_CODE (decl) == FUNCTION_DECL)
20544 return false;
20545
20546 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20547 {
20548 const char *section = DECL_SECTION_NAME (decl);
20549 if (compare_section_name (section, ".sdata")
20550 || compare_section_name (section, ".sdata2")
20551 || compare_section_name (section, ".gnu.linkonce.s")
20552 || compare_section_name (section, ".sbss")
20553 || compare_section_name (section, ".sbss2")
20554 || compare_section_name (section, ".gnu.linkonce.sb")
20555 || strcmp (section, ".PPC.EMB.sdata0") == 0
20556 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20557 return true;
20558 }
20559 else
20560 {
20561 /* If we are told not to put readonly data in sdata, then don't. */
20562 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20563 && !rs6000_readonly_in_sdata)
20564 return false;
20565
20566 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20567
20568 if (size > 0
20569 && size <= g_switch_value
20570 /* If it's not public, and we're not going to reference it there,
20571 there's no need to put it in the small data section. */
20572 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20573 return true;
20574 }
20575
20576 return false;
20577 }
20578
20579 #endif /* USING_ELFOS_H */
20580 \f
20581 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20582
20583 static bool
20584 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20585 {
20586 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20587 }
20588
20589 /* Do not place thread-local symbols refs in the object blocks. */
20590
20591 static bool
20592 rs6000_use_blocks_for_decl_p (const_tree decl)
20593 {
20594 return !DECL_THREAD_LOCAL_P (decl);
20595 }
20596 \f
20597 /* Return a REG that occurs in ADDR with coefficient 1.
20598 ADDR can be effectively incremented by incrementing REG.
20599
20600 r0 is special and we must not select it as an address
20601 register by this routine since our caller will try to
20602 increment the returned register via an "la" instruction. */
20603
20604 rtx
20605 find_addr_reg (rtx addr)
20606 {
20607 while (GET_CODE (addr) == PLUS)
20608 {
20609 if (REG_P (XEXP (addr, 0))
20610 && REGNO (XEXP (addr, 0)) != 0)
20611 addr = XEXP (addr, 0);
20612 else if (REG_P (XEXP (addr, 1))
20613 && REGNO (XEXP (addr, 1)) != 0)
20614 addr = XEXP (addr, 1);
20615 else if (CONSTANT_P (XEXP (addr, 0)))
20616 addr = XEXP (addr, 1);
20617 else if (CONSTANT_P (XEXP (addr, 1)))
20618 addr = XEXP (addr, 0);
20619 else
20620 gcc_unreachable ();
20621 }
20622 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20623 return addr;
20624 }
20625
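/* e.g. (illustrative RTL) for ADDR == (plus (reg 9) (const_int 8)) the
   result is (reg 9); for (plus (reg 0) (reg 10)) it is (reg 10), since
   r0 would read as zero in the base position of the "la".  */
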
20626 void
20627 rs6000_fatal_bad_address (rtx op)
20628 {
20629 fatal_insn ("bad address", op);
20630 }
20631
20632 #if TARGET_MACHO
20633
20634 vec<branch_island, va_gc> *branch_islands;
20635
20636 /* Remember to generate a branch island for far calls to the given
20637 function. */
20638
20639 static void
20640 add_compiler_branch_island (tree label_name, tree function_name,
20641 int line_number)
20642 {
20643 branch_island bi = {function_name, label_name, line_number};
20644 vec_safe_push (branch_islands, bi);
20645 }
20646
20647 /* NO_PREVIOUS_DEF checks whether the given function name already has a
20648 branch island recorded for it. */
20649
20650 static int
20651 no_previous_def (tree function_name)
20652 {
20653 branch_island *bi;
20654 unsigned ix;
20655
20656 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20657 if (function_name == bi->function_name)
20658 return 0;
20659 return 1;
20660 }
20661
20662 /* GET_PREV_LABEL gets the label name from the previous definition of
20663 the function. */
20664
20665 static tree
20666 get_prev_label (tree function_name)
20667 {
20668 branch_island *bi;
20669 unsigned ix;
20670
20671 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20672 if (function_name == bi->function_name)
20673 return bi->label_name;
20674 return NULL_TREE;
20675 }
20676
20677 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20678
20679 void
20680 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20681 {
20682 unsigned int length;
20683 char *symbol_name, *lazy_ptr_name;
20684 char *local_label_0;
20685 static unsigned label = 0;
20686
20687 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20688 symb = (*targetm.strip_name_encoding) (symb);
20689
20690 length = strlen (symb);
20691 symbol_name = XALLOCAVEC (char, length + 32);
20692 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20693
20694 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20695 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20696
20697 if (MACHOPIC_PURE)
20698 {
20699 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20700 fprintf (file, "\t.align 5\n");
20701
20702 fprintf (file, "%s:\n", stub);
20703 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20704
20705 label++;
20706 local_label_0 = XALLOCAVEC (char, 16);
20707 sprintf (local_label_0, "L%u$spb", label);
20708
20709 fprintf (file, "\tmflr r0\n");
20710 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20711 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20712 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20713 lazy_ptr_name, local_label_0);
20714 fprintf (file, "\tmtlr r0\n");
20715 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20716 (TARGET_64BIT ? "ldu" : "lwzu"),
20717 lazy_ptr_name, local_label_0);
20718 fprintf (file, "\tmtctr r12\n");
20719 fprintf (file, "\tbctr\n");
20720 }
20721 else /* mdynamic-no-pic or mkernel. */
20722 {
20723 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20724 fprintf (file, "\t.align 4\n");
20725
20726 fprintf (file, "%s:\n", stub);
20727 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20728
20729 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20730 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20731 (TARGET_64BIT ? "ldu" : "lwzu"),
20732 lazy_ptr_name);
20733 fprintf (file, "\tmtctr r12\n");
20734 fprintf (file, "\tbctr\n");
20735 }
20736
20737 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20738 fprintf (file, "%s:\n", lazy_ptr_name);
20739 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20740 fprintf (file, "%sdyld_stub_binding_helper\n",
20741 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20742 }
20743
20744 /* Legitimize PIC addresses. If the address is already
20745 position-independent, we return ORIG. Newly generated
20746 position-independent addresses go into a reg. This is REG if
20747 nonzero, otherwise we allocate register(s) as necessary. */
20748
20749 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
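
/* SMALL_INT accepts exactly the signed 16-bit range; e.g. -32768 and
   32767 pass, 32768 does not, so a passing offset can be folded into a
   D-form displacement (illustrative).  */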
20750
20751 rtx
20752 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20753 rtx reg)
20754 {
20755 rtx base, offset;
20756
20757 if (reg == NULL && !reload_completed)
20758 reg = gen_reg_rtx (Pmode);
20759
20760 if (GET_CODE (orig) == CONST)
20761 {
20762 rtx reg_temp;
20763
20764 if (GET_CODE (XEXP (orig, 0)) == PLUS
20765 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20766 return orig;
20767
20768 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20769
20770 /* Use a different reg for the intermediate value, as
20771 it will be marked UNCHANGING. */
20772 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20773 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20774 Pmode, reg_temp);
20775 offset =
20776 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20777 Pmode, reg);
20778
20779 if (CONST_INT_P (offset))
20780 {
20781 if (SMALL_INT (offset))
20782 return plus_constant (Pmode, base, INTVAL (offset));
20783 else if (!reload_completed)
20784 offset = force_reg (Pmode, offset);
20785 else
20786 {
20787 rtx mem = force_const_mem (Pmode, orig);
20788 return machopic_legitimize_pic_address (mem, Pmode, reg);
20789 }
20790 }
20791 return gen_rtx_PLUS (Pmode, base, offset);
20792 }
20793
20794 /* Fall back on generic machopic code. */
20795 return machopic_legitimize_pic_address (orig, mode, reg);
20796 }
20797
20798 /* Output a .machine directive for the Darwin assembler, and call
20799 the generic start_file routine. */
20800
20801 static void
20802 rs6000_darwin_file_start (void)
20803 {
20804 static const struct
20805 {
20806 const char *arg;
20807 const char *name;
20808 HOST_WIDE_INT if_set;
20809 } mapping[] = {
20810 { "ppc64", "ppc64", MASK_64BIT },
20811 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
20812 { "power4", "ppc970", 0 },
20813 { "G5", "ppc970", 0 },
20814 { "7450", "ppc7450", 0 },
20815 { "7400", "ppc7400", MASK_ALTIVEC },
20816 { "G4", "ppc7400", 0 },
20817 { "750", "ppc750", 0 },
20818 { "740", "ppc750", 0 },
20819 { "G3", "ppc750", 0 },
20820 { "604e", "ppc604e", 0 },
20821 { "604", "ppc604", 0 },
20822 { "603e", "ppc603", 0 },
20823 { "603", "ppc603", 0 },
20824 { "601", "ppc601", 0 },
20825 { NULL, "ppc", 0 } };
20826 const char *cpu_id = "";
20827 size_t i;
20828
20829 rs6000_file_start ();
20830 darwin_file_start ();
20831
20832 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20833
20834 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20835 cpu_id = rs6000_default_cpu;
20836
20837 if (OPTION_SET_P (rs6000_cpu_index))
20838 cpu_id = processor_target_table[rs6000_cpu_index].name;
20839
20840 /* Look through the mapping array. Pick the first name that either
20841 matches the argument, has a bit set in IF_SET that is also set
20842 in the target flags, or has a NULL name. */
20843
20844 i = 0;
20845 while (mapping[i].arg != NULL
20846 && strcmp (mapping[i].arg, cpu_id) != 0
20847 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20848 i++;
20849
20850 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20851 }
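
/* e.g. -mcpu=G4 turns on MASK_ALTIVEC, so the scan above stops at a
   ppc7400 entry and emits "\t.machine ppc7400" (illustrative).  */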
20852
20853 #endif /* TARGET_MACHO */
20854
20855 #if TARGET_ELF
20856 static int
20857 rs6000_elf_reloc_rw_mask (void)
20858 {
20859 if (flag_pic)
20860 return 3;
20861 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20862 return 2;
20863 else
20864 return 0;
20865 }
20866
20867 /* Record an element in the table of global constructors. SYMBOL is
20868 a SYMBOL_REF of the function to be called; PRIORITY is a number
20869 between 0 and MAX_INIT_PRIORITY.
20870
20871 This differs from default_named_section_asm_out_constructor in
20872 that we have special handling for -mrelocatable. */
20873
20874 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20875 static void
20876 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20877 {
20878 const char *section = ".ctors";
20879 char buf[18];
20880
20881 if (priority != DEFAULT_INIT_PRIORITY)
20882 {
20883 sprintf (buf, ".ctors.%.5u",
20884 /* Invert the numbering so the linker puts us in the proper
20885 order; constructors are run from right to left, and the
20886 linker sorts in increasing order. */
20887 MAX_INIT_PRIORITY - priority);
20888 section = buf;
20889 }
20890
20891 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20892 assemble_align (POINTER_SIZE);
20893
20894 if (DEFAULT_ABI == ABI_V4
20895 && (TARGET_RELOCATABLE || flag_pic > 1))
20896 {
20897 fputs ("\t.long (", asm_out_file);
20898 output_addr_const (asm_out_file, symbol);
20899 fputs (")@fixup\n", asm_out_file);
20900 }
20901 else
20902 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20903 }
20904
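/* e.g. with MAX_INIT_PRIORITY == 65535, a constructor of priority 65000
   lands in section ".ctors.00535" under the inverted numbering above
   (illustrative).  */
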
20905 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20906 static void
20907 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20908 {
20909 const char *section = ".dtors";
20910 char buf[18];
20911
20912 if (priority != DEFAULT_INIT_PRIORITY)
20913 {
20914 sprintf (buf, ".dtors.%.5u",
20915 /* Invert the numbering so the linker puts us in the proper
20916 order; constructors are run from right to left, and the
20917 linker sorts in increasing order. */
20918 MAX_INIT_PRIORITY - priority);
20919 section = buf;
20920 }
20921
20922 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20923 assemble_align (POINTER_SIZE);
20924
20925 if (DEFAULT_ABI == ABI_V4
20926 && (TARGET_RELOCATABLE || flag_pic > 1))
20927 {
20928 fputs ("\t.long (", asm_out_file);
20929 output_addr_const (asm_out_file, symbol);
20930 fputs (")@fixup\n", asm_out_file);
20931 }
20932 else
20933 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20934 }
20935
20936 void
20937 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20938 {
20939 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20940 {
20941 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20942 ASM_OUTPUT_LABEL (file, name);
20943 fputs (DOUBLE_INT_ASM_OP, file);
20944 rs6000_output_function_entry (file, name);
20945 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20946 if (DOT_SYMBOLS)
20947 {
20948 fputs ("\t.size\t", file);
20949 assemble_name (file, name);
20950 fputs (",24\n\t.type\t.", file);
20951 assemble_name (file, name);
20952 fputs (",@function\n", file);
20953 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20954 {
20955 fputs ("\t.globl\t.", file);
20956 assemble_name (file, name);
20957 putc ('\n', file);
20958 }
20959 }
20960 else
20961 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20962 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20963 rs6000_output_function_entry (file, name);
20964 fputs (":\n", file);
20965 return;
20966 }
20967
20968 int uses_toc;
20969 if (DEFAULT_ABI == ABI_V4
20970 && (TARGET_RELOCATABLE || flag_pic > 1)
20971 && !TARGET_SECURE_PLT
20972 && (!constant_pool_empty_p () || crtl->profile)
20973 && (uses_toc = uses_TOC ()))
20974 {
20975 char buf[256];
20976
20977 if (uses_toc == 2)
20978 switch_to_other_text_partition ();
20979 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20980
20981 fprintf (file, "\t.long ");
20982 assemble_name (file, toc_label_name);
20983 need_toc_init = 1;
20984 putc ('-', file);
20985 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20986 assemble_name (file, buf);
20987 putc ('\n', file);
20988 if (uses_toc == 2)
20989 switch_to_other_text_partition ();
20990 }
20991
20992 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20993 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20994
20995 if (TARGET_CMODEL == CMODEL_LARGE
20996 && rs6000_global_entry_point_prologue_needed_p ())
20997 {
20998 char buf[256];
20999
21000 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21001
21002 fprintf (file, "\t.quad .TOC.-");
21003 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21004 assemble_name (file, buf);
21005 putc ('\n', file);
21006 }
21007
21008 if (DEFAULT_ABI == ABI_AIX)
21009 {
21010 const char *desc_name, *orig_name;
21011
21012 orig_name = (*targetm.strip_name_encoding) (name);
21013 desc_name = orig_name;
21014 while (*desc_name == '.')
21015 desc_name++;
21016
21017 if (TREE_PUBLIC (decl))
21018 fprintf (file, "\t.globl %s\n", desc_name);
21019
21020 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
21021 fprintf (file, "%s:\n", desc_name);
21022 fprintf (file, "\t.long %s\n", orig_name);
21023 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
21024 fputs ("\t.long 0\n", file);
21025 fprintf (file, "\t.previous\n");
21026 }
21027 ASM_OUTPUT_LABEL (file, name);
21028 }
21029
21030 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
21031 static void
21032 rs6000_elf_file_end (void)
21033 {
21034 #ifdef HAVE_AS_GNU_ATTRIBUTE
21035 /* ??? The value emitted depends on options active at file end.
21036 Assume anyone using #pragma or attributes that might change
21037 options knows what they are doing. */
21038 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
21039 && rs6000_passes_float)
21040 {
21041 int fp;
21042
21043 if (TARGET_HARD_FLOAT)
21044 fp = 1;
21045 else
21046 fp = 2;
21047 if (rs6000_passes_long_double)
21048 {
21049 if (!TARGET_LONG_DOUBLE_128)
21050 fp |= 2 * 4;
21051 else if (TARGET_IEEEQUAD)
21052 fp |= 3 * 4;
21053 else
21054 fp |= 1 * 4;
21055 }
21056 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
21057 }
21058 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
21059 {
21060 if (rs6000_passes_vector)
21061 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
21062 (TARGET_ALTIVEC_ABI ? 2 : 1));
21063 if (rs6000_returns_struct)
21064 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
21065 aix_struct_return ? 2 : 1);
21066 }
21067 #endif
21068 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21069 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
21070 file_end_indicate_exec_stack ();
21071 #endif
21072
21073 if (flag_split_stack)
21074 file_end_indicate_split_stack ();
21075
21076 if (cpu_builtin_p)
21077 {
21078 /* We have expanded a CPU builtin, so we need to emit a reference to
21079 the special symbol that LIBC uses to declare it supports the
21080 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21081 switch_to_section (data_section);
21082 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
21083 fprintf (asm_out_file, "\t%s %s\n",
21084 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
21085 }
21086 }
21087 #endif
21088
21089 #if TARGET_XCOFF
21090
21091 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21092 #define HAVE_XCOFF_DWARF_EXTRAS 0
21093 #endif
21094
21095 static enum unwind_info_type
21096 rs6000_xcoff_debug_unwind_info (void)
21097 {
21098 return UI_NONE;
21099 }
21100
21101 static void
21102 rs6000_xcoff_asm_output_anchor (rtx symbol)
21103 {
21104 char buffer[100];
21105
21106 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21107 SYMBOL_REF_BLOCK_OFFSET (symbol));
21108 fprintf (asm_out_file, "%s", SET_ASM_OP);
21109 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21110 fprintf (asm_out_file, ",");
21111 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21112 fprintf (asm_out_file, "\n");
21113 }
21114
21115 static void
21116 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21117 {
21118 fputs (GLOBAL_ASM_OP, stream);
21119 RS6000_OUTPUT_BASENAME (stream, name);
21120 putc ('\n', stream);
21121 }
21122
21123 /* A get_unnamed_section callback, used for read-only sections.
21124 DIRECTIVE selects the private rodata csect when nonnull. */
21125
21126 static void
21127 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21128 {
21129 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21130 directive
21131 ? xcoff_private_rodata_section_name
21132 : xcoff_read_only_section_name,
21133 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21134 }
21135
21136 /* Likewise for read-write sections. */
21137
21138 static void
21139 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21140 {
21141 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21142 xcoff_private_data_section_name,
21143 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21144 }
21145
21146 static void
21147 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21148 {
21149 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21150 directive
21151 ? xcoff_private_data_section_name
21152 : xcoff_tls_data_section_name,
21153 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21154 }
21155
21156 /* A get_unnamed_section callback, used for switching to toc_section. */
21157
21158 static void
21159 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21160 {
21161 if (TARGET_MINIMAL_TOC)
21162 {
21163 /* toc_section is always selected at least once from
21164 rs6000_xcoff_file_start, so this is guaranteed to be
21165 defined exactly once in each file. */
21166 if (!toc_initialized)
21167 {
21168 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21169 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21170 toc_initialized = 1;
21171 }
21172 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21173 (TARGET_32BIT ? "" : ",3"));
21174 }
21175 else
21176 fputs ("\t.toc\n", asm_out_file);
21177 }
21178
21179 /* Implement TARGET_ASM_INIT_SECTIONS. */
21180
21181 static void
21182 rs6000_xcoff_asm_init_sections (void)
21183 {
21184 read_only_data_section
21185 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21186 NULL);
21187
21188 private_data_section
21189 = get_unnamed_section (SECTION_WRITE,
21190 rs6000_xcoff_output_readwrite_section_asm_op,
21191 NULL);
21192
21193 read_only_private_data_section
21194 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21195 "");
21196
21197 tls_data_section
21198 = get_unnamed_section (SECTION_TLS,
21199 rs6000_xcoff_output_tls_section_asm_op,
21200 NULL);
21201
21202 tls_private_data_section
21203 = get_unnamed_section (SECTION_TLS,
21204 rs6000_xcoff_output_tls_section_asm_op,
21205 "");
21206
21207 toc_section
21208 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21209
21210 readonly_data_section = read_only_data_section;
21211 }
21212
21213 static int
21214 rs6000_xcoff_reloc_rw_mask (void)
21215 {
21216 return 3;
21217 }
21218
21219 static void
21220 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21221 tree decl ATTRIBUTE_UNUSED)
21222 {
21223 int smclass;
21224 static const char * const suffix[7]
21225 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21226
21227 if (flags & SECTION_EXCLUDE)
21228 smclass = 6;
21229 else if (flags & SECTION_DEBUG)
21230 {
21231 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21232 return;
21233 }
21234 else if (flags & SECTION_CODE)
21235 smclass = 0;
21236 else if (flags & SECTION_TLS)
21237 {
21238 if (flags & SECTION_BSS)
21239 smclass = 5;
21240 else
21241 smclass = 4;
21242 }
21243 else if (flags & SECTION_WRITE)
21244 {
21245 if (flags & SECTION_BSS)
21246 smclass = 3;
21247 else
21248 smclass = 2;
21249 }
21250 else
21251 smclass = 1;
21252
21253 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21254 (flags & SECTION_CODE) ? "." : "",
21255 name, suffix[smclass], flags & SECTION_ENTSIZE);
21256 }
21257
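/* e.g. a writable, non-BSS section "foo" whose SECTION_ENTSIZE bits
   encode a 32-byte alignment (exact_log2 == 5) is emitted as
   "\t.csect foo[RW],5" (illustrative).  */
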
21258 #define IN_NAMED_SECTION(DECL) \
21259 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21260 && DECL_SECTION_NAME (DECL) != NULL)
21261
21262 static section *
21263 rs6000_xcoff_select_section (tree decl, int reloc,
21264 unsigned HOST_WIDE_INT align)
21265 {
21266 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21267 named section. */
21268 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21269 {
21270 resolve_unique_section (decl, reloc, true);
21271 if (IN_NAMED_SECTION (decl))
21272 return get_named_section (decl, NULL, reloc);
21273 }
21274
21275 if (decl_readonly_section (decl, reloc))
21276 {
21277 if (TREE_PUBLIC (decl))
21278 return read_only_data_section;
21279 else
21280 return read_only_private_data_section;
21281 }
21282 else
21283 {
21284 #if HAVE_AS_TLS
21285 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21286 {
21287 if (bss_initializer_p (decl))
21288 return tls_comm_section;
21289 else if (TREE_PUBLIC (decl))
21290 return tls_data_section;
21291 else
21292 return tls_private_data_section;
21293 }
21294 else
21295 #endif
21296 if (TREE_PUBLIC (decl))
21297 return data_section;
21298 else
21299 return private_data_section;
21300 }
21301 }
21302
21303 static void
21304 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21305 {
21306 const char *name;
21307
21308 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21309 name = (*targetm.strip_name_encoding) (name);
21310 set_decl_section_name (decl, name);
21311 }
21312
21313 /* Select section for constant in constant pool.
21314
21315 On RS/6000, all constants are in the private read-only data area.
21316 However, if this is being placed in the TOC it must be output as a
21317 toc entry. */
21318
21319 static section *
21320 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21321 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21322 {
21323 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21324 return toc_section;
21325 else
21326 return read_only_private_data_section;
21327 }
21328
21329 /* Remove any trailing [DS] or the like from the symbol name. */
21330
21331 static const char *
21332 rs6000_xcoff_strip_name_encoding (const char *name)
21333 {
21334 size_t len;
21335 if (*name == '*')
21336 name++;
21337 len = strlen (name);
21338 if (name[len - 1] == ']')
21339 return ggc_alloc_string (name, len - 4);
21340 else
21341 return name;
21342 }
21343
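/* e.g. "foo[DS]" and "foo[RW]" both strip to "foo"; a name with no
   trailing bracketed suffix is returned unchanged (illustrative).  */
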
21344 /* Section attributes. AIX is always PIC. */
21345
21346 static unsigned int
21347 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21348 {
21349 unsigned int align;
21350 unsigned int flags = default_section_type_flags (decl, name, reloc);
21351
21352 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21353 flags |= SECTION_BSS;
21354
21355 /* Align to at least UNIT size. */
21356 if (!decl || !DECL_P (decl))
21357 align = MIN_UNITS_PER_WORD;
21358 /* Align code CSECT to at least 32 bytes. */
21359 else if ((flags & SECTION_CODE) != 0)
21360 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21361 else
21362 /* Increase alignment of large objects if not already stricter. */
21363 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21364 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21365 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21366
21367 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21368 }
21369
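/* e.g. a code decl with the default 4-byte DECL_ALIGN is raised to a
   32-byte csect alignment by the MAX above, and exact_log2 (32) == 5 is
   folded into the SECTION_ENTSIZE bits of the returned flags
   (illustrative).  */
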
21370 /* Output at beginning of assembler file.
21371
21372 Initialize the section names for the RS/6000 at this point.
21373
21374 Specify filename, including full path, to assembler.
21375
21376 We want to go into the TOC section so at least one .toc will be emitted.
21377 Also, in order to output proper .bs/.es pairs, we need at least one static
21378 [RW] section emitted.
21379
21380 Finally, declare mcount when profiling to make the assembler happy. */
21381
21382 static void
21383 rs6000_xcoff_file_start (void)
21384 {
21385 rs6000_gen_section_name (&xcoff_bss_section_name,
21386 main_input_filename, ".bss_");
21387 rs6000_gen_section_name (&xcoff_private_data_section_name,
21388 main_input_filename, ".rw_");
21389 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21390 main_input_filename, ".rop_");
21391 rs6000_gen_section_name (&xcoff_read_only_section_name,
21392 main_input_filename, ".ro_");
21393 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21394 main_input_filename, ".tls_");
21395
21396 fputs ("\t.file\t", asm_out_file);
21397 output_quoted_string (asm_out_file, main_input_filename);
21398 fputc ('\n', asm_out_file);
21399 if (write_symbols != NO_DEBUG)
21400 switch_to_section (private_data_section);
21401 switch_to_section (toc_section);
21402 switch_to_section (text_section);
21403 if (profile_flag)
21404 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21405 rs6000_file_start ();
21406 }
21407
21408 /* Output at end of assembler file.
21409 On the RS/6000, referencing data should automatically pull in text. */
21410
21411 static void
21412 rs6000_xcoff_file_end (void)
21413 {
21414 switch_to_section (text_section);
21415 if (xcoff_tls_exec_model_detected)
21416 {
21417 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21418 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21419 }
21420 fputs ("_section_.text:\n", asm_out_file);
21421 switch_to_section (data_section);
21422 fputs (TARGET_32BIT
21423 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21424 asm_out_file);
21426 }
21427
21428 struct declare_alias_data
21429 {
21430 FILE *file;
21431 bool function_descriptor;
21432 };
21433
21434 /* Declare alias N. A helper for symtab_node::call_for_symbol_and_aliases. */
21435
21436 static bool
21437 rs6000_declare_alias (struct symtab_node *n, void *d)
21438 {
21439 struct declare_alias_data *data = (struct declare_alias_data *)d;
21440 /* Main symbol is output specially, because varasm machinery does part of
21441 the job for us - we do not need to declare .globl/lglobs and such. */
21442 if (!n->alias || n->weakref)
21443 return false;
21444
21445 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21446 return false;
21447
21448 /* Prevent assemble_alias from trying to use .set pseudo operation
21449 that does not behave as expected by the middle-end. */
21450 TREE_ASM_WRITTEN (n->decl) = true;
21451
21452 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21453 char *buffer = (char *) alloca (strlen (name) + 2);
21454 char *p;
21455 int dollar_inside = 0;
21456
21457 strcpy (buffer, name);
21458 p = strchr (buffer, '$');
21459 while (p) {
21460 *p = '_';
21461 dollar_inside++;
21462 p = strchr (p + 1, '$');
21463 }
21464 if (TREE_PUBLIC (n->decl))
21465 {
21466 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21467 {
21468 if (dollar_inside) {
21469 if (data->function_descriptor)
21470 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21471 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21472 }
21473 if (data->function_descriptor)
21474 {
21475 fputs ("\t.globl .", data->file);
21476 RS6000_OUTPUT_BASENAME (data->file, buffer);
21477 putc ('\n', data->file);
21478 }
21479 fputs ("\t.globl ", data->file);
21480 assemble_name (data->file, buffer);
21481 putc ('\n', data->file);
21482 }
21483 #ifdef ASM_WEAKEN_DECL
21484 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21485 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21486 #endif
21487 }
21488 else
21489 {
21490 if (dollar_inside)
21491 {
21492 if (data->function_descriptor)
21493 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21494 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21495 }
21496 if (data->function_descriptor)
21497 {
21498 fputs ("\t.lglobl .", data->file);
21499 RS6000_OUTPUT_BASENAME (data->file, buffer);
21500 putc ('\n', data->file);
21501 }
21502 fputs ("\t.lglobl ", data->file);
21503 assemble_name (data->file, buffer);
21504 putc ('\n', data->file);
21505 }
21506 if (data->function_descriptor)
21507 putc ('.', data->file);
21508 ASM_OUTPUT_LABEL (data->file, buffer);
21509 return false;
21510 }
21511
21512
21513 #ifdef HAVE_GAS_HIDDEN
21514 /* Helper function to calculate visibility of a DECL
21515 and return the value as a const string. */
21516
21517 static const char *
21518 rs6000_xcoff_visibility (tree decl)
21519 {
21520 static const char * const visibility_types[] = {
21521 "", ",protected", ",hidden", ",internal"
21522 };
21523
21524 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21525 return visibility_types[vis];
21526 }
21527 #endif
21528
21529
21530 /* This macro produces the initial definition of a function name.
21531 On the RS/6000, we need to place an extra '.' in the function name and
21532 output the function descriptor.
21533 Dollar signs are converted to underscores.
21534
21535 The csect for the function will have already been created when
21536 text_section was selected. We do have to go back to that csect, however.
21537
21538 The third and fourth parameters to the .function pseudo-op (16 and 044)
21539 are placeholders which no longer have any use.
21540
21541 Because AIX assembler's .set command has unexpected semantics, we output
21542 all aliases as alternative labels in front of the definition. */
21543
21544 void
21545 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21546 {
21547 char *buffer = (char *) alloca (strlen (name) + 1);
21548 char *p;
21549 int dollar_inside = 0;
21550 struct declare_alias_data data = {file, false};
21551
21552 strcpy (buffer, name);
21553 p = strchr (buffer, '$');
21554 while (p) {
21555 *p = '_';
21556 dollar_inside++;
21557 p = strchr (p + 1, '$');
21558 }
21559 if (TREE_PUBLIC (decl))
21560 {
21561 if (!RS6000_WEAK || !DECL_WEAK (decl))
21562 {
21563 if (dollar_inside) {
21564 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
21565 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
21566 }
21567 fputs ("\t.globl .", file);
21568 RS6000_OUTPUT_BASENAME (file, buffer);
21569 #ifdef HAVE_GAS_HIDDEN
21570 fputs (rs6000_xcoff_visibility (decl), file);
21571 #endif
21572 putc ('\n', file);
21573 }
21574 }
21575 else
21576 {
21577 if (dollar_inside) {
21578 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
21579 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
21580 }
21581 fputs ("\t.lglobl .", file);
21582 RS6000_OUTPUT_BASENAME (file, buffer);
21583 putc ('\n', file);
21584 }
21585
21586 fputs ("\t.csect ", file);
21587 assemble_name (file, buffer);
21588 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21589
21590 ASM_OUTPUT_LABEL (file, buffer);
21591
21592 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21593 &data, true);
21594 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21595 RS6000_OUTPUT_BASENAME (file, buffer);
21596 fputs (", TOC[tc0], 0\n", file);
21597
21598 in_section = NULL;
21599 switch_to_section (function_section (decl));
21600 putc ('.', file);
21601 ASM_OUTPUT_LABEL (file, buffer);
21602
21603 data.function_descriptor = true;
21604 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21605 &data, true);
21606 if (!DECL_IGNORED_P (decl))
21607 {
21608 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
21609 xcoffout_declare_function (file, decl, buffer);
21610 else if (dwarf_debuginfo_p ())
21611 {
21612 name = (*targetm.strip_name_encoding) (name);
21613 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21614 }
21615 }
21616 return;
21617 }
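
/* For illustration only (derived from the code above, not compiled): for a
   public, non-weak function FOO on a 64-bit XCOFF target the code above
   emits roughly

	.globl .FOO
	.csect FOO,3
   FOO:
	.llong .FOO, TOC[tc0], 0
	<switch to FOO's text csect>
   .FOO:

   i.e. FOO names the function descriptor in the data csect, while .FOO
   labels the code itself.  Any aliases are emitted as additional labels
   at both definition points.  */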
21618
21619
21620 /* Output assembly language to globalize a symbol from a DECL,
21621 possibly with visibility. */
21622
21623 void
21624 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21625 {
21626 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21627 fputs (GLOBAL_ASM_OP, stream);
21628 assemble_name (stream, name);
21629 #ifdef HAVE_GAS_HIDDEN
21630 fputs (rs6000_xcoff_visibility (decl), stream);
21631 #endif
21632 putc ('\n', stream);
21633 }
21634
21635 /* Output assembly language to define a symbol as COMMON from a DECL,
21636 possibly with visibility. */
21637
21638 void
21639 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21640 tree decl ATTRIBUTE_UNUSED,
21641 const char *name,
21642 unsigned HOST_WIDE_INT size,
21643 unsigned int align)
21644 {
21645 unsigned int align2 = 2;
21646
21647 if (align == 0)
21648 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21649
21650 if (align > 32)
21651 align2 = floor_log2 (align / BITS_PER_UNIT);
21652 else if (size > 4)
21653 align2 = 3;
21654
21655 if (! DECL_COMMON (decl))
21656 {
21657 /* Forget section. */
21658 in_section = NULL;
21659
21660 /* Globalize TLS BSS. */
21661 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21662 {
21663 fputs (GLOBAL_ASM_OP, stream);
21664 assemble_name (stream, name);
21665 fputc ('\n', stream);
21666 }
21667
21668 /* Switch to section and skip space. */
21669 fputs ("\t.csect ", stream);
21670 assemble_name (stream, name);
21671 fprintf (stream, ",%u\n", align2);
21672 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21673 ASM_OUTPUT_SKIP (stream, size ? size : 1);
21674 return;
21675 }
21676
21677 if (TREE_PUBLIC (decl))
21678 {
21679 fprintf (stream,
21680 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21681 name, size, align2);
21682
21683 #ifdef HAVE_GAS_HIDDEN
21684 if (decl != NULL)
21685 fputs (rs6000_xcoff_visibility (decl), stream);
21686 #endif
21687 putc ('\n', stream);
21688 }
21689 else
21690 fprintf (stream,
21691 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21692 (*targetm.strip_name_encoding) (name), size, name, align2);
21693 }
21694
21695 /* This macro produces the initial definition of an object (variable) name.
21696 Because AIX assembler's .set command has unexpected semantics, we output
21697 all aliases as alternative labels in front of the definition. */
21698
21699 void
21700 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21701 {
21702 struct declare_alias_data data = {file, false};
21703 ASM_OUTPUT_LABEL (file, name);
21704 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21705 &data, true);
21706 }
21707
21708 /* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */
21709
21710 void
21711 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21712 {
21713 fputs (integer_asm_op (size, FALSE), file);
21714 assemble_name (file, label);
21715 fputs ("-$", file);
21716 }
21717
21718 /* Output a symbol offset relative to the dbase for the current object.
21719 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21720 signed offsets.
21721
21722 __gcc_unwind_dbase is embedded in all executables/libraries through
21723 libgcc/config/rs6000/crtdbase.S. */
21724
21725 void
21726 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21727 {
21728 fputs (integer_asm_op (size, FALSE), file);
21729 assemble_name (file, label);
21730 fputs ("-__gcc_unwind_dbase", file);
21731 }
21732
21733 #ifdef HAVE_AS_TLS
21734 static void
21735 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21736 {
21737 rtx symbol;
21738 int flags;
21739 const char *symname;
21740
21741 default_encode_section_info (decl, rtl, first);
21742
21743 /* Careful not to prod global register variables. */
21744 if (!MEM_P (rtl))
21745 return;
21746 symbol = XEXP (rtl, 0);
21747 if (!SYMBOL_REF_P (symbol))
21748 return;
21749
21750 flags = SYMBOL_REF_FLAGS (symbol);
21751
21752 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21753 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21754
21755 SYMBOL_REF_FLAGS (symbol) = flags;
21756
21757 symname = XSTR (symbol, 0);
21758
21759 /* Append the CSECT mapping class, unless the symbol is already qualified.
21760 Aliases are implemented as labels, so the symbol name should not add
21761 a mapping class. */
21762 if (decl
21763 && DECL_P (decl)
21764 && VAR_OR_FUNCTION_DECL_P (decl)
21765 && (symtab_node::get (decl) == NULL
21766 || symtab_node::get (decl)->alias == 0)
21767 && symname[strlen (symname) - 1] != ']')
21768 {
21769 const char *smclass = NULL;
21770
21771 if (TREE_CODE (decl) == FUNCTION_DECL)
21772 smclass = "[DS]";
21773 else if (DECL_THREAD_LOCAL_P (decl))
21774 {
21775 if (bss_initializer_p (decl))
21776 smclass = "[UL]";
21777 else if (flag_data_sections)
21778 smclass = "[TL]";
21779 }
21780 else if (DECL_EXTERNAL (decl))
21781 smclass = "[UA]";
21782 else if (bss_initializer_p (decl))
21783 smclass = "[BS]";
21784 else if (flag_data_sections)
21785 {
21786 /* This must exactly match the logic of the select_section hook. */
21787 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
21788 smclass = "[RO]";
21789 else
21790 smclass = "[RW]";
21791 }
21792
21793 if (smclass != NULL)
21794 {
21795 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
21796
21797 strcpy (newname, symname);
21798 strcat (newname, smclass);
21799 XSTR (symbol, 0) = ggc_strdup (newname);
21800 }
21801 }
21802 }
21803 #endif /* HAVE_AS_TLS */
21804 #endif /* TARGET_XCOFF */
21805
21806 void
21807 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21808 const char *name, const char *val)
21809 {
21810 fputs ("\t.weak\t", stream);
21811 assemble_name (stream, name);
21812 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21813 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21814 {
21815 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21816 if (TARGET_XCOFF)
21817 fputs (rs6000_xcoff_visibility (decl), stream);
21818 #endif
21819 fputs ("\n\t.weak\t.", stream);
21820 RS6000_OUTPUT_BASENAME (stream, name);
21821 }
21822 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21823 if (TARGET_XCOFF)
21824 fputs (rs6000_xcoff_visibility (decl), stream);
21825 #endif
21826 fputc ('\n', stream);
21827
21828 if (val)
21829 {
21830 #ifdef ASM_OUTPUT_DEF
21831 ASM_OUTPUT_DEF (stream, name, val);
21832 #endif
21833 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21834 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21835 {
21836 fputs ("\t.set\t.", stream);
21837 RS6000_OUTPUT_BASENAME (stream, name);
21838 fputs (",.", stream);
21839 RS6000_OUTPUT_BASENAME (stream, val);
21840 fputc ('\n', stream);
21841 }
21842 }
21843 }
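
/* Illustration only (not compiled): for a weak function NAME aliased to VAL
   under the AIX ABI with dot symbols, the code above emits roughly

	.weak NAME
	.weak .NAME
	<NAME defined to VAL via ASM_OUTPUT_DEF>
	.set .NAME,.VAL

   covering both the function descriptor symbol and the code symbol.  The
   exact ASM_OUTPUT_DEF expansion is target-defined and elided here.  */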
21844
21845
21846 /* Return true if INSN should not be copied. */
21847
21848 static bool
21849 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21850 {
21851 return recog_memoized (insn) >= 0
21852 && get_attr_cannot_copy (insn);
21853 }
21854
21855 /* Compute a (partial) cost for rtx X. Return true if the complete
21856 cost has been computed, and false if subexpressions should be
21857 scanned. In either case, *TOTAL contains the cost result. */
21858
21859 static bool
21860 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21861 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21862 {
21863 int code = GET_CODE (x);
21864
21865 switch (code)
21866 {
21867 /* On the RS/6000, if it is valid in the insn, it is free. */
21868 case CONST_INT:
21869 if (((outer_code == SET
21870 || outer_code == PLUS
21871 || outer_code == MINUS)
21872 && (satisfies_constraint_I (x)
21873 || satisfies_constraint_L (x)))
21874 || (outer_code == AND
21875 && (satisfies_constraint_K (x)
21876 || (mode == SImode
21877 ? satisfies_constraint_L (x)
21878 : satisfies_constraint_J (x))))
21879 || ((outer_code == IOR || outer_code == XOR)
21880 && (satisfies_constraint_K (x)
21881 || (mode == SImode
21882 ? satisfies_constraint_L (x)
21883 : satisfies_constraint_J (x))))
21884 || outer_code == ASHIFT
21885 || outer_code == ASHIFTRT
21886 || outer_code == LSHIFTRT
21887 || outer_code == ROTATE
21888 || outer_code == ROTATERT
21889 || outer_code == ZERO_EXTRACT
21890 || (outer_code == MULT
21891 && satisfies_constraint_I (x))
21892 || ((outer_code == DIV || outer_code == UDIV
21893 || outer_code == MOD || outer_code == UMOD)
21894 && exact_log2 (INTVAL (x)) >= 0)
21895 || (outer_code == COMPARE
21896 && (satisfies_constraint_I (x)
21897 || satisfies_constraint_K (x)))
21898 || ((outer_code == EQ || outer_code == NE)
21899 && (satisfies_constraint_I (x)
21900 || satisfies_constraint_K (x)
21901 || (mode == SImode
21902 ? satisfies_constraint_L (x)
21903 : satisfies_constraint_J (x))))
21904 || (outer_code == GTU
21905 && satisfies_constraint_I (x))
21906 || (outer_code == LTU
21907 && satisfies_constraint_P (x)))
21908 {
21909 *total = 0;
21910 return true;
21911 }
21912 else if ((outer_code == PLUS
21913 && reg_or_add_cint_operand (x, mode))
21914 || (outer_code == MINUS
21915 && reg_or_sub_cint_operand (x, mode))
21916 || ((outer_code == SET
21917 || outer_code == IOR
21918 || outer_code == XOR)
21919 && (INTVAL (x)
21920 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21921 {
21922 *total = COSTS_N_INSNS (1);
21923 return true;
21924 }
21925 /* FALLTHRU */
21926
21927 case CONST_DOUBLE:
21928 case CONST_WIDE_INT:
21929 case CONST:
21930 case HIGH:
21931 case SYMBOL_REF:
21932 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21933 return true;
21934
21935 case MEM:
21936 /* When optimizing for size, MEM should be slightly more expensive
21937 than generating address, e.g., (plus (reg) (const)).
21938 L1 cache latency is about two instructions. */
21939 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21940 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21941 *total += COSTS_N_INSNS (100);
21942 return true;
21943
21944 case LABEL_REF:
21945 *total = 0;
21946 return true;
21947
21948 case PLUS:
21949 case MINUS:
21950 if (FLOAT_MODE_P (mode))
21951 *total = rs6000_cost->fp;
21952 else
21953 *total = COSTS_N_INSNS (1);
21954 return false;
21955
21956 case MULT:
21957 if (CONST_INT_P (XEXP (x, 1))
21958 && satisfies_constraint_I (XEXP (x, 1)))
21959 {
21960 if (INTVAL (XEXP (x, 1)) >= -256
21961 && INTVAL (XEXP (x, 1)) <= 255)
21962 *total = rs6000_cost->mulsi_const9;
21963 else
21964 *total = rs6000_cost->mulsi_const;
21965 }
21966 else if (mode == SFmode)
21967 *total = rs6000_cost->fp;
21968 else if (FLOAT_MODE_P (mode))
21969 *total = rs6000_cost->dmul;
21970 else if (mode == DImode)
21971 *total = rs6000_cost->muldi;
21972 else
21973 *total = rs6000_cost->mulsi;
21974 return false;
21975
21976 case FMA:
21977 if (mode == SFmode)
21978 *total = rs6000_cost->fp;
21979 else
21980 *total = rs6000_cost->dmul;
21981 break;
21982
21983 case DIV:
21984 case MOD:
21985 if (FLOAT_MODE_P (mode))
21986 {
21987 *total = mode == DFmode ? rs6000_cost->ddiv
21988 : rs6000_cost->sdiv;
21989 return false;
21990 }
21991 /* FALLTHRU */
21992
21993 case UDIV:
21994 case UMOD:
21995 if (CONST_INT_P (XEXP (x, 1))
21996 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21997 {
21998 if (code == DIV || code == MOD)
21999 /* Shift, addze */
22000 *total = COSTS_N_INSNS (2);
22001 else
22002 /* Shift */
22003 *total = COSTS_N_INSNS (1);
22004 }
22005 else
22006 {
22007 if (GET_MODE (XEXP (x, 1)) == DImode)
22008 *total = rs6000_cost->divdi;
22009 else
22010 *total = rs6000_cost->divsi;
22011 }
22012 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22013 if (!TARGET_MODULO && (code == MOD || code == UMOD))
22014 *total += COSTS_N_INSNS (2);
22015 return false;
22016
22017 case CTZ:
22018 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
22019 return false;
22020
22021 case FFS:
22022 *total = COSTS_N_INSNS (4);
22023 return false;
22024
22025 case POPCOUNT:
22026 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
22027 return false;
22028
22029 case PARITY:
22030 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
22031 return false;
22032
22033 case NOT:
22034 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
22035 *total = 0;
22036 else
22037 *total = COSTS_N_INSNS (1);
22038 return false;
22039
22040 case AND:
22041 if (CONST_INT_P (XEXP (x, 1)))
22042 {
22043 rtx left = XEXP (x, 0);
22044 rtx_code left_code = GET_CODE (left);
22045
22046 /* rotate-and-mask: 1 insn. */
22047 if ((left_code == ROTATE
22048 || left_code == ASHIFT
22049 || left_code == LSHIFTRT)
22050 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
22051 {
22052 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
22053 if (!CONST_INT_P (XEXP (left, 1)))
22054 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
22055 *total += COSTS_N_INSNS (1);
22056 return true;
22057 }
22058
22059 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22060 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
22061 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
22062 || (val & 0xffff) == val
22063 || (val & 0xffff0000) == val
22064 || ((val & 0xffff) == 0 && mode == SImode))
22065 {
22066 *total = rtx_cost (left, mode, AND, 0, speed);
22067 *total += COSTS_N_INSNS (1);
22068 return true;
22069 }
22070
22071 /* 2 insns. */
22072 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
22073 {
22074 *total = rtx_cost (left, mode, AND, 0, speed);
22075 *total += COSTS_N_INSNS (2);
22076 return true;
22077 }
22078 }
22079
22080 *total = COSTS_N_INSNS (1);
22081 return false;
22082
22083 case IOR:
22084 /* FIXME */
22085 *total = COSTS_N_INSNS (1);
22086 return true;
22087
22088 case CLZ:
22089 case XOR:
22090 case ZERO_EXTRACT:
22091 *total = COSTS_N_INSNS (1);
22092 return false;
22093
22094 case ASHIFT:
22095 /* The EXTSWSLI instruction combines a sign extend and a shift. Don't count
22096 the sign extend and the shift separately within the insn. */
22097 if (TARGET_EXTSWSLI && mode == DImode
22098 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
22099 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22100 {
22101 *total = 0;
22102 return false;
22103 }
22104 /* fall through */
22105
22106 case ASHIFTRT:
22107 case LSHIFTRT:
22108 case ROTATE:
22109 case ROTATERT:
22110 /* Handle mul_highpart. */
22111 if (outer_code == TRUNCATE
22112 && GET_CODE (XEXP (x, 0)) == MULT)
22113 {
22114 if (mode == DImode)
22115 *total = rs6000_cost->muldi;
22116 else
22117 *total = rs6000_cost->mulsi;
22118 return true;
22119 }
22120 else if (outer_code == AND)
22121 *total = 0;
22122 else
22123 *total = COSTS_N_INSNS (1);
22124 return false;
22125
22126 case SIGN_EXTEND:
22127 case ZERO_EXTEND:
22128 if (MEM_P (XEXP (x, 0)))
22129 *total = 0;
22130 else
22131 *total = COSTS_N_INSNS (1);
22132 return false;
22133
22134 case COMPARE:
22135 case NEG:
22136 case ABS:
22137 if (!FLOAT_MODE_P (mode))
22138 {
22139 *total = COSTS_N_INSNS (1);
22140 return false;
22141 }
22142 /* FALLTHRU */
22143
22144 case FLOAT:
22145 case UNSIGNED_FLOAT:
22146 case FIX:
22147 case UNSIGNED_FIX:
22148 case FLOAT_TRUNCATE:
22149 *total = rs6000_cost->fp;
22150 return false;
22151
22152 case FLOAT_EXTEND:
22153 if (mode == DFmode)
22154 *total = rs6000_cost->sfdf_convert;
22155 else
22156 *total = rs6000_cost->fp;
22157 return false;
22158
22159 case CALL:
22160 case IF_THEN_ELSE:
22161 if (!speed)
22162 {
22163 *total = COSTS_N_INSNS (1);
22164 return true;
22165 }
22166 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22167 {
22168 *total = rs6000_cost->fp;
22169 return false;
22170 }
22171 break;
22172
22173 case NE:
22174 case EQ:
22175 case GTU:
22176 case LTU:
22177 /* Carry bit requires mode == Pmode.
22178 NEG or PLUS already counted so only add one. */
22179 if (mode == Pmode
22180 && (outer_code == NEG || outer_code == PLUS))
22181 {
22182 *total = COSTS_N_INSNS (1);
22183 return true;
22184 }
22185 /* FALLTHRU */
22186
22187 case GT:
22188 case LT:
22189 case UNORDERED:
22190 if (outer_code == SET)
22191 {
22192 if (XEXP (x, 1) == const0_rtx)
22193 {
22194 *total = COSTS_N_INSNS (2);
22195 return true;
22196 }
22197 else
22198 {
22199 *total = COSTS_N_INSNS (3);
22200 return false;
22201 }
22202 }
22203 /* CC COMPARE. */
22204 if (outer_code == COMPARE)
22205 {
22206 *total = 0;
22207 return true;
22208 }
22209 break;
22210
22211 case UNSPEC:
22212 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22213 {
22214 *total = 0;
22215 return true;
22216 }
22217 break;
22218
22219 default:
22220 break;
22221 }
22222
22223 return false;
22224 }
22225
22226 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
22227
22228 static bool
22229 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22230 int opno, int *total, bool speed)
22231 {
22232 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22233
22234 fprintf (stderr,
22235 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22236 "opno = %d, total = %d, speed = %s, x:\n",
22237 ret ? "complete" : "scan inner",
22238 GET_MODE_NAME (mode),
22239 GET_RTX_NAME (outer_code),
22240 opno,
22241 *total,
22242 speed ? "true" : "false");
22243
22244 debug_rtx (x);
22245
22246 return ret;
22247 }
22248
22249 static int
22250 rs6000_insn_cost (rtx_insn *insn, bool speed)
22251 {
22252 if (recog_memoized (insn) < 0)
22253 return 0;
22254
22255 /* If we are optimizing for size, just use the length. */
22256 if (!speed)
22257 return get_attr_length (insn);
22258
22259 /* Use the cost if provided. */
22260 int cost = get_attr_cost (insn);
22261 if (cost > 0)
22262 return cost;
22263
22264 /* If the insn tells us how many insns there are, use that. Otherwise use
22265 the length/4. Adjust the insn length to remove the extra size that
22266 prefixed instructions take. */
22267 int n = get_attr_num_insns (insn);
22268 if (n == 0)
22269 {
22270 int length = get_attr_length (insn);
22271 if (get_attr_prefixed (insn) == PREFIXED_YES)
22272 {
22273 int adjust = 0;
22274 ADJUST_INSN_LENGTH (insn, adjust);
22275 length -= adjust;
22276 }
22277
22278 n = length / 4;
22279 }
22280
22281 enum attr_type type = get_attr_type (insn);
22282
22283 switch (type)
22284 {
22285 case TYPE_LOAD:
22286 case TYPE_FPLOAD:
22287 case TYPE_VECLOAD:
22288 cost = COSTS_N_INSNS (n + 1);
22289 break;
22290
22291 case TYPE_MUL:
22292 switch (get_attr_size (insn))
22293 {
22294 case SIZE_8:
22295 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22296 break;
22297 case SIZE_16:
22298 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22299 break;
22300 case SIZE_32:
22301 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22302 break;
22303 case SIZE_64:
22304 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22305 break;
22306 default:
22307 gcc_unreachable ();
22308 }
22309 break;
22310 case TYPE_DIV:
22311 switch (get_attr_size (insn))
22312 {
22313 case SIZE_32:
22314 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22315 break;
22316 case SIZE_64:
22317 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22318 break;
22319 default:
22320 gcc_unreachable ();
22321 }
22322 break;
22323
22324 case TYPE_FP:
22325 cost = n * rs6000_cost->fp;
22326 break;
22327 case TYPE_DMUL:
22328 cost = n * rs6000_cost->dmul;
22329 break;
22330 case TYPE_SDIV:
22331 cost = n * rs6000_cost->sdiv;
22332 break;
22333 case TYPE_DDIV:
22334 cost = n * rs6000_cost->ddiv;
22335 break;
22336
22337 case TYPE_SYNC:
22338 case TYPE_LOAD_L:
22339 case TYPE_MFCR:
22340 case TYPE_MFCRF:
22341 cost = COSTS_N_INSNS (n + 2);
22342 break;
22343
22344 default:
22345 cost = COSTS_N_INSNS (n);
22346 }
22347
22348 return cost;
22349 }
22350
22351 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22352
22353 static int
22354 rs6000_debug_address_cost (rtx x, machine_mode mode,
22355 addr_space_t as, bool speed)
22356 {
22357 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22358
22359 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22360 ret, speed ? "true" : "false");
22361 debug_rtx (x);
22362
22363 return ret;
22364 }
22365
22366
22367 /* A C expression returning the cost of moving data from a register of class
22368 CLASS1 to one of CLASS2. */
22369
22370 static int
22371 rs6000_register_move_cost (machine_mode mode,
22372 reg_class_t from, reg_class_t to)
22373 {
22374 int ret;
22375 reg_class_t rclass;
22376
22377 if (TARGET_DEBUG_COST)
22378 dbg_cost_ctrl++;
22379
22380 /* If we have VSX, we can easily move between FPR or Altivec registers,
22381 otherwise we can only easily move within classes.
22382 Do this first so we give best-case answers for union classes
22383 containing both gprs and vsx regs. */
22384 HARD_REG_SET to_vsx, from_vsx;
22385 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22386 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22387 if (!hard_reg_set_empty_p (to_vsx)
22388 && !hard_reg_set_empty_p (from_vsx)
22389 && (TARGET_VSX
22390 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22391 {
22392 int reg = FIRST_FPR_REGNO;
22393 if (TARGET_VSX
22394 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22395 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22396 reg = FIRST_ALTIVEC_REGNO;
22397 ret = 2 * hard_regno_nregs (reg, mode);
22398 }
22399
22400 /* Moves from/to GENERAL_REGS. */
22401 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22402 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22403 {
22404 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22405 {
22406 if (TARGET_DIRECT_MOVE)
22407 {
22408 /* Keep the cost for direct moves above that for within
22409 a register class even if the actual processor cost is
22410 comparable. We do this because a direct move insn
22411 can't be a nop, whereas with ideal register
22412 allocation a move within the same class might turn
22413 out to be a nop. */
22414 if (rs6000_tune == PROCESSOR_POWER9
22415 || rs6000_tune == PROCESSOR_POWER10)
22416 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22417 else
22418 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22419 /* SFmode requires a conversion when moving between gprs
22420 and vsx. */
22421 if (mode == SFmode)
22422 ret += 2;
22423 }
22424 else
22425 ret = (rs6000_memory_move_cost (mode, rclass, false)
22426 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22427 }
22428
22429 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22430 shift. */
22431 else if (rclass == CR_REGS)
22432 ret = 4;
22433
22434 /* For those processors that have slow LR/CTR moves, make them more
22435 expensive than memory in order to bias spills to memory. */
22436 else if ((rs6000_tune == PROCESSOR_POWER6
22437 || rs6000_tune == PROCESSOR_POWER7
22438 || rs6000_tune == PROCESSOR_POWER8
22439 || rs6000_tune == PROCESSOR_POWER9)
22440 && reg_class_subset_p (rclass, SPECIAL_REGS))
22441 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22442
22443 else
22444 /* A move will cost one instruction per GPR moved. */
22445 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22446 }
22447
22448 /* Everything else has to go through GENERAL_REGS. */
22449 else
22450 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22451 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22452
22453 if (TARGET_DEBUG_COST)
22454 {
22455 if (dbg_cost_ctrl == 1)
22456 fprintf (stderr,
22457 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22458 ret, GET_MODE_NAME (mode), reg_class_names[from],
22459 reg_class_names[to]);
22460 dbg_cost_ctrl--;
22461 }
22462
22463 return ret;
22464 }
22465
22466 /* A C expression returning the cost of moving data of MODE between a register
22467 and memory. */
22468
22469 static int
22470 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22471 bool in ATTRIBUTE_UNUSED)
22472 {
22473 int ret;
22474
22475 if (TARGET_DEBUG_COST)
22476 dbg_cost_ctrl++;
22477
22478 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22479 ret = 4 * hard_regno_nregs (0, mode);
22480 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22481 || reg_classes_intersect_p (rclass, VSX_REGS)))
22482 ret = 4 * hard_regno_nregs (32, mode);
22483 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22484 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22485 else
22486 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22487
22488 if (TARGET_DEBUG_COST)
22489 {
22490 if (dbg_cost_ctrl == 1)
22491 fprintf (stderr,
22492 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22493 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22494 dbg_cost_ctrl--;
22495 }
22496
22497 return ret;
22498 }
22499
22500 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22501
22502 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22503 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22504 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22505 move cost between GENERAL_REGS and VSX_REGS low.
22506
22507 It might seem reasonable to use a union class. After all, if usage
22508 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22509 rather than memory. However, in cases where register pressure of
22510 both is high, like the cactus_adm spec test, allowing
22511 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22512 the first scheduling pass. This is partly due to an allocno of
22513 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22514 class, which gives too high a pressure for GENERAL_REGS and too low
22515 for VSX_REGS. So, force a choice of the subclass here.
22516
22517 The best class is also the union if GENERAL_REGS and VSX_REGS have
22518 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22519 allocno class, since trying to narrow down the class by regno mode
22520 is prone to error. For example, SImode is allowed in VSX regs and
22521 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22522 it would be wrong to choose an allocno of GENERAL_REGS based on
22523 SImode. */
22524
22525 static reg_class_t
22526 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22527 reg_class_t allocno_class,
22528 reg_class_t best_class)
22529 {
22530 switch (allocno_class)
22531 {
22532 case GEN_OR_VSX_REGS:
22533 /* best_class must be a subset of allocno_class. */
22534 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22535 || best_class == GEN_OR_FLOAT_REGS
22536 || best_class == VSX_REGS
22537 || best_class == ALTIVEC_REGS
22538 || best_class == FLOAT_REGS
22539 || best_class == GENERAL_REGS
22540 || best_class == BASE_REGS);
22541 /* Use best_class but choose wider classes when copying from the
22542 wider class to best_class is cheap. This mimics IRA choice
22543 of allocno class. */
22544 if (best_class == BASE_REGS)
22545 return GENERAL_REGS;
22546 if (TARGET_VSX && best_class == FLOAT_REGS)
22547 return VSX_REGS;
22548 return best_class;
22549
22550 case VSX_REGS:
22551 if (best_class == ALTIVEC_REGS)
22552 return ALTIVEC_REGS;
22553
22554 default:
22555 break;
22556 }
22557
22558 return allocno_class;
22559 }
22560
22561 /* Return the decl of a target-specific builtin that implements the
22562 reciprocal of the function FNDECL, or NULL_TREE if none is available. */
22563
22564 static tree
22565 rs6000_builtin_reciprocal (tree fndecl)
22566 {
22567 switch (DECL_MD_FUNCTION_CODE (fndecl))
22568 {
22569 case RS6000_BIF_XVSQRTDP:
22570 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
22571 return NULL_TREE;
22572
22573 return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF];
22574
22575 case RS6000_BIF_XVSQRTSP:
22576 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
22577 return NULL_TREE;
22578
22579 return rs6000_builtin_decls[RS6000_BIF_RSQRT_4SF];
22580
22581 default:
22582 return NULL_TREE;
22583 }
22584 }
22585
22586 /* Load up a constant. If the mode is a vector mode, splat the value across
22587 all of the vector elements. */
22588
22589 static rtx
22590 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22591 {
22592 rtx reg;
22593
22594 if (mode == SFmode || mode == DFmode)
22595 {
22596 rtx d = const_double_from_real_value (dconst, mode);
22597 reg = force_reg (mode, d);
22598 }
22599 else if (mode == V4SFmode)
22600 {
22601 rtx d = const_double_from_real_value (dconst, SFmode);
22602 rtvec v = gen_rtvec (4, d, d, d, d);
22603 reg = gen_reg_rtx (mode);
22604 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22605 }
22606 else if (mode == V2DFmode)
22607 {
22608 rtx d = const_double_from_real_value (dconst, DFmode);
22609 rtvec v = gen_rtvec (2, d, d);
22610 reg = gen_reg_rtx (mode);
22611 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22612 }
22613 else
22614 gcc_unreachable ();
22615
22616 return reg;
22617 }
22618
22619 /* Generate an FMA instruction. */
22620
22621 static void
22622 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22623 {
22624 machine_mode mode = GET_MODE (target);
22625 rtx dst;
22626
22627 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22628 gcc_assert (dst != NULL);
22629
22630 if (dst != target)
22631 emit_move_insn (target, dst);
22632 }
22633
22634 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22635
22636 static void
22637 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22638 {
22639 machine_mode mode = GET_MODE (dst);
22640 rtx r;
22641
22642 /* This is a tad more complicated, since the fnma_optab is for
22643 a different expression: fma(-m1, m2, a), which is the same
22644 thing except in the case of signed zeros.
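 For example, with m1 = m2 = 1.0 and a = 1.0, -fma (1.0, 1.0, -1.0)
 yields -0.0 under round-to-nearest, while fma (-1.0, 1.0, 1.0)
 yields +0.0.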
22645
22646 Fortunately we know that if FMA is supported that FNMSUB is
22647 also supported in the ISA. Just expand it directly. */
22648
22649 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22650
22651 r = gen_rtx_NEG (mode, a);
22652 r = gen_rtx_FMA (mode, m1, m2, r);
22653 r = gen_rtx_NEG (mode, r);
22654 emit_insn (gen_rtx_SET (dst, r));
22655 }
22656
22657 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22658 add a reg_note saying that this was a division. Support both scalar and
22659 vector divide. Assumes no trapping math and finite arguments. */
22660
22661 void
22662 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22663 {
22664 machine_mode mode = GET_MODE (dst);
22665 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22666 int i;
22667
22668 /* Low precision estimates guarantee 5 bits of accuracy. High
22669 precision estimates guarantee 14 bits of accuracy. SFmode
22670 requires 23 bits of accuracy. DFmode requires 52 bits of
22671 accuracy. Each pass at least doubles the accuracy, leading
22672 to the following. */
22673 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22674 if (mode == DFmode || mode == V2DFmode)
22675 passes++;
22676
22677 enum insn_code code = optab_handler (smul_optab, mode);
22678 insn_gen_fn gen_mul = GEN_FCN (code);
22679
22680 gcc_assert (code != CODE_FOR_nothing);
22681
22682 one = rs6000_load_constant_and_splat (mode, dconst1);
22683
22684 /* x0 = 1./d estimate */
22685 x0 = gen_reg_rtx (mode);
22686 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22687 UNSPEC_FRES)));
22688
22689 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22690 if (passes > 1) {
22691
22692 /* e0 = 1. - d * x0 */
22693 e0 = gen_reg_rtx (mode);
22694 rs6000_emit_nmsub (e0, d, x0, one);
22695
22696 /* x1 = x0 + e0 * x0 */
22697 x1 = gen_reg_rtx (mode);
22698 rs6000_emit_madd (x1, e0, x0, x0);
22699
22700 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22701 ++i, xprev = xnext, eprev = enext) {
22702
22703 /* enext = eprev * eprev */
22704 enext = gen_reg_rtx (mode);
22705 emit_insn (gen_mul (enext, eprev, eprev));
22706
22707 /* xnext = xprev + enext * xprev */
22708 xnext = gen_reg_rtx (mode);
22709 rs6000_emit_madd (xnext, enext, xprev, xprev);
22710 }
22711
22712 } else
22713 xprev = x0;
22714
22715 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22716
22717 /* u = n * xprev */
22718 u = gen_reg_rtx (mode);
22719 emit_insn (gen_mul (u, n, xprev));
22720
22721 /* v = n - (d * u) */
22722 v = gen_reg_rtx (mode);
22723 rs6000_emit_nmsub (v, d, u, n);
22724
22725 /* dst = (v * xprev) + u */
22726 rs6000_emit_madd (dst, v, xprev, u);
22727
22728 if (note_p)
22729 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22730 }
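
/* A minimal scalar model of the sequence above (illustration only, not part
   of GCC).  ESTIMATE_RECIP is a hypothetical stand-in for the fre/fres
   reciprocal-estimate instruction, and the refinement is written in its
   algebraically equivalent form rather than the exact e*e recurrence
   emitted above:

   static double
   swdiv_model (double n, double d, int passes)
   {
     double x = ESTIMATE_RECIP (d);      // x0 ~= 1/d, low precision
     for (int i = 0; i < passes - 1; i++)
       {
         double e = 1.0 - d * x;         // fnmsub: residual error
         x = x + e * x;                  // fmadd: refine the estimate
       }
     double u = n * x;                   // u ~= n/d
     double v = n - d * u;               // fnmsub: final residual
     return u + v * x;                   // fmadd: corrected quotient
   }
*/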
22731
22732 /* Goldschmidt's Algorithm for single/double-precision floating point
22733 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22734
22735 void
22736 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22737 {
22738 machine_mode mode = GET_MODE (src);
22739 rtx e = gen_reg_rtx (mode);
22740 rtx g = gen_reg_rtx (mode);
22741 rtx h = gen_reg_rtx (mode);
22742
22743 /* Low precision estimates guarantee 5 bits of accuracy. High
22744 precision estimates guarantee 14 bits of accuracy. SFmode
22745 requires 23 bits of accuracy. DFmode requires 52 bits of
22746 accuracy. Each pass at least doubles the accuracy, leading
22747 to the following. */
22748 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22749 if (mode == DFmode || mode == V2DFmode)
22750 passes++;
22751
22752 int i;
22753 rtx mhalf;
22754 enum insn_code code = optab_handler (smul_optab, mode);
22755 insn_gen_fn gen_mul = GEN_FCN (code);
22756
22757 gcc_assert (code != CODE_FOR_nothing);
22758
22759 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22760
22761 /* e = rsqrt estimate */
22762 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22763 UNSPEC_RSQRT)));
22764
22765 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22766 if (!recip)
22767 {
22768 rtx zero = force_reg (mode, CONST0_RTX (mode));
22769
22770 if (mode == SFmode)
22771 {
22772 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
22773 e, zero, mode, 0);
22774 if (target != e)
22775 emit_move_insn (e, target);
22776 }
22777 else
22778 {
22779 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22780 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22781 }
22782 }
22783
22784 /* g = sqrt estimate. */
22785 emit_insn (gen_mul (g, e, src));
22786 /* h = 1/(2*sqrt) estimate. */
22787 emit_insn (gen_mul (h, e, mhalf));
22788
22789 if (recip)
22790 {
22791 if (passes == 1)
22792 {
22793 rtx t = gen_reg_rtx (mode);
22794 rs6000_emit_nmsub (t, g, h, mhalf);
22795 /* Apply correction directly to 1/rsqrt estimate. */
22796 rs6000_emit_madd (dst, e, t, e);
22797 }
22798 else
22799 {
22800 for (i = 0; i < passes; i++)
22801 {
22802 rtx t1 = gen_reg_rtx (mode);
22803 rtx g1 = gen_reg_rtx (mode);
22804 rtx h1 = gen_reg_rtx (mode);
22805
22806 rs6000_emit_nmsub (t1, g, h, mhalf);
22807 rs6000_emit_madd (g1, g, t1, g);
22808 rs6000_emit_madd (h1, h, t1, h);
22809
22810 g = g1;
22811 h = h1;
22812 }
22813 /* Multiply by 2 for 1/rsqrt. */
22814 emit_insn (gen_add3_insn (dst, h, h));
22815 }
22816 }
22817 else
22818 {
22819 rtx t = gen_reg_rtx (mode);
22820 rs6000_emit_nmsub (t, g, h, mhalf);
22821 rs6000_emit_madd (dst, g, t, g);
22822 }
22823
22824 return;
22825 }
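
/* A minimal scalar model of the sequence above (illustration only;
   ESTIMATE_RSQRT is a hypothetical stand-in for the frsqrte estimate, and
   the passes == 1 recip shortcut is folded into the general loop, to which
   it is algebraically equivalent since 2*h*(1 + t) == e*(1 + t)):

   static double
   swsqrt_model (double src, int passes, int recip)
   {
     double e = ESTIMATE_RSQRT (src);    // e ~= 1/sqrt(src)
     double g = src * e;                 // g ~= sqrt(src)
     double h = 0.5 * e;                 // h ~= 1/(2*sqrt(src))
     if (recip)
       {
         for (int i = 0; i < passes; i++)
           {
             double t = 0.5 - g * h;     // fnmsub: correction term
             g = g + g * t;              // refine the sqrt estimate
             h = h + h * t;              // refine the 1/(2*sqrt) estimate
           }
         return h + h;                   // 2*h ~= 1/sqrt(src)
       }
     double t = 0.5 - g * h;             // single Goldschmidt correction
     return g + g * t;                   // corrected sqrt
   }
*/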
22826
22827 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22828 (Power7) targets. DST is the target, and SRC is the argument operand. */
22829
22830 void
22831 rs6000_emit_popcount (rtx dst, rtx src)
22832 {
22833 machine_mode mode = GET_MODE (dst);
22834 rtx tmp1, tmp2;
22835
22836 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22837 if (TARGET_POPCNTD)
22838 {
22839 if (mode == SImode)
22840 emit_insn (gen_popcntdsi2 (dst, src));
22841 else
22842 emit_insn (gen_popcntddi2 (dst, src));
22843 return;
22844 }
22845
22846 tmp1 = gen_reg_rtx (mode);
22847
22848 if (mode == SImode)
22849 {
22850 emit_insn (gen_popcntbsi2 (tmp1, src));
22851 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22852 NULL_RTX, 0);
22853 tmp2 = force_reg (SImode, tmp2);
22854 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22855 }
22856 else
22857 {
22858 emit_insn (gen_popcntbdi2 (tmp1, src));
22859 tmp2 = expand_mult (DImode, tmp1,
22860 GEN_INT ((HOST_WIDE_INT)
22861 0x01010101 << 32 | 0x01010101),
22862 NULL_RTX, 0);
22863 tmp2 = force_reg (DImode, tmp2);
22864 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22865 }
22866 }
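
/* Scalar model of the popcntb fallback above (illustration only; popcntb
   here is a hypothetical helper mirroring the PowerPC instruction, which
   leaves a population count in each byte of its operand):

   static unsigned int
   popcount_model (unsigned int x)
   {
     unsigned int bytes = popcntb (x);    // per-byte population counts
     return (bytes * 0x01010101u) >> 24;  // the multiply sums all byte
                                          // counts into the top byte,
                                          // which the shift extracts
   }
*/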
22867
22868
22869 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22870 target, and SRC is the argument operand. */
22871
22872 void
22873 rs6000_emit_parity (rtx dst, rtx src)
22874 {
22875 machine_mode mode = GET_MODE (dst);
22876 rtx tmp;
22877
22878 tmp = gen_reg_rtx (mode);
22879
22880 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22881 if (TARGET_CMPB)
22882 {
22883 if (mode == SImode)
22884 {
22885 emit_insn (gen_popcntbsi2 (tmp, src));
22886 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22887 }
22888 else
22889 {
22890 emit_insn (gen_popcntbdi2 (tmp, src));
22891 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22892 }
22893 return;
22894 }
22895
22896 if (mode == SImode)
22897 {
22898 /* Is mult+shift >= shift+xor+shift+xor? */
22899 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22900 {
22901 rtx tmp1, tmp2, tmp3, tmp4;
22902
22903 tmp1 = gen_reg_rtx (SImode);
22904 emit_insn (gen_popcntbsi2 (tmp1, src));
22905
22906 tmp2 = gen_reg_rtx (SImode);
22907 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22908 tmp3 = gen_reg_rtx (SImode);
22909 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22910
22911 tmp4 = gen_reg_rtx (SImode);
22912 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22913 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22914 }
22915 else
22916 rs6000_emit_popcount (tmp, src);
22917 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22918 }
22919 else
22920 {
22921 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22922 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22923 {
22924 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22925
22926 tmp1 = gen_reg_rtx (DImode);
22927 emit_insn (gen_popcntbdi2 (tmp1, src));
22928
22929 tmp2 = gen_reg_rtx (DImode);
22930 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22931 tmp3 = gen_reg_rtx (DImode);
22932 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22933
22934 tmp4 = gen_reg_rtx (DImode);
22935 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22936 tmp5 = gen_reg_rtx (DImode);
22937 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22938
22939 tmp6 = gen_reg_rtx (DImode);
22940 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22941 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22942 }
22943 else
22944 rs6000_emit_popcount (tmp, src);
22945 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22946 }
22947 }
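
/* Scalar model of the SImode shift/xor fallback above (illustration only;
   popcntb is the hypothetical per-byte popcount helper from the sketch
   after rs6000_emit_popcount).  Since a sum and an xor agree modulo 2,
   folding the byte counts together with xor preserves the parity bit:

   static unsigned int
   parity_model (unsigned int x)
   {
     unsigned int t = popcntb (x);  // per-byte population counts
     t ^= t >> 16;                  // fold halfword counts together
     t ^= t >> 8;                   // fold byte counts together
     return t & 1;                  // low bit is the parity
   }
*/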
22948
22949 /* Expand an Altivec constant permutation for little endian mode.
22950 OP0 and OP1 are the input vectors and TARGET is the output vector.
22951 SEL specifies the constant permutation vector.
22952
22953 There are two issues: First, the two input operands must be
22954 swapped so that together they form a double-wide array in LE
22955 order. Second, the vperm instruction has surprising behavior
22956 in LE mode: it interprets the elements of the source vectors
22957 in BE mode ("left to right") and interprets the elements of
22958 the destination vector in LE mode ("right to left"). To
22959 correct for this, we must subtract each element of the permute
22960 control vector from 31.
22961
22962 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22963 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22964 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22965 serve as the permute control vector. Then, in BE mode,
22966
22967 vperm 9,10,11,12
22968
22969 places the desired result in vr9. However, in LE mode the
22970 vector contents will be
22971
22972 vr10 = 00000003 00000002 00000001 00000000
22973 vr11 = 00000007 00000006 00000005 00000004
22974
22975 The result of the vperm using the same permute control vector is
22976
22977 vr9 = 05000000 07000000 01000000 03000000
22978
22979 That is, the leftmost 4 bytes of vr10 are interpreted as the
22980 source for the rightmost 4 bytes of vr9, and so on.
22981
22982 If we change the permute control vector to
22983
22984 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22985
22986 and issue
22987
22988 vperm 9,11,10,12
22989
22990 we get the desired
22991
22992 vr9 = 00000006 00000004 00000002 00000000. */
22993
22994 static void
22995 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22996 const vec_perm_indices &sel)
22997 {
22998 unsigned int i;
22999 rtx perm[16];
23000 rtx constv, unspec;
23001
23002 /* Unpack and adjust the constant selector. */
23003 for (i = 0; i < 16; ++i)
23004 {
23005 unsigned int elt = 31 - (sel[i] & 31);
23006 perm[i] = GEN_INT (elt);
23007 }
23008
23009 /* Expand to a permute, swapping the inputs and using the
23010 adjusted selector. */
23011 if (!REG_P (op0))
23012 op0 = force_reg (V16QImode, op0);
23013 if (!REG_P (op1))
23014 op1 = force_reg (V16QImode, op1);
23015
23016 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
23017 constv = force_reg (V16QImode, constv);
23018 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
23019 UNSPEC_VPERM);
23020 if (!REG_P (target))
23021 {
23022 rtx tmp = gen_reg_rtx (V16QImode);
23023 emit_move_insn (tmp, unspec);
23024 unspec = tmp;
23025 }
23026
23027 emit_move_insn (target, unspec);
23028 }
23029
23030 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23031 permute control vector. But here it's not a constant, so we must
23032 generate a vector NAND or NOR to do the adjustment. */
23033
23034 void
23035 altivec_expand_vec_perm_le (rtx operands[4])
23036 {
23037 rtx notx, iorx, unspec;
23038 rtx target = operands[0];
23039 rtx op0 = operands[1];
23040 rtx op1 = operands[2];
23041 rtx sel = operands[3];
23042 rtx tmp = target;
23043 rtx norreg = gen_reg_rtx (V16QImode);
23044 machine_mode mode = GET_MODE (target);
23045
23046 /* Get everything in regs so the pattern matches. */
23047 if (!REG_P (op0))
23048 op0 = force_reg (mode, op0);
23049 if (!REG_P (op1))
23050 op1 = force_reg (mode, op1);
23051 if (!REG_P (sel))
23052 sel = force_reg (V16QImode, sel);
23053 if (!REG_P (target))
23054 tmp = gen_reg_rtx (mode);
23055
23056 if (TARGET_P9_VECTOR)
23057 {
23058 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
23059 UNSPEC_VPERMR);
23060 }
23061 else
23062 {
23063 /* Invert the selector with a VNAND if available, else a VNOR.
23064 The VNAND is preferred for future fusion opportunities. */
23065 notx = gen_rtx_NOT (V16QImode, sel);
23066 iorx = (TARGET_P8_VECTOR
23067 ? gen_rtx_IOR (V16QImode, notx, notx)
23068 : gen_rtx_AND (V16QImode, notx, notx));
23069 emit_insn (gen_rtx_SET (norreg, iorx));
23070
23071 /* Permute with operands reversed and adjusted selector. */
23072 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
23073 UNSPEC_VPERM);
23074 }
23075
23076 /* Copy into target, possibly by way of a register. */
23077 if (!REG_P (target))
23078 {
23079 emit_move_insn (tmp, unspec);
23080 unspec = tmp;
23081 }
23082
23083 emit_move_insn (target, unspec);
23084 }
23085
23086 /* Expand an Altivec constant permutation. Return true if we match
23087 an efficient implementation; false to fall back to VPERM.
23088
23089 OP0 and OP1 are the input vectors and TARGET is the output vector.
23090 SEL specifies the constant permutation vector. */
23091
23092 static bool
23093 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
23094 const vec_perm_indices &sel)
23095 {
23096 struct altivec_perm_insn {
23097 HOST_WIDE_INT mask;
23098 enum insn_code impl;
23099 unsigned char perm[16];
23100 };
23101 static const struct altivec_perm_insn patterns[] = {
23102 {OPTION_MASK_ALTIVEC,
23103 CODE_FOR_altivec_vpkuhum_direct,
23104 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23105 {OPTION_MASK_ALTIVEC,
23106 CODE_FOR_altivec_vpkuwum_direct,
23107 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23108 {OPTION_MASK_ALTIVEC,
23109 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
23110 : CODE_FOR_altivec_vmrglb_direct,
23111 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23112 {OPTION_MASK_ALTIVEC,
23113 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
23114 : CODE_FOR_altivec_vmrglh_direct,
23115 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23116 {OPTION_MASK_ALTIVEC,
23117 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
23118 : CODE_FOR_altivec_vmrglw_direct_v4si,
23119 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23120 {OPTION_MASK_ALTIVEC,
23121 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
23122 : CODE_FOR_altivec_vmrghb_direct,
23123 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23124 {OPTION_MASK_ALTIVEC,
23125 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
23126 : CODE_FOR_altivec_vmrghh_direct,
23127 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23128 {OPTION_MASK_ALTIVEC,
23129 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
23130 : CODE_FOR_altivec_vmrghw_direct_v4si,
23131 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23132 {OPTION_MASK_P8_VECTOR,
23133 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23134 : CODE_FOR_p8_vmrgow_v4sf_direct,
23135 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23136 {OPTION_MASK_P8_VECTOR,
23137 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23138 : CODE_FOR_p8_vmrgew_v4sf_direct,
23139 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23140 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23141 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23142 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23143 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23144 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23145 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23146 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23147 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23148
23149 unsigned int i, j, elt, which;
23150 unsigned char perm[16];
23151 rtx x;
23152 bool one_vec;
23153
23154 /* Unpack the constant selector. */
23155 for (i = which = 0; i < 16; ++i)
23156 {
23157 elt = sel[i] & 31;
23158 which |= (elt < 16 ? 1 : 2);
23159 perm[i] = elt;
23160 }
23161
23162 /* Simplify the constant selector based on operands. */
23163 switch (which)
23164 {
23165 default:
23166 gcc_unreachable ();
23167
23168 case 3:
23169 one_vec = false;
23170 if (!rtx_equal_p (op0, op1))
23171 break;
23172 /* FALLTHRU */
23173
23174 case 2:
23175 for (i = 0; i < 16; ++i)
23176 perm[i] &= 15;
23177 op0 = op1;
23178 one_vec = true;
23179 break;
23180
23181 case 1:
23182 op1 = op0;
23183 one_vec = true;
23184 break;
23185 }
23186
23187 /* Look for splat patterns. */
23188 if (one_vec)
23189 {
23190 elt = perm[0];
23191
23192 for (i = 0; i < 16; ++i)
23193 if (perm[i] != elt)
23194 break;
23195 if (i == 16)
23196 {
23197 if (!BYTES_BIG_ENDIAN)
23198 elt = 15 - elt;
23199 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23200 return true;
23201 }
23202
23203 if (elt % 2 == 0)
23204 {
23205 for (i = 0; i < 16; i += 2)
23206 if (perm[i] != elt || perm[i + 1] != elt + 1)
23207 break;
23208 if (i == 16)
23209 {
23210 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23211 x = gen_reg_rtx (V8HImode);
23212 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23213 GEN_INT (field)));
23214 emit_move_insn (target, gen_lowpart (V16QImode, x));
23215 return true;
23216 }
23217 }
23218
23219 if (elt % 4 == 0)
23220 {
23221 for (i = 0; i < 16; i += 4)
23222 if (perm[i] != elt
23223 || perm[i + 1] != elt + 1
23224 || perm[i + 2] != elt + 2
23225 || perm[i + 3] != elt + 3)
23226 break;
23227 if (i == 16)
23228 {
23229 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23230 x = gen_reg_rtx (V4SImode);
23231 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23232 GEN_INT (field)));
23233 emit_move_insn (target, gen_lowpart (V16QImode, x));
23234 return true;
23235 }
23236 }
23237 }
23238
23239 /* Look for merge and pack patterns. */
23240 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23241 {
23242 bool swapped;
23243
23244 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23245 continue;
23246
23247 elt = patterns[j].perm[0];
23248 if (perm[0] == elt)
23249 swapped = false;
23250 else if (perm[0] == elt + 16)
23251 swapped = true;
23252 else
23253 continue;
23254 for (i = 1; i < 16; ++i)
23255 {
23256 elt = patterns[j].perm[i];
23257 if (swapped)
23258 elt = (elt >= 16 ? elt - 16 : elt + 16);
23259 else if (one_vec && elt >= 16)
23260 elt -= 16;
23261 if (perm[i] != elt)
23262 break;
23263 }
23264 if (i == 16)
23265 {
23266 enum insn_code icode = patterns[j].impl;
23267 machine_mode omode = insn_data[icode].operand[0].mode;
23268 machine_mode imode = insn_data[icode].operand[1].mode;
23269
23270 rtx perm_idx = GEN_INT (0);
23271 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23272 {
23273 int perm_val = 0;
23274 if (one_vec)
23275 {
23276 if (perm[0] == 8)
23277 perm_val |= 2;
23278 if (perm[8] == 8)
23279 perm_val |= 1;
23280 }
23281 else
23282 {
23283 if (perm[0] != 0)
23284 perm_val |= 2;
23285 if (perm[8] != 16)
23286 perm_val |= 1;
23287 }
23288 perm_idx = GEN_INT (perm_val);
23289 }
23290
23291 /* For little-endian, don't use vpkuwum and vpkuhum if the
23292 underlying vector type is not V4SI and V8HI, respectively.
23293 For example, using vpkuwum with a V8HI picks up the even
23294 halfwords (BE numbering) when the even halfwords (LE
23295 numbering) are what we need. */
23296 if (!BYTES_BIG_ENDIAN
23297 && icode == CODE_FOR_altivec_vpkuwum_direct
23298 && ((REG_P (op0)
23299 && GET_MODE (op0) != V4SImode)
23300 || (SUBREG_P (op0)
23301 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23302 continue;
23303 if (!BYTES_BIG_ENDIAN
23304 && icode == CODE_FOR_altivec_vpkuhum_direct
23305 && ((REG_P (op0)
23306 && GET_MODE (op0) != V8HImode)
23307 || (SUBREG_P (op0)
23308 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23309 continue;
23310
23311 /* For little-endian, the two input operands must be swapped
23312 (or swapped back) to ensure proper right-to-left numbering
23313 from 0 to 2N-1. */
23314 if (swapped ^ !BYTES_BIG_ENDIAN
23315 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23316 std::swap (op0, op1);
23317 if (imode != V16QImode)
23318 {
23319 op0 = gen_lowpart (imode, op0);
23320 op1 = gen_lowpart (imode, op1);
23321 }
23322 if (omode == V16QImode)
23323 x = target;
23324 else
23325 x = gen_reg_rtx (omode);
23326 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23327 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23328 else
23329 emit_insn (GEN_FCN (icode) (x, op0, op1));
23330 if (omode != V16QImode)
23331 emit_move_insn (target, gen_lowpart (V16QImode, x));
23332 return true;
23333 }
23334 }
23335
23336 if (!BYTES_BIG_ENDIAN)
23337 {
23338 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23339 return true;
23340 }
23341
23342 return false;
23343 }
23344
23345 /* Expand a VSX Permute Doubleword constant permutation.
23346 Return true if we match an efficient implementation. */
23347
23348 static bool
23349 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23350 unsigned char perm0, unsigned char perm1)
23351 {
23352 rtx x;
23353
23354 /* If both selectors come from the same operand, fold to single op. */
23355 if ((perm0 & 2) == (perm1 & 2))
23356 {
23357 if (perm0 & 2)
23358 op0 = op1;
23359 else
23360 op1 = op0;
23361 }
23362 /* If both operands are equal, fold to simpler permutation. */
23363 if (rtx_equal_p (op0, op1))
23364 {
23365 perm0 = perm0 & 1;
23366 perm1 = (perm1 & 1) + 2;
23367 }
23368 /* If the first selector comes from the second operand, swap. */
23369 else if (perm0 & 2)
23370 {
23371 if (perm1 & 2)
23372 return false;
23373 perm0 -= 2;
23374 perm1 += 2;
23375 std::swap (op0, op1);
23376 }
23377 /* If the second selector does not come from the second operand, fail. */
23378 else if ((perm1 & 2) == 0)
23379 return false;
23380
23381 /* Success! */
23382 if (target != NULL)
23383 {
23384 machine_mode vmode, dmode;
23385 rtvec v;
23386
23387 vmode = GET_MODE (target);
23388 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23389 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23390 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23391 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23392 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23393 emit_insn (gen_rtx_SET (target, x));
23394 }
23395 return true;
23396 }
23397
23398 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23399
23400 static bool
23401 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
23402 rtx op1, const vec_perm_indices &sel)
23403 {
23404 bool testing_p = !target;
23405
23406 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23407 if (TARGET_ALTIVEC && testing_p)
23408 return true;
23409
23410 if (op0)
23411 {
23412 rtx nop0 = force_reg (vmode, op0);
23413 if (op0 == op1)
23414 op1 = nop0;
23415 op0 = nop0;
23416 }
23417 if (op1)
23418 op1 = force_reg (vmode, op1);
23419
23420 /* Check for ps_merge* or xxpermdi insns. */
23421 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23422 {
23423 if (testing_p)
23424 {
23425 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23426 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23427 }
23428 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23429 return true;
23430 }
23431
23432 if (TARGET_ALTIVEC)
23433 {
23434 /* Force the target-independent code to lower to V16QImode. */
23435 if (vmode != V16QImode)
23436 return false;
23437 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23438 return true;
23439 }
23440
23441 return false;
23442 }
23443
23444 /* A subroutine for rs6000_expand_extract_even and rs6000_expand_interleave.
23445 OP0 and OP1 are the input vectors and TARGET is the output vector.
23446 PERM specifies the constant permutation vector. */
23447
23448 static void
23449 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23450 machine_mode vmode, const vec_perm_builder &perm)
23451 {
23452 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23453 if (x != target)
23454 emit_move_insn (target, x);
23455 }
23456
23457 /* Expand an extract even operation. */
23458
23459 void
23460 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23461 {
23462 machine_mode vmode = GET_MODE (target);
23463 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23464 vec_perm_builder perm (nelt, nelt, 1);
23465
23466 for (i = 0; i < nelt; i++)
23467 perm.quick_push (i * 2);
23468
23469 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23470 }
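
/* As an illustration, for V4SImode NELT is 4 and the loop above pushes
   the selector { 0, 2, 4, 6 }, i.e. the even elements of the 8-element
   concatenation of OP0 and OP1:

     rs6000_expand_extract_even (target, op0, op1);

   leaves { op0[0], op0[2], op1[0], op1[2] } in TARGET.  */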
23471
23472 /* Expand a vector interleave operation. */
23473
23474 void
23475 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23476 {
23477 machine_mode vmode = GET_MODE (target);
23478 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23479 vec_perm_builder perm (nelt, nelt, 1);
23480
23481 high = (highp ? 0 : nelt / 2);
23482 for (i = 0; i < nelt / 2; i++)
23483 {
23484 perm.quick_push (i + high);
23485 perm.quick_push (i + nelt + high);
23486 }
23487
23488 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23489 }
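
/* As an illustration, for V4SImode with HIGHP set, HIGH is 0 and the
   selector built above is { 0, 4, 1, 5 }, interleaving the first halves
   of OP0 and OP1; with HIGHP clear it is { 2, 6, 3, 7 }, interleaving
   the second halves.  */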
23490
23491 /* Scale a V2DF vector SRC by two raised to the power SCALE, placing the result in TGT. */
23492 void
23493 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23494 {
23495 HOST_WIDE_INT hwi_scale (scale);
23496 REAL_VALUE_TYPE r_pow;
23497 rtvec v = rtvec_alloc (2);
23498 rtx elt;
23499 rtx scale_vec = gen_reg_rtx (V2DFmode);
23500 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23501 elt = const_double_from_real_value (r_pow, DFmode);
23502 RTVEC_ELT (v, 0) = elt;
23503 RTVEC_ELT (v, 1) = elt;
23504 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23505 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23506 }
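
/* As an illustration, rs6000_scale_v2df (tgt, src, 3) splats
   2^3 = 8.0 into a V2DF constant and emits one vector multiply,
   scaling both doublewords of SRC by 8.0.  */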
23507
23508 /* Return an RTX representing where to find the function value of a
23509 function returning MODE. */
23510 static rtx
23511 rs6000_complex_function_value (machine_mode mode)
23512 {
23513 unsigned int regno;
23514 rtx r1, r2;
23515 machine_mode inner = GET_MODE_INNER (mode);
23516 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23517
23518 if (TARGET_FLOAT128_TYPE
23519 && (mode == KCmode
23520 || (mode == TCmode && TARGET_IEEEQUAD)))
23521 regno = ALTIVEC_ARG_RETURN;
23522
23523 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23524 regno = FP_ARG_RETURN;
23525
23526 else
23527 {
23528 regno = GP_ARG_RETURN;
23529
23530 /* 32-bit is OK since it'll go in r3/r4. */
23531 if (TARGET_32BIT && inner_bytes >= 4)
23532 return gen_rtx_REG (mode, regno);
23533 }
23534
23535 if (inner_bytes >= 8)
23536 return gen_rtx_REG (mode, regno);
23537
23538 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23539 const0_rtx);
23540 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23541 GEN_INT (inner_bytes));
23542 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23543 }
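
/* As an illustration, for SCmode (complex float) with hard float, INNER
   is SFmode and INNER_BYTES is 4, so the result is a PARALLEL placing
   the real part in FP_ARG_RETURN at byte offset 0 and the imaginary
   part in FP_ARG_RETURN + 1 at offset 4.  */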
23544
23545 /* Return an rtx describing a return value of MODE as a PARALLEL
23546 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23547 stride REG_STRIDE. */
23548
23549 static rtx
23550 rs6000_parallel_return (machine_mode mode,
23551 int n_elts, machine_mode elt_mode,
23552 unsigned int regno, unsigned int reg_stride)
23553 {
23554 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23555
23556 int i;
23557 for (i = 0; i < n_elts; i++)
23558 {
23559 rtx r = gen_rtx_REG (elt_mode, regno);
23560 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23561 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23562 regno += reg_stride;
23563 }
23564
23565 return par;
23566 }
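
/* As an illustration,

     rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1)

   describes a 64-bit value split across two 32-bit GPRs: one half in
   r3 (GP_ARG_RETURN) at byte offset 0 and the other in r4 at
   offset 4.  */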
23567
23568 /* Target hook for TARGET_FUNCTION_VALUE.
23569
23570 An integer value is in r3 and a floating-point value is in fp1,
23571 unless -msoft-float. */
23572
23573 static rtx
23574 rs6000_function_value (const_tree valtype,
23575 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23576 bool outgoing ATTRIBUTE_UNUSED)
23577 {
23578 machine_mode mode;
23579 unsigned int regno;
23580 machine_mode elt_mode;
23581 int n_elts;
23582
23583 /* Special handling for structs in darwin64. */
23584 if (TARGET_MACHO
23585 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23586 {
23587 CUMULATIVE_ARGS valcum;
23588 rtx valret;
23589
23590 valcum.words = 0;
23591 valcum.fregno = FP_ARG_MIN_REG;
23592 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23593 /* Do a trial code generation as if this were going to be passed as
23594 an argument; if any part goes in memory, we return NULL. */
23595 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23596 if (valret)
23597 return valret;
23598 /* Otherwise fall through to standard ABI rules. */
23599 }
23600
23601 mode = TYPE_MODE (valtype);
23602
23603 /* The ELFv2 ABI returns homogeneous floating-point or vector aggregates in registers. */
23604 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23605 {
23606 int first_reg, n_regs;
23607
23608 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23609 {
23610 /* _Decimal128 must use even/odd register pairs. */
23611 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23612 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23613 }
23614 else
23615 {
23616 first_reg = ALTIVEC_ARG_RETURN;
23617 n_regs = 1;
23618 }
23619
23620 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23621 }
23622
23623 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64. */
23624 if (TARGET_32BIT && TARGET_POWERPC64)
23625 switch (mode)
23626 {
23627 default:
23628 break;
23629 case E_DImode:
23630 case E_SCmode:
23631 case E_DCmode:
23632 case E_TCmode:
23633 int count = GET_MODE_SIZE (mode) / 4;
23634 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23635 }
23636
23637 if ((INTEGRAL_TYPE_P (valtype)
23638 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23639 || POINTER_TYPE_P (valtype))
23640 mode = TARGET_32BIT ? SImode : DImode;
23641
23642 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23643 /* _Decimal128 must use an even/odd register pair. */
23644 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23645 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23646 && !FLOAT128_VECTOR_P (mode))
23647 regno = FP_ARG_RETURN;
23648 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23649 && targetm.calls.split_complex_arg)
23650 return rs6000_complex_function_value (mode);
23651 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23652 return register is used in both cases, and we won't see V2DImode/V2DFmode
23653 for pure altivec, combine the two cases. */
23654 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23655 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23656 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23657 regno = ALTIVEC_ARG_RETURN;
23658 else
23659 regno = GP_ARG_RETURN;
23660
23661 return gen_rtx_REG (mode, regno);
23662 }
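
/* As an illustration, under the ELFv2 ABI a return type such as
   struct { double a, b; } is discovered as a homogeneous aggregate with
   ELT_MODE == DFmode and N_ELTS == 2, so it comes back as a PARALLEL in
   FP_ARG_RETURN and FP_ARG_RETURN + 1 instead of in GPRs or memory.  */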
23663
23664 /* Define how to find the value returned by a library function
23665 assuming the value has mode MODE. */
23666 rtx
23667 rs6000_libcall_value (machine_mode mode)
23668 {
23669 unsigned int regno;
23670
23671 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
23672 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23673 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23674
23675 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23676 /* _Decimal128 must use an even/odd register pair. */
23677 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23678 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23679 regno = FP_ARG_RETURN;
23680 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23681 return register is used in both cases, and we won't see V2DImode/V2DFmode
23682 for pure altivec, combine the two cases. */
23683 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23684 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23685 regno = ALTIVEC_ARG_RETURN;
23686 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23687 return rs6000_complex_function_value (mode);
23688 else
23689 regno = GP_ARG_RETURN;
23690
23691 return gen_rtx_REG (mode, regno);
23692 }
23693
23694 /* Compute register pressure classes. We implement the target hook to avoid
23695 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23696 lead to incorrect estimates of the number of available registers and therefore
23697 increased register pressure/spill. */
23698 static int
23699 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23700 {
23701 int n;
23702
23703 n = 0;
23704 pressure_classes[n++] = GENERAL_REGS;
23705 if (TARGET_ALTIVEC)
23706 pressure_classes[n++] = ALTIVEC_REGS;
23707 if (TARGET_VSX)
23708 pressure_classes[n++] = VSX_REGS;
23709 else
23710 {
23711 if (TARGET_HARD_FLOAT)
23712 pressure_classes[n++] = FLOAT_REGS;
23713 }
23714 pressure_classes[n++] = CR_REGS;
23715 pressure_classes[n++] = SPECIAL_REGS;
23716
23717 return n;
23718 }
23719
23720 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23721 Frame pointer elimination is automatically handled.
23722
23723 For the RS/6000, if frame pointer elimination is being done, we would like
23724 to convert ap into fp, not sp.
23725
23726 We need r30 if -mminimal-toc was specified, and there are constant pool
23727 references. */
23728
23729 static bool
23730 rs6000_can_eliminate (const int from, const int to)
23731 {
23732 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23733 ? ! frame_pointer_needed
23734 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23735 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23736 || constant_pool_empty_p ()
23737 : true);
23738 }
23739
23740 /* Define the offset between two registers, FROM to be eliminated and its
23741 replacement TO, at the start of a routine. */
23742 HOST_WIDE_INT
23743 rs6000_initial_elimination_offset (int from, int to)
23744 {
23745 rs6000_stack_t *info = rs6000_stack_info ();
23746 HOST_WIDE_INT offset;
23747
23748 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23749 offset = info->push_p ? 0 : -info->total_size;
23750 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23751 {
23752 offset = info->push_p ? 0 : -info->total_size;
23753 if (FRAME_GROWS_DOWNWARD)
23754 offset += info->fixed_size + info->vars_size + info->parm_size;
23755 }
23756 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23757 offset = FRAME_GROWS_DOWNWARD
23758 ? info->fixed_size + info->vars_size + info->parm_size
23759 : 0;
23760 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23761 offset = info->total_size;
23762 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23763 offset = info->push_p ? info->total_size : 0;
23764 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23765 offset = 0;
23766 else
23767 gcc_unreachable ();
23768
23769 return offset;
23770 }
23771
23772 /* Fill in the sizes of registers used by the unwinder. */
23773
23774 static void
23775 rs6000_init_dwarf_reg_sizes_extra (tree address)
23776 {
23777 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23778 {
23779 int i;
23780 machine_mode mode = TYPE_MODE (char_type_node);
23781 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23782 rtx mem = gen_rtx_MEM (BLKmode, addr);
23783 rtx value = gen_int_mode (16, mode);
23784
23785 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23786 The unwinder still needs to know the size of Altivec registers. */
23787
23788 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23789 {
23790 int column = DWARF_REG_TO_UNWIND_COLUMN
23791 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23792 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23793
23794 emit_move_insn (adjust_address (mem, mode, offset), value);
23795 }
23796 }
23797 }
23798
23799 /* Map internal gcc register numbers to debug format register numbers.
23800 FORMAT specifies the type of debug register number to use:
23801 0 -- debug information, except for frame-related sections
23802 1 -- DWARF .debug_frame section
23803 2 -- DWARF .eh_frame section */
23804
23805 unsigned int
23806 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
23807 {
23808 /* On some platforms, we use the standard DWARF register
23809 numbering for .debug_info and .debug_frame. */
23810 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
23811 {
23812 #ifdef RS6000_USE_DWARF_NUMBERING
23813 if (regno <= 31)
23814 return regno;
23815 if (FP_REGNO_P (regno))
23816 return regno - FIRST_FPR_REGNO + 32;
23817 if (ALTIVEC_REGNO_P (regno))
23818 return regno - FIRST_ALTIVEC_REGNO + 1124;
23819 if (regno == LR_REGNO)
23820 return 108;
23821 if (regno == CTR_REGNO)
23822 return 109;
23823 if (regno == CA_REGNO)
23824 return 101; /* XER */
23825 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23826 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23827 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23828 to the DWARF reg for CR. */
23829 if (format == 1 && regno == CR2_REGNO)
23830 return 64;
23831 if (CR_REGNO_P (regno))
23832 return regno - CR0_REGNO + 86;
23833 if (regno == VRSAVE_REGNO)
23834 return 356;
23835 if (regno == VSCR_REGNO)
23836 return 67;
23837
23838 /* These do not make much sense. */
23839 if (regno == FRAME_POINTER_REGNUM)
23840 return 111;
23841 if (regno == ARG_POINTER_REGNUM)
23842 return 67;
23843 if (regno == 64)
23844 return 100;
23845
23846 gcc_unreachable ();
23847 #endif
23848 }
23849
23850 /* For non-DWARF debug information, and also for .eh_frame, we use the
23851 internal register numbers from GCC 7 and before, so translate the
23852 regnos to those historical numbers. */
23853 if (regno <= 31)
23854 return regno;
23855 if (FP_REGNO_P (regno))
23856 return regno - FIRST_FPR_REGNO + 32;
23857 if (ALTIVEC_REGNO_P (regno))
23858 return regno - FIRST_ALTIVEC_REGNO + 77;
23859 if (regno == LR_REGNO)
23860 return 65;
23861 if (regno == CTR_REGNO)
23862 return 66;
23863 if (regno == CA_REGNO)
23864 return 76; /* XER */
23865 if (CR_REGNO_P (regno))
23866 return regno - CR0_REGNO + 68;
23867 if (regno == VRSAVE_REGNO)
23868 return 109;
23869 if (regno == VSCR_REGNO)
23870 return 110;
23871
23872 if (regno == FRAME_POINTER_REGNUM)
23873 return 111;
23874 if (regno == ARG_POINTER_REGNUM)
23875 return 67;
23876 if (regno == 64)
23877 return 64;
23878
23879 gcc_unreachable ();
23880 }
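
/* As an illustration, the link register maps differently per consumer:
   rs6000_dbx_register_number (LR_REGNO, 1) returns the DWARF number 108
   for .debug_frame, while format 2 (.eh_frame) returns the historical
   GCC number 65.  */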
23881
23882 /* Target hook for eh_return_filter_mode. */
23883 static scalar_int_mode
23884 rs6000_eh_return_filter_mode (void)
23885 {
23886 return TARGET_32BIT ? SImode : word_mode;
23887 }
23888
23889 /* Target hook for translate_mode_attribute. */
23890 static machine_mode
23891 rs6000_translate_mode_attribute (machine_mode mode)
23892 {
23893 if ((FLOAT128_IEEE_P (mode)
23894 && ieee128_float_type_node == long_double_type_node)
23895 || (FLOAT128_IBM_P (mode)
23896 && ibm128_float_type_node == long_double_type_node))
23897 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23898 return mode;
23899 }
23900
23901 /* Target hook for scalar_mode_supported_p. */
23902 static bool
23903 rs6000_scalar_mode_supported_p (scalar_mode mode)
23904 {
23905 /* -m32 does not support TImode. This is the default, from
23906 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23907 same ABI as for -m32. But default_scalar_mode_supported_p allows
23908 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23909 for -mpowerpc64. */
23910 if (TARGET_32BIT && mode == TImode)
23911 return false;
23912
23913 if (DECIMAL_FLOAT_MODE_P (mode))
23914 return default_decimal_float_supported_p ();
23915 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23916 return true;
23917 else
23918 return default_scalar_mode_supported_p (mode);
23919 }
23920
23921 /* Target hook for libgcc_floating_mode_supported_p. */
23922
23923 static bool
23924 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23925 {
23926 switch (mode)
23927 {
23928 case E_SFmode:
23929 case E_DFmode:
23930 case E_TFmode:
23931 return true;
23932
23933 /* We only return true for KFmode if IEEE 128-bit types are supported and
23934 long double does not use the IEEE 128-bit format. If long double
23935 uses the IEEE 128-bit format, it will use TFmode rather than KFmode;
23936 since the code then never uses KFmode, it would abort when it fails
23937 to find KFmode among the _FloatN types. */
23938 case E_KFmode:
23939 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
23940
23941 default:
23942 return false;
23943 }
23944 }
23945
23946 /* Target hook for vector_mode_supported_p. */
23947 static bool
23948 rs6000_vector_mode_supported_p (machine_mode mode)
23949 {
23950 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23951 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23952 double-double. */
23953 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23954 return true;
23955
23956 else
23957 return false;
23958 }
23959
23960 /* Target hook for floatn_mode. */
23961 static opt_scalar_float_mode
23962 rs6000_floatn_mode (int n, bool extended)
23963 {
23964 if (extended)
23965 {
23966 switch (n)
23967 {
23968 case 32:
23969 return DFmode;
23970
23971 case 64:
23972 if (TARGET_FLOAT128_TYPE)
23973 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23974 else
23975 return opt_scalar_float_mode ();
23976
23977 case 128:
23978 return opt_scalar_float_mode ();
23979
23980 default:
23981 /* Those are the only valid _FloatNx types. */
23982 gcc_unreachable ();
23983 }
23984 }
23985 else
23986 {
23987 switch (n)
23988 {
23989 case 32:
23990 return SFmode;
23991
23992 case 64:
23993 return DFmode;
23994
23995 case 128:
23996 if (TARGET_FLOAT128_TYPE)
23997 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23998 else
23999 return opt_scalar_float_mode ();
24000
24001 default:
24002 return opt_scalar_float_mode ();
24003 }
24004 }
24006 }
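
/* As an illustration, with -mfloat128 the type _Float128 (N == 128,
   !EXTENDED) maps to TFmode when long double is IEEE 128-bit and to
   KFmode otherwise; _Float64x (N == 64, EXTENDED) maps the same way,
   and _Float128x is never provided.  */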
24007
24008 /* Target hook for c_mode_for_suffix. */
24009 static machine_mode
24010 rs6000_c_mode_for_suffix (char suffix)
24011 {
24012 if (TARGET_FLOAT128_TYPE)
24013 {
24014 if (suffix == 'q' || suffix == 'Q')
24015 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24016
24017 /* At the moment, we are not defining a suffix for IBM extended double.
24018 If/when the default for -mabi=ieeelongdouble is changed, and we want
24019 to support __ibm128 constants in legacy library code, we may need to
24020 re-evaluate this decision. Currently, c-lex.cc only supports 'w' and
24021 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24022 __float80 constants. */
24023 }
24024
24025 return VOIDmode;
24026 }
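
/* As an illustration, with -mfloat128 a constant written as

     __float128 x = 1.5q;

   gets KFmode, or TFmode when long double is IEEE 128-bit; there is
   deliberately no constant suffix for __ibm128.  */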
24027
24028 /* Target hook for invalid_arg_for_unprototyped_fn. */
24029 static const char *
24030 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
24031 {
24032 return (!rs6000_darwin64_abi
24033 && typelist == 0
24034 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
24035 && (funcdecl == NULL_TREE
24036 || (TREE_CODE (funcdecl) == FUNCTION_DECL
24037 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
24038 ? N_("AltiVec argument passed to unprototyped function")
24039 : NULL;
24040 }
24041
24042 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
24043 setup by calling the hidden function __stack_chk_fail_local instead
24044 of __stack_chk_fail. Otherwise it is better to call
24045 __stack_chk_fail directly. */
24046
24047 static tree ATTRIBUTE_UNUSED
24048 rs6000_stack_protect_fail (void)
24049 {
24050 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
24051 ? default_hidden_stack_protect_fail ()
24052 : default_external_stack_protect_fail ();
24053 }
24054
24055 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24056
24057 #if TARGET_ELF
24058 static unsigned HOST_WIDE_INT
24059 rs6000_asan_shadow_offset (void)
24060 {
24061 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
24062 }
24063 #endif
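
/* As an illustration, AddressSanitizer maps an application byte at
   address A to the shadow byte at (A >> 3) + offset, so the 64-bit
   value above places the shadow of A at (A >> 3) + (1ULL << 41);
   32-bit targets use (A >> 3) + (1 << 29).  */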
24064 \f
24065 /* Mask options that we want to support inside attribute((target)) and
24066 #pragma GCC target operations. Note, we do not include things like
24067 64/32-bit, endianness, hard/soft floating point, etc. that would have
24068 different calling sequences. */
24069
24070 struct rs6000_opt_mask {
24071 const char *name; /* option name */
24072 HOST_WIDE_INT mask; /* mask to set */
24073 bool invert; /* invert sense of mask */
24074 bool valid_target; /* option is a target option */
24075 };
24076
24077 static struct rs6000_opt_mask const rs6000_opt_masks[] =
24078 {
24079 { "altivec", OPTION_MASK_ALTIVEC, false, true },
24080 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
24081 false, true },
24082 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
24083 false, true },
24084 { "cmpb", OPTION_MASK_CMPB, false, true },
24085 { "crypto", OPTION_MASK_CRYPTO, false, true },
24086 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
24087 { "dlmzb", OPTION_MASK_DLMZB, false, true },
24088 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
24089 false, true },
24090 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
24091 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
24092 { "fprnd", OPTION_MASK_FPRND, false, true },
24093 { "power10", OPTION_MASK_POWER10, false, true },
24094 { "hard-dfp", OPTION_MASK_DFP, false, true },
24095 { "htm", OPTION_MASK_HTM, false, true },
24096 { "isel", OPTION_MASK_ISEL, false, true },
24097 { "mfcrf", OPTION_MASK_MFCRF, false, true },
24098 { "mfpgpr", 0, false, true },
24099 { "mma", OPTION_MASK_MMA, false, true },
24100 { "modulo", OPTION_MASK_MODULO, false, true },
24101 { "mulhw", OPTION_MASK_MULHW, false, true },
24102 { "multiple", OPTION_MASK_MULTIPLE, false, true },
24103 { "pcrel", OPTION_MASK_PCREL, false, true },
24104 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
24105 { "popcntb", OPTION_MASK_POPCNTB, false, true },
24106 { "popcntd", OPTION_MASK_POPCNTD, false, true },
24107 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
24108 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
24109 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
24110 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
24111 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
24112 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
24113 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
24114 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
24115 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
24116 { "prefixed", OPTION_MASK_PREFIXED, false, true },
24117 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
24118 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
24119 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
24120 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
24121 { "string", 0, false, true },
24122 { "update", OPTION_MASK_NO_UPDATE, true , true },
24123 { "vsx", OPTION_MASK_VSX, false, true },
24124 #ifdef OPTION_MASK_64BIT
24125 #if TARGET_AIX_OS
24126 { "aix64", OPTION_MASK_64BIT, false, false },
24127 { "aix32", OPTION_MASK_64BIT, true, false },
24128 #else
24129 { "64", OPTION_MASK_64BIT, false, false },
24130 { "32", OPTION_MASK_64BIT, true, false },
24131 #endif
24132 #endif
24133 #ifdef OPTION_MASK_EABI
24134 { "eabi", OPTION_MASK_EABI, false, false },
24135 #endif
24136 #ifdef OPTION_MASK_LITTLE_ENDIAN
24137 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
24138 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
24139 #endif
24140 #ifdef OPTION_MASK_RELOCATABLE
24141 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
24142 #endif
24143 #ifdef OPTION_MASK_STRICT_ALIGN
24144 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
24145 #endif
24146 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
24147 { "string", 0, false, false },
24148 };
24149
24150 /* Builtin mask mapping for printing the flags. */
24151 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
24152 {
24153 { "altivec", RS6000_BTM_ALTIVEC, false, false },
24154 { "vsx", RS6000_BTM_VSX, false, false },
24155 { "fre", RS6000_BTM_FRE, false, false },
24156 { "fres", RS6000_BTM_FRES, false, false },
24157 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
24158 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
24159 { "popcntd", RS6000_BTM_POPCNTD, false, false },
24160 { "cell", RS6000_BTM_CELL, false, false },
24161 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
24162 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
24163 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
24164 { "crypto", RS6000_BTM_CRYPTO, false, false },
24165 { "htm", RS6000_BTM_HTM, false, false },
24166 { "hard-dfp", RS6000_BTM_DFP, false, false },
24167 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
24168 { "long-double-128", RS6000_BTM_LDBL128, false, false },
24169 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
24170 { "float128", RS6000_BTM_FLOAT128, false, false },
24171 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
24172 { "mma", RS6000_BTM_MMA, false, false },
24173 { "power10", RS6000_BTM_P10, false, false },
24174 };
24175
24176 /* Option variables that we want to support inside attribute((target)) and
24177 #pragma GCC target operations. */
24178
24179 struct rs6000_opt_var {
24180 const char *name; /* option name */
24181 size_t global_offset; /* offset of the option in global_options. */
24182 size_t target_offset; /* offset of the option in target options. */
24183 };
24184
24185 static struct rs6000_opt_var const rs6000_opt_vars[] =
24186 {
24187 { "friz",
24188 offsetof (struct gcc_options, x_TARGET_FRIZ),
24189 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24190 { "avoid-indexed-addresses",
24191 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24192 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24193 { "longcall",
24194 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24195 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24196 { "optimize-swaps",
24197 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24198 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24199 { "allow-movmisalign",
24200 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24201 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24202 { "sched-groups",
24203 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24204 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24205 { "always-hint",
24206 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24207 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24208 { "align-branch-targets",
24209 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24210 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24211 { "sched-prolog",
24212 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24213 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24214 { "sched-epilog",
24215 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24216 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24217 { "speculate-indirect-jumps",
24218 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24219 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24220 };
24221
24222 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24223 parsing. Return true if there were no errors. */
24224
24225 static bool
24226 rs6000_inner_target_options (tree args, bool attr_p)
24227 {
24228 bool ret = true;
24229
24230 if (args == NULL_TREE)
24231 ;
24232
24233 else if (TREE_CODE (args) == STRING_CST)
24234 {
24235 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24236 char *q;
24237
24238 while ((q = strtok (p, ",")) != NULL)
24239 {
24240 bool error_p = false;
24241 bool not_valid_p = false;
24242 const char *cpu_opt = NULL;
24243
24244 p = NULL;
24245 if (startswith (q, "cpu="))
24246 {
24247 int cpu_index = rs6000_cpu_name_lookup (q+4);
24248 if (cpu_index >= 0)
24249 rs6000_cpu_index = cpu_index;
24250 else
24251 {
24252 error_p = true;
24253 cpu_opt = q+4;
24254 }
24255 }
24256 else if (startswith (q, "tune="))
24257 {
24258 int tune_index = rs6000_cpu_name_lookup (q+5);
24259 if (tune_index >= 0)
24260 rs6000_tune_index = tune_index;
24261 else
24262 {
24263 error_p = true;
24264 cpu_opt = q+5;
24265 }
24266 }
24267 else
24268 {
24269 size_t i;
24270 bool invert = false;
24271 char *r = q;
24272
24273 error_p = true;
24274 if (startswith (r, "no-"))
24275 {
24276 invert = true;
24277 r += 3;
24278 }
24279
24280 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24281 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24282 {
24283 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24284
24285 if (!rs6000_opt_masks[i].valid_target)
24286 not_valid_p = true;
24287 else
24288 {
24289 error_p = false;
24290 rs6000_isa_flags_explicit |= mask;
24291
24292 /* VSX needs altivec, so -mvsx automagically sets
24293 altivec and disables -mavoid-indexed-addresses. */
24294 if (!invert)
24295 {
24296 if (mask == OPTION_MASK_VSX)
24297 {
24298 mask |= OPTION_MASK_ALTIVEC;
24299 TARGET_AVOID_XFORM = 0;
24300 }
24301 }
24302
24303 if (rs6000_opt_masks[i].invert)
24304 invert = !invert;
24305
24306 if (invert)
24307 rs6000_isa_flags &= ~mask;
24308 else
24309 rs6000_isa_flags |= mask;
24310 }
24311 break;
24312 }
24313
24314 if (error_p && !not_valid_p)
24315 {
24316 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24317 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24318 {
24319 size_t j = rs6000_opt_vars[i].global_offset;
24320 *((int *) ((char *)&global_options + j)) = !invert;
24321 error_p = false;
24322 not_valid_p = false;
24323 break;
24324 }
24325 }
24326 }
24327
24328 if (error_p)
24329 {
24330 const char *eprefix, *esuffix;
24331
24332 ret = false;
24333 if (attr_p)
24334 {
24335 eprefix = "__attribute__((__target__(";
24336 esuffix = ")))";
24337 }
24338 else
24339 {
24340 eprefix = "#pragma GCC target ";
24341 esuffix = "";
24342 }
24343
24344 if (cpu_opt)
24345 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24346 q, esuffix);
24347 else if (not_valid_p)
24348 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24349 else
24350 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24351 }
24352 }
24353 }
24354
24355 else if (TREE_CODE (args) == TREE_LIST)
24356 {
24357 do
24358 {
24359 tree value = TREE_VALUE (args);
24360 if (value)
24361 {
24362 bool ret2 = rs6000_inner_target_options (value, attr_p);
24363 if (!ret2)
24364 ret = false;
24365 }
24366 args = TREE_CHAIN (args);
24367 }
24368 while (args != NULL_TREE);
24369 }
24370
24371 else
24372 {
24373 error ("attribute %<target%> argument not a string");
24374 return false;
24375 }
24376
24377 return ret;
24378 }
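
/* Usage sketch: the strings accepted here are the entries of
   rs6000_opt_masks and rs6000_opt_vars above, plus cpu=/tune=, e.g.

     __attribute__((__target__("cpu=power9,vsx,no-htm")))
     void f (void);

   sets the CPU, enables VSX (which also enables altivec), and turns
   HTM off for this function only.  */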
24379
24380 /* Print out the target options as a list for -mdebug=target. */
24381
24382 static void
24383 rs6000_debug_target_options (tree args, const char *prefix)
24384 {
24385 if (args == NULL_TREE)
24386 fprintf (stderr, "%s<NULL>", prefix);
24387
24388 else if (TREE_CODE (args) == STRING_CST)
24389 {
24390 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24391 char *q;
24392
24393 while ((q = strtok (p, ",")) != NULL)
24394 {
24395 p = NULL;
24396 fprintf (stderr, "%s\"%s\"", prefix, q);
24397 prefix = ", ";
24398 }
24399 }
24400
24401 else if (TREE_CODE (args) == TREE_LIST)
24402 {
24403 do
24404 {
24405 tree value = TREE_VALUE (args);
24406 if (value)
24407 {
24408 rs6000_debug_target_options (value, prefix);
24409 prefix = ", ";
24410 }
24411 args = TREE_CHAIN (args);
24412 }
24413 while (args != NULL_TREE);
24414 }
24415
24416 else
24417 gcc_unreachable ();
24418
24419 return;
24420 }
24421
24422 \f
24423 /* Hook to validate attribute((target("..."))). */
24424
24425 static bool
24426 rs6000_valid_attribute_p (tree fndecl,
24427 tree ARG_UNUSED (name),
24428 tree args,
24429 int flags)
24430 {
24431 struct cl_target_option cur_target;
24432 bool ret;
24433 tree old_optimize;
24434 tree new_target, new_optimize;
24435 tree func_optimize;
24436
24437 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24438
24439 if (TARGET_DEBUG_TARGET)
24440 {
24441 tree tname = DECL_NAME (fndecl);
24442 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24443 if (tname)
24444 fprintf (stderr, "function: %.*s\n",
24445 (int) IDENTIFIER_LENGTH (tname),
24446 IDENTIFIER_POINTER (tname));
24447 else
24448 fprintf (stderr, "function: unknown\n");
24449
24450 fprintf (stderr, "args:");
24451 rs6000_debug_target_options (args, " ");
24452 fprintf (stderr, "\n");
24453
24454 if (flags)
24455 fprintf (stderr, "flags: 0x%x\n", flags);
24456
24457 fprintf (stderr, "--------------------\n");
24458 }
24459
24460 /* attribute((target("default"))) does nothing, beyond
24461 affecting multi-versioning. */
24462 if (TREE_VALUE (args)
24463 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24464 && TREE_CHAIN (args) == NULL_TREE
24465 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24466 return true;
24467
24468 old_optimize = build_optimization_node (&global_options,
24469 &global_options_set);
24470 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24471
24472 /* If the function changed the optimization levels as well as setting target
24473 options, start with the optimizations specified. */
24474 if (func_optimize && func_optimize != old_optimize)
24475 cl_optimization_restore (&global_options, &global_options_set,
24476 TREE_OPTIMIZATION (func_optimize));
24477
24478 /* The target attributes may also change some optimization flags, so update
24479 the optimization options if necessary. */
24480 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24481 rs6000_cpu_index = rs6000_tune_index = -1;
24482 ret = rs6000_inner_target_options (args, true);
24483
24484 /* Set up any additional state. */
24485 if (ret)
24486 {
24487 ret = rs6000_option_override_internal (false);
24488 new_target = build_target_option_node (&global_options,
24489 &global_options_set);
24490 }
24491 else
24492 new_target = NULL;
24493
24494 new_optimize = build_optimization_node (&global_options,
24495 &global_options_set);
24496
24497 if (!new_target)
24498 ret = false;
24499
24500 else if (fndecl)
24501 {
24502 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24503
24504 if (old_optimize != new_optimize)
24505 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24506 }
24507
24508 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24509
24510 if (old_optimize != new_optimize)
24511 cl_optimization_restore (&global_options, &global_options_set,
24512 TREE_OPTIMIZATION (old_optimize));
24513
24514 return ret;
24515 }
24516
24517 \f
24518 /* Hook to validate the current #pragma GCC target and set the state, and
24519 update the macros based on what was changed. If ARGS is NULL, then
24520 POP_TARGET is used to reset the options. */
24521
24522 bool
24523 rs6000_pragma_target_parse (tree args, tree pop_target)
24524 {
24525 tree prev_tree = build_target_option_node (&global_options,
24526 &global_options_set);
24527 tree cur_tree;
24528 struct cl_target_option *prev_opt, *cur_opt;
24529 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24530 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
24531
24532 if (TARGET_DEBUG_TARGET)
24533 {
24534 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24535 fprintf (stderr, "args:");
24536 rs6000_debug_target_options (args, " ");
24537 fprintf (stderr, "\n");
24538
24539 if (pop_target)
24540 {
24541 fprintf (stderr, "pop_target:\n");
24542 debug_tree (pop_target);
24543 }
24544 else
24545 fprintf (stderr, "pop_target: <NULL>\n");
24546
24547 fprintf (stderr, "--------------------\n");
24548 }
24549
24550 if (! args)
24551 {
24552 cur_tree = ((pop_target)
24553 ? pop_target
24554 : target_option_default_node);
24555 cl_target_option_restore (&global_options, &global_options_set,
24556 TREE_TARGET_OPTION (cur_tree));
24557 }
24558 else
24559 {
24560 rs6000_cpu_index = rs6000_tune_index = -1;
24561 if (!rs6000_inner_target_options (args, false)
24562 || !rs6000_option_override_internal (false)
24563 || (cur_tree = build_target_option_node (&global_options,
24564 &global_options_set))
24565 == NULL_TREE)
24566 {
24567 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24568 fprintf (stderr, "invalid pragma\n");
24569
24570 return false;
24571 }
24572 }
24573
24574 target_option_current_node = cur_tree;
24575 rs6000_activate_target_options (target_option_current_node);
24576
24577 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24578 change the macros that are defined. */
24579 if (rs6000_target_modify_macros_ptr)
24580 {
24581 prev_opt = TREE_TARGET_OPTION (prev_tree);
24582 prev_bumask = prev_opt->x_rs6000_builtin_mask;
24583 prev_flags = prev_opt->x_rs6000_isa_flags;
24584
24585 cur_opt = TREE_TARGET_OPTION (cur_tree);
24586 cur_flags = cur_opt->x_rs6000_isa_flags;
24587 cur_bumask = cur_opt->x_rs6000_builtin_mask;
24588
24589 diff_bumask = (prev_bumask ^ cur_bumask);
24590 diff_flags = (prev_flags ^ cur_flags);
24591
24592 if ((diff_flags != 0) || (diff_bumask != 0))
24593 {
24594 /* Delete old macros. */
24595 rs6000_target_modify_macros_ptr (false,
24596 prev_flags & diff_flags,
24597 prev_bumask & diff_bumask);
24598
24599 /* Define new macros. */
24600 rs6000_target_modify_macros_ptr (true,
24601 cur_flags & diff_flags,
24602 cur_bumask & diff_bumask);
24603 }
24604 }
24605
24606 return true;
24607 }
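
/* Usage sketch:

     #pragma GCC push_options
     #pragma GCC target ("cpu=power10,prefixed")
     ... functions compiled for power10 ...
     #pragma GCC pop_options

   The pop comes back through this function with ARGS == NULL and the
   previously saved options in POP_TARGET.  */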
24608
24609 \f
24610 /* Remember the last target of rs6000_set_current_function. */
24611 static GTY(()) tree rs6000_previous_fndecl;
24612
24613 /* Restore target's globals from NEW_TREE and invalidate the
24614 rs6000_previous_fndecl cache. */
24615
24616 void
24617 rs6000_activate_target_options (tree new_tree)
24618 {
24619 cl_target_option_restore (&global_options, &global_options_set,
24620 TREE_TARGET_OPTION (new_tree));
24621 if (TREE_TARGET_GLOBALS (new_tree))
24622 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24623 else if (new_tree == target_option_default_node)
24624 restore_target_globals (&default_target_globals);
24625 else
24626 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24627 rs6000_previous_fndecl = NULL_TREE;
24628 }
24629
24630 /* Establish appropriate back-end context for processing the function
24631 FNDECL. The argument might be NULL to indicate processing at top
24632 level, outside of any function scope. */
24633 static void
24634 rs6000_set_current_function (tree fndecl)
24635 {
24636 if (TARGET_DEBUG_TARGET)
24637 {
24638 fprintf (stderr, "\n==================== rs6000_set_current_function");
24639
24640 if (fndecl)
24641 fprintf (stderr, ", fndecl %s (%p)",
24642 (DECL_NAME (fndecl)
24643 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24644 : "<unknown>"), (void *)fndecl);
24645
24646 if (rs6000_previous_fndecl)
24647 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24648
24649 fprintf (stderr, "\n");
24650 }
24651
24652 /* Only change the context if the function changes. This hook is called
24653 several times in the course of compiling a function, and we don't want to
24654 slow things down too much or call target_reinit when it isn't safe. */
24655 if (fndecl == rs6000_previous_fndecl)
24656 return;
24657
24658 tree old_tree;
24659 if (rs6000_previous_fndecl == NULL_TREE)
24660 old_tree = target_option_current_node;
24661 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24662 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24663 else
24664 old_tree = target_option_default_node;
24665
24666 tree new_tree;
24667 if (fndecl == NULL_TREE)
24668 {
24669 if (old_tree != target_option_current_node)
24670 new_tree = target_option_current_node;
24671 else
24672 new_tree = NULL_TREE;
24673 }
24674 else
24675 {
24676 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24677 if (new_tree == NULL_TREE)
24678 new_tree = target_option_default_node;
24679 }
24680
24681 if (TARGET_DEBUG_TARGET)
24682 {
24683 if (new_tree)
24684 {
24685 fprintf (stderr, "\nnew fndecl target specific options:\n");
24686 debug_tree (new_tree);
24687 }
24688
24689 if (old_tree)
24690 {
24691 fprintf (stderr, "\nold fndecl target specific options:\n");
24692 debug_tree (old_tree);
24693 }
24694
24695 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24696 fprintf (stderr, "--------------------\n");
24697 }
24698
24699 if (new_tree && old_tree != new_tree)
24700 rs6000_activate_target_options (new_tree);
24701
24702 if (fndecl)
24703 rs6000_previous_fndecl = fndecl;
24704 }
24705
24706 \f
24707 /* Save the current options */
24708
24709 static void
24710 rs6000_function_specific_save (struct cl_target_option *ptr,
24711 struct gcc_options *opts,
24712 struct gcc_options */* opts_set */)
24713 {
24714 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24715 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24716 }
24717
24718 /* Restore the current options */
24719
24720 static void
24721 rs6000_function_specific_restore (struct gcc_options *opts,
24722 struct gcc_options */* opts_set */,
24723 struct cl_target_option *ptr)
24724
24725 {
24726 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24727 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24728 (void) rs6000_option_override_internal (false);
24729 }
24730
24731 /* Print the current options */
24732
24733 static void
24734 rs6000_function_specific_print (FILE *file, int indent,
24735 struct cl_target_option *ptr)
24736 {
24737 rs6000_print_isa_options (file, indent, "Isa options set",
24738 ptr->x_rs6000_isa_flags);
24739
24740 rs6000_print_isa_options (file, indent, "Isa options explicit",
24741 ptr->x_rs6000_isa_flags_explicit);
24742 }
24743
24744 /* Helper function to print the current isa or misc options on a line. */
24745
24746 static void
24747 rs6000_print_options_internal (FILE *file,
24748 int indent,
24749 const char *string,
24750 HOST_WIDE_INT flags,
24751 const char *prefix,
24752 const struct rs6000_opt_mask *opts,
24753 size_t num_elements)
24754 {
24755 size_t i;
24756 size_t start_column = 0;
24757 size_t cur_column;
24758 size_t max_column = 120;
24759 size_t prefix_len = strlen (prefix);
24760 size_t comma_len = 0;
24761 const char *comma = "";
24762
24763 if (indent)
24764 start_column += fprintf (file, "%*s", indent, "");
24765
24766 if (!flags)
24767 {
24768 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
24769 return;
24770 }
24771
24772 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
24773
24774 /* Print the various mask options. */
24775 cur_column = start_column;
24776 for (i = 0; i < num_elements; i++)
24777 {
24778 bool invert = opts[i].invert;
24779 const char *name = opts[i].name;
24780 const char *no_str = "";
24781 HOST_WIDE_INT mask = opts[i].mask;
24782 size_t len = comma_len + prefix_len + strlen (name);
24783
24784 if (!invert)
24785 {
24786 if ((flags & mask) == 0)
24787 {
24788 no_str = "no-";
24789 len += strlen ("no-");
24790 }
24791
24792 flags &= ~mask;
24793 }
24794
24795 else
24796 {
24797 if ((flags & mask) != 0)
24798 {
24799 no_str = "no-";
24800 len += strlen ("no-");
24801 }
24802
24803 flags |= mask;
24804 }
24805
24806 cur_column += len;
24807 if (cur_column > max_column)
24808 {
24809 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
24810 cur_column = start_column + len;
24811 comma = "";
24812 }
24813
24814 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24815 comma = ", ";
24816 comma_len = strlen (", ");
24817 }
24818
24819 fputs ("\n", file);
24820 }
24821
24822 /* Helper function to print the current isa options on a line. */
24823
24824 static void
24825 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24826 HOST_WIDE_INT flags)
24827 {
24828 rs6000_print_options_internal (file, indent, string, flags, "-m",
24829 &rs6000_opt_masks[0],
24830 ARRAY_SIZE (rs6000_opt_masks));
24831 }
24832
24833 static void
24834 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
24835 HOST_WIDE_INT flags)
24836 {
24837 rs6000_print_options_internal (file, indent, string, flags, "",
24838 &rs6000_builtin_mask_names[0],
24839 ARRAY_SIZE (rs6000_builtin_mask_names));
24840 }
24841
24842 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
24843 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24844 -mupper-regs-df, etc.).
24845
24846 If the user used -mno-power8-vector, we need to turn off all of the implicit
24847 ISA 2.07 and 3.0 options that relate to the vector unit.
24848
24849 If the user used -mno-power9-vector, we need to turn off all of the implicit
24850 ISA 3.0 options that relate to the vector unit.
24851
24852 This function does not handle explicit options such as the user specifying
24853 -mdirect-move. These are handled in rs6000_option_override_internal, and
24854 the appropriate error is given if needed.
24855
24856 We return a mask of all of the implicit options that should not be enabled
24857 by default. */
24858
24859 static HOST_WIDE_INT
24860 rs6000_disable_incompatible_switches (void)
24861 {
24862 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24863 size_t i, j;
24864
24865 static const struct {
24866 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24867 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24868 const char *const name; /* name of the switch. */
24869 } flags[] = {
24870 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
24871 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
24872 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
24873 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
24874 };
24875
24876 for (i = 0; i < ARRAY_SIZE (flags); i++)
24877 {
24878 HOST_WIDE_INT no_flag = flags[i].no_flag;
24879
24880 if ((rs6000_isa_flags & no_flag) == 0
24881 && (rs6000_isa_flags_explicit & no_flag) != 0)
24882 {
24883 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24884 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24885 & rs6000_isa_flags
24886 & dep_flags);
24887
24888 if (set_flags)
24889 {
24890 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24891 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24892 {
24893 set_flags &= ~rs6000_opt_masks[j].mask;
24894 error ("%<-mno-%s%> turns off %<-m%s%>",
24895 flags[i].name,
24896 rs6000_opt_masks[j].name);
24897 }
24898
24899 gcc_assert (!set_flags);
24900 }
24901
24902 rs6000_isa_flags &= ~dep_flags;
24903 ignore_masks |= no_flag | dep_flags;
24904 }
24905 }
24906
24907 return ignore_masks;
24908 }
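
/* As an illustration, an explicit -mno-vsx -mpower8-vector reaches the
   inner loop above with OPTION_MASK_P8_VECTOR in SET_FLAGS (assuming it
   is one of the OTHER_VSX_VECTOR_MASKS dependents), reports
   "-mno-vsx turns off -mpower8-vector", and then masks the dependent
   flags out of rs6000_isa_flags.  */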
24909
24910 \f
24911 /* Helper function for printing the function name when debugging. */
24912
24913 static const char *
24914 get_decl_name (tree fn)
24915 {
24916 tree name;
24917
24918 if (!fn)
24919 return "<null>";
24920
24921 name = DECL_NAME (fn);
24922 if (!name)
24923 return "<no-name>";
24924
24925 return IDENTIFIER_POINTER (name);
24926 }
24927
24928 /* Return the clone id of the target we are compiling code for in a target
24929 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24930 the priority list for the target clones (ordered from lowest to
24931 highest). */
24932
24933 static int
24934 rs6000_clone_priority (tree fndecl)
24935 {
24936 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24937 HOST_WIDE_INT isa_masks;
24938 int ret = CLONE_DEFAULT;
24939 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24940 const char *attrs_str = NULL;
24941
24942 attrs = TREE_VALUE (TREE_VALUE (attrs));
24943 attrs_str = TREE_STRING_POINTER (attrs);
24944
24945 /* Return priority zero for the default function. Otherwise return the
24946 priority of the ISA needed by the function. */
24947 if (strcmp (attrs_str, "default") != 0)
24948 {
24949 if (fn_opts == NULL_TREE)
24950 fn_opts = target_option_default_node;
24951
24952 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24953 isa_masks = rs6000_isa_flags;
24954 else
24955 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24956
24957 for (ret = CLONE_MAX - 1; ret != 0; ret--)
24958 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24959 break;
24960 }
24961
24962 if (TARGET_DEBUG_TARGET)
24963 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
24964 get_decl_name (fndecl), ret);
24965
24966 return ret;
24967 }
24968
24969 /* This compares the priority of target features in function DECL1 and DECL2.
24970 It returns positive value if DECL1 is higher priority, negative value if
24971 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24972 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24973
24974 static int
24975 rs6000_compare_version_priority (tree decl1, tree decl2)
24976 {
24977 int priority1 = rs6000_clone_priority (decl1);
24978 int priority2 = rs6000_clone_priority (decl2);
24979 int ret = priority1 - priority2;
24980
24981 if (TARGET_DEBUG_TARGET)
24982 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24983 get_decl_name (decl1), get_decl_name (decl2), ret);
24984
24985 return ret;
24986 }
24987
24988 /* Make a dispatcher declaration for the multi-versioned function DECL.
24989 Calls to DECL function will be replaced with calls to the dispatcher
24990 by the front-end. Returns the decl of the dispatcher function. */
24991
24992 static tree
24993 rs6000_get_function_versions_dispatcher (void *decl)
24994 {
24995 tree fn = (tree) decl;
24996 struct cgraph_node *node = NULL;
24997 struct cgraph_node *default_node = NULL;
24998 struct cgraph_function_version_info *node_v = NULL;
24999 struct cgraph_function_version_info *first_v = NULL;
25000
25001 tree dispatch_decl = NULL;
25002
25003 struct cgraph_function_version_info *default_version_info = NULL;
25004 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
25005
25006 if (TARGET_DEBUG_TARGET)
25007 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
25008 get_decl_name (fn));
25009
25010 node = cgraph_node::get (fn);
25011 gcc_assert (node != NULL);
25012
25013 node_v = node->function_version ();
25014 gcc_assert (node_v != NULL);
25015
25016 if (node_v->dispatcher_resolver != NULL)
25017 return node_v->dispatcher_resolver;
25018
25019 /* Find the default version and make it the first node. */
25020 first_v = node_v;
25021 /* Go to the beginning of the chain. */
25022 while (first_v->prev != NULL)
25023 first_v = first_v->prev;
25024
25025 default_version_info = first_v;
25026 while (default_version_info != NULL)
25027 {
25028 const tree decl2 = default_version_info->this_node->decl;
25029 if (is_function_default_version (decl2))
25030 break;
25031 default_version_info = default_version_info->next;
25032 }
25033
25034 /* If there is no default node, just return NULL. */
25035 if (default_version_info == NULL)
25036 return NULL;
25037
25038 /* Make default info the first node. */
25039 if (first_v != default_version_info)
25040 {
25041 default_version_info->prev->next = default_version_info->next;
25042 if (default_version_info->next)
25043 default_version_info->next->prev = default_version_info->prev;
25044 first_v->prev = default_version_info;
25045 default_version_info->next = first_v;
25046 default_version_info->prev = NULL;
25047 }
25048
25049 default_node = default_version_info->this_node;
25050
25051 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25052 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25053 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25054 "exports hardware capability bits");
25055 #else
25056
25057 if (targetm.has_ifunc_p ())
25058 {
25059 struct cgraph_function_version_info *it_v = NULL;
25060 struct cgraph_node *dispatcher_node = NULL;
25061 struct cgraph_function_version_info *dispatcher_version_info = NULL;
25062
25063 /* Right now, the dispatching is done via ifunc. */
25064 dispatch_decl = make_dispatcher_decl (default_node->decl);
25065
25066 dispatcher_node = cgraph_node::get_create (dispatch_decl);
25067 gcc_assert (dispatcher_node != NULL);
25068 dispatcher_node->dispatcher_function = 1;
25069 dispatcher_version_info
25070 = dispatcher_node->insert_new_function_version ();
25071 dispatcher_version_info->next = default_version_info;
25072 dispatcher_node->definition = 1;
25073
25074 /* Set the dispatcher for all the versions. */
25075 it_v = default_version_info;
25076 while (it_v != NULL)
25077 {
25078 it_v->dispatcher_resolver = dispatch_decl;
25079 it_v = it_v->next;
25080 }
25081 }
25082 else
25083 {
25084 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25085 "multiversioning needs %<ifunc%> which is not supported "
25086 "on this target");
25087 }
25088 #endif
25089
25090 return dispatch_decl;
25091 }
25092
25093 /* Make the resolver function decl to dispatch the versions of a multi-
25094 versioned function, DEFAULT_DECL. Create an empty basic block in the
25095 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25096 function. */
25097
25098 static tree
25099 make_resolver_func (const tree default_decl,
25100 const tree dispatch_decl,
25101 basic_block *empty_bb)
25102 {
25103 /* Make the resolver function static. The resolver function returns
25104 void *. */
25105 tree decl_name = clone_function_name (default_decl, "resolver");
25106 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
25107 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
25108 tree decl = build_fn_decl (resolver_name, type);
25109 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
25110
25111 DECL_NAME (decl) = decl_name;
25112 TREE_USED (decl) = 1;
25113 DECL_ARTIFICIAL (decl) = 1;
25114 DECL_IGNORED_P (decl) = 0;
25115 TREE_PUBLIC (decl) = 0;
25116 DECL_UNINLINABLE (decl) = 1;
25117
25118 /* Resolver is not external, body is generated. */
25119 DECL_EXTERNAL (decl) = 0;
25120 DECL_EXTERNAL (dispatch_decl) = 0;
25121
25122 DECL_CONTEXT (decl) = NULL_TREE;
25123 DECL_INITIAL (decl) = make_node (BLOCK);
25124 DECL_STATIC_CONSTRUCTOR (decl) = 0;
25125
25126 if (DECL_COMDAT_GROUP (default_decl)
25127 || TREE_PUBLIC (default_decl))
25128 {
25129 /* In this case, each translation unit with a call to this
25130 versioned function will put out a resolver. Ensure it
25131 is comdat to keep just one copy. */
25132 DECL_COMDAT (decl) = 1;
25133 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25134 }
25135 else
25136 TREE_PUBLIC (dispatch_decl) = 0;
25137
25138 /* Build result decl and add to function_decl. */
25139 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25140 DECL_CONTEXT (t) = decl;
25141 DECL_ARTIFICIAL (t) = 1;
25142 DECL_IGNORED_P (t) = 1;
25143 DECL_RESULT (decl) = t;
25144
25145 gimplify_function_tree (decl);
25146 push_cfun (DECL_STRUCT_FUNCTION (decl));
25147 *empty_bb = init_lowered_empty_function (decl, false,
25148 profile_count::uninitialized ());
25149
25150 cgraph_node::add_new_function (decl, true);
25151 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25152
25153 pop_cfun ();
25154
25155 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25156 DECL_ATTRIBUTES (dispatch_decl)
25157 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25158
25159 cgraph_node::create_same_body_alias (dispatch_decl, decl);
25160
25161 return decl;
25162 }
25163
25164 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25165 return a pointer to VERSION_DECL if we are running on a machine that
25166 supports the hardware architecture bits indexed by CLONE_ISA. This function will
25167 be called during version dispatch to decide which function version to
25168 execute. It returns the basic block at the end, to which more conditions
25169 can be added. */
25170
25171 static basic_block
25172 add_condition_to_bb (tree function_decl, tree version_decl,
25173 int clone_isa, basic_block new_bb)
25174 {
25175 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25176
25177 gcc_assert (new_bb != NULL);
25178 gimple_seq gseq = bb_seq (new_bb);
25179
25180
25181 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25182 build_fold_addr_expr (version_decl));
25183 tree result_var = create_tmp_var (ptr_type_node);
25184 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25185 gimple *return_stmt = gimple_build_return (result_var);
25186
25187 if (clone_isa == CLONE_DEFAULT)
25188 {
25189 gimple_seq_add_stmt (&gseq, convert_stmt);
25190 gimple_seq_add_stmt (&gseq, return_stmt);
25191 set_bb_seq (new_bb, gseq);
25192 gimple_set_bb (convert_stmt, new_bb);
25193 gimple_set_bb (return_stmt, new_bb);
25194 pop_cfun ();
25195 return new_bb;
25196 }
25197
25198 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25199 tree cond_var = create_tmp_var (bool_int_type_node);
25200 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25201 const char *arg_str = rs6000_clone_map[clone_isa].name;
25202 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25203 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25204 gimple_call_set_lhs (call_cond_stmt, cond_var);
25205
25206 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25207 gimple_set_bb (call_cond_stmt, new_bb);
25208 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25209
25210 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25211 NULL_TREE, NULL_TREE);
25212 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25213 gimple_set_bb (if_else_stmt, new_bb);
25214 gimple_seq_add_stmt (&gseq, if_else_stmt);
25215
25216 gimple_seq_add_stmt (&gseq, convert_stmt);
25217 gimple_seq_add_stmt (&gseq, return_stmt);
25218 set_bb_seq (new_bb, gseq);
25219
25220 basic_block bb1 = new_bb;
25221 edge e12 = split_block (bb1, if_else_stmt);
25222 basic_block bb2 = e12->dest;
25223 e12->flags &= ~EDGE_FALLTHRU;
25224 e12->flags |= EDGE_TRUE_VALUE;
25225
25226 edge e23 = split_block (bb2, return_stmt);
25227 gimple_set_bb (convert_stmt, bb2);
25228 gimple_set_bb (return_stmt, bb2);
25229
25230 basic_block bb3 = e23->dest;
25231 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25232
25233 remove_edge (e23);
25234 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25235
25236 pop_cfun ();
25237 return bb3;
25238 }
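
/* As a sketch, each non-default clone contributes roughly this GIMPLE to
   the resolver (identifiers invented for the example):

     cond = __builtin_cpu_supports ("arch_3_00");
     if (cond != 0)
       {
         ptr = (void *) &foo_power9;
         return ptr;
       }
     /-* fall through to the test for the next-lower-priority clone *-/

   and the fall-through block returned here is where the next call of
   this function appends its test.  */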
25239
25240 /* This function generates the dispatch function for multi-versioned functions.
25241 DISPATCH_DECL is the function which will contain the dispatch logic.
25242 FNDECLS are the function choices for dispatch, and is a tree chain.
25243 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25244 code is generated. */
25245
25246 static int
25247 dispatch_function_versions (tree dispatch_decl,
25248 void *fndecls_p,
25249 basic_block *empty_bb)
25250 {
25251 int ix;
25252 tree ele;
25253 vec<tree> *fndecls;
25254 tree clones[CLONE_MAX];
25255
25256 if (TARGET_DEBUG_TARGET)
25257 fputs ("dispatch_function_versions, top\n", stderr);
25258
25259 gcc_assert (dispatch_decl != NULL
25260 && fndecls_p != NULL
25261 && empty_bb != NULL);
25262
25263 /* fndecls_p is actually a vector. */
25264 fndecls = static_cast<vec<tree> *> (fndecls_p);
25265
25266 /* At least one more version other than the default. */
25267 gcc_assert (fndecls->length () >= 2);
25268
25269 /* The first version in the vector is the default decl. */
25270 memset ((void *) clones, '\0', sizeof (clones));
25271 clones[CLONE_DEFAULT] = (*fndecls)[0];
25272
25273 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25274 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25275 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
25276 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25277 to insert the code here to do the call. */
25278
25279 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25280 {
25281 int priority = rs6000_clone_priority (ele);
25282 if (!clones[priority])
25283 clones[priority] = ele;
25284 }
25285
25286 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25287 if (clones[ix])
25288 {
25289 if (TARGET_DEBUG_TARGET)
25290 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25291 ix, get_decl_name (clones[ix]));
25292
25293 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25294 *empty_bb);
25295 }
25296
25297 return 0;
25298 }
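
/* Usage sketch: given

     __attribute__ ((target_clones ("cpu=power9", "default")))
     int foo (void);

   the loop above walks the clones from highest priority down, so the
   power9 test is emitted first, and the default clone, which lives at
   index CLONE_DEFAULT (0), ends the resolver with an unconditional
   return.  */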
25299
25300 /* Generate the dispatching code body to dispatch multi-versioned function
25301 DECL. The target hook is called to process the "target" attributes and
25302 provide the code to dispatch the right function at run-time. NODE points
25303 to the dispatcher decl whose body will be created. */
25304
25305 static tree
25306 rs6000_generate_version_dispatcher_body (void *node_p)
25307 {
25308 tree resolver;
25309 basic_block empty_bb;
25310 struct cgraph_node *node = (cgraph_node *) node_p;
25311 struct cgraph_function_version_info *ninfo = node->function_version ();
25312
25313 if (ninfo->dispatcher_resolver)
25314 return ninfo->dispatcher_resolver;
25315
25316 /* node is going to be an alias, so remove the finalized bit. */
25317 node->definition = false;
25318
25319 /* The first version in the chain corresponds to the default version. */
25320 ninfo->dispatcher_resolver = resolver
25321 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25322
25323 if (TARGET_DEBUG_TARGET)
25324 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
25325 get_decl_name (resolver));
25326
25327 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25328 auto_vec<tree, 2> fn_ver_vec;
25329
25330 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25331 vinfo;
25332 vinfo = vinfo->next)
25333 {
25334 struct cgraph_node *version = vinfo->this_node;
25335 /* Check for virtual functions here again, as by this time it should
25336 have been determined if this function needs a vtable index or
25337 not. This happens for methods in derived classes that override
25338 virtual methods in base classes but are not explicitly marked as
25339 virtual. */
25340 if (DECL_VINDEX (version->decl))
25341 sorry ("virtual function multiversioning not supported");
25342
25343 fn_ver_vec.safe_push (version->decl);
25344 }
25345
25346 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25347 cgraph_edge::rebuild_edges ();
25348 pop_cfun ();
25349 return resolver;
25350 }
25351
25352 /* Hook to decide if we need to scan function gimple statements to
25353 collect target specific information for inlining, and update the
25354 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25355 to predict which ISA feature is used at this time. Return true
25356 if we need to scan, otherwise return false. */
25357
25358 static bool
25359 rs6000_need_ipa_fn_target_info (const_tree decl,
25360 unsigned int &info ATTRIBUTE_UNUSED)
25361 {
25362 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25363 if (!target)
25364 target = target_option_default_node;
25365 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25366
25367 /* See PR102059; we only handle HTM for now, so we only do the
25368 subsequent scanning when the HTM feature is enabled. */
25369 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25370 return true;
25371
25372 return false;
25373 }
25374
25375 /* Hook to update target specific information INFO for inlining by
25376 checking the given STMT. Return false if we don't need to scan
25377 any more, otherwise return true. */
25378
25379 static bool
25380 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25381 {
25382 /* Assume inline asm can use any instruction features. */
25383 if (gimple_code (stmt) == GIMPLE_ASM)
25384 {
25385 /* We should set any bits we are concerned with; for now
25386 OPTION_MASK_HTM is the only bit we care about. */
25387 info |= RS6000_FN_TARGET_INFO_HTM;
25388 return false;
25389 }
25390 else if (gimple_code (stmt) == GIMPLE_CALL)
25391 {
25392 tree fndecl = gimple_call_fndecl (stmt);
25393 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25394 {
25395 enum rs6000_gen_builtins fcode
25396 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25397 /* HTM bifs definitely exploit HTM insns. */
25398 if (bif_is_htm (rs6000_builtin_info[fcode]))
25399 {
25400 info |= RS6000_FN_TARGET_INFO_HTM;
25401 return false;
25402 }
25403 }
25404 }
25405
25406 return true;
25407 }
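
/* Example (a sketch): scanning a callee such as

     void enter (void) { __builtin_tbegin (0); }

   reaches the GIMPLE_CALL arm above, recognizes an HTM builtin, sets
   RS6000_FN_TARGET_INFO_HTM, and stops the scan by returning false; a
   callee containing any asm statement is conservatively treated the
   same way.  */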
25408
25409 /* Hook to determine if one function can safely inline another. */
25410
25411 static bool
25412 rs6000_can_inline_p (tree caller, tree callee)
25413 {
25414 bool ret = false;
25415 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25416 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25417
25418 /* If the callee has no option attributes, then it is ok to inline. */
25419 if (!callee_tree)
25420 ret = true;
25421
25422 else
25423 {
25424 HOST_WIDE_INT caller_isa;
25425 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25426 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25427 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25428
25429 /* If the caller has option attributes, then use them.
25430 Otherwise, use the command line options. */
25431 if (caller_tree)
25432 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
25433 else
25434 caller_isa = rs6000_isa_flags;
25435
25436 cgraph_node *callee_node = cgraph_node::get (callee);
25437 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25438 {
25439 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25440 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25441 {
25442 callee_isa &= ~OPTION_MASK_HTM;
25443 explicit_isa &= ~OPTION_MASK_HTM;
25444 }
25445 }
25446
25447 /* The callee's options must be a subset of the caller's options, i.e.
25448 a vsx function may inline an altivec function, but a no-vsx function
25449 must not inline a vsx function. However, for those options that the
25450 callee has explicitly enabled or disabled, then we must enforce that
25451 the callee's and caller's options match exactly; see PR70010. */
25452 if (((caller_isa & callee_isa) == callee_isa)
25453 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25454 ret = true;
25455 }
25456
25457 if (TARGET_DEBUG_TARGET)
25458 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
25459 get_decl_name (caller), get_decl_name (callee),
25460 (ret ? "can" : "cannot"));
25461
25462 return ret;
25463 }
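
/* Illustrative sketch of the subset rule:

     __attribute__ ((target ("altivec"))) static int callee (void);
     __attribute__ ((target ("vsx"))) int caller (void)
     { return callee (); }

   VSX implies Altivec, so the callee's ISA flags are a subset of the
   caller's and inlining is permitted; swapping the two attributes would
   block it.  Options the callee enables or disables explicitly
   (e.g. "no-vsx") must additionally match the caller exactly, per
   PR70010.  */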
25464 \f
25465 /* Allocate a stack temp and fix up the address so it meets the particular
25466 memory requirements (either offsettable or REG+REG addressing). */
25467
25468 rtx
25469 rs6000_allocate_stack_temp (machine_mode mode,
25470 bool offsettable_p,
25471 bool reg_reg_p)
25472 {
25473 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25474 rtx addr = XEXP (stack, 0);
25475 int strict_p = reload_completed;
25476
25477 if (!legitimate_indirect_address_p (addr, strict_p))
25478 {
25479 if (offsettable_p
25480 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25481 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25482
25483 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25484 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25485 }
25486
25487 return stack;
25488 }
25489
25490 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25491 convert to such a form to deal with memory reference instructions
25492 like STFIWX and LDBRX that only take reg+reg addressing. */
25493
25494 rtx
25495 rs6000_force_indexed_or_indirect_mem (rtx x)
25496 {
25497 machine_mode mode = GET_MODE (x);
25498
25499 gcc_assert (MEM_P (x));
25500 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25501 {
25502 rtx addr = XEXP (x, 0);
25503 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25504 {
25505 rtx reg = XEXP (addr, 0);
25506 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25507 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25508 gcc_assert (REG_P (reg));
25509 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25510 addr = reg;
25511 }
25512 else if (GET_CODE (addr) == PRE_MODIFY)
25513 {
25514 rtx reg = XEXP (addr, 0);
25515 rtx expr = XEXP (addr, 1);
25516 gcc_assert (REG_P (reg));
25517 gcc_assert (GET_CODE (expr) == PLUS);
25518 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25519 addr = reg;
25520 }
25521
25522 if (GET_CODE (addr) == PLUS)
25523 {
25524 rtx op0 = XEXP (addr, 0);
25525 rtx op1 = XEXP (addr, 1);
25526 op0 = force_reg (Pmode, op0);
25527 op1 = force_reg (Pmode, op1);
25528 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25529 }
25530 else
25531 x = replace_equiv_address (x, force_reg (Pmode, addr));
25532 }
25533
25534 return x;
25535 }
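
/* For example (a sketch in RTL terms), given

     (mem:DI (plus:DI (reg:DI 9) (const_int 32)))

   the constant 32 is forced into a fresh pseudo so the address becomes
   reg+reg, which instructions like LDBRX accept; PRE_INC, PRE_DEC and
   PRE_MODIFY addresses are first rewritten as an explicit add to the
   base register.  */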
25536
25537 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25538
25539 On the RS/6000, all integer constants are acceptable, though most won't be
25540 valid for particular insns. Only easy FP constants are acceptable. */
25541
25542 static bool
25543 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25544 {
25545 if (TARGET_ELF && tls_referenced_p (x))
25546 return false;
25547
25548 if (CONST_DOUBLE_P (x))
25549 return easy_fp_constant (x, mode);
25550
25551 if (GET_CODE (x) == CONST_VECTOR)
25552 return easy_vector_constant (x, mode);
25553
25554 return true;
25555 }
25556
25557 #if TARGET_AIX_OS
25558 /* Implement TARGET_PRECOMPUTE_TLS_P.
25559
25560 On AIX, TLS symbols are in the TOC, which is maintained in the
25561 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25562 must be considered legitimate constants. */
25563
25564 static bool
25565 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25566 {
25567 return tls_referenced_p (x);
25568 }
25569 #endif
25570
25571 \f
25572 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25573
25574 static bool
25575 chain_already_loaded (rtx_insn *last)
25576 {
25577 for (; last != NULL; last = PREV_INSN (last))
25578 {
25579 if (NONJUMP_INSN_P (last))
25580 {
25581 rtx patt = PATTERN (last);
25582
25583 if (GET_CODE (patt) == SET)
25584 {
25585 rtx lhs = XEXP (patt, 0);
25586
25587 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25588 return true;
25589 }
25590 }
25591 }
25592 return false;
25593 }
25594
25595 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25596
25597 void
25598 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25599 {
25600 rtx func = func_desc;
25601 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25602 rtx toc_load = NULL_RTX;
25603 rtx toc_restore = NULL_RTX;
25604 rtx func_addr;
25605 rtx abi_reg = NULL_RTX;
25606 rtx call[5];
25607 int n_call;
25608 rtx insn;
25609 bool is_pltseq_longcall;
25610
25611 if (global_tlsarg)
25612 tlsarg = global_tlsarg;
25613
25614 /* Handle longcall attributes. */
25615 is_pltseq_longcall = false;
25616 if ((INTVAL (cookie) & CALL_LONG) != 0
25617 && GET_CODE (func_desc) == SYMBOL_REF)
25618 {
25619 func = rs6000_longcall_ref (func_desc, tlsarg);
25620 if (TARGET_PLTSEQ)
25621 is_pltseq_longcall = true;
25622 }
25623
25624 /* Handle indirect calls. */
25625 if (!SYMBOL_REF_P (func)
25626 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25627 {
25628 if (!rs6000_pcrel_p ())
25629 {
25630 /* Save the TOC into its reserved slot before the call,
25631 and prepare to restore it after the call. */
25632 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25633 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25634 gen_rtvec (1, stack_toc_offset),
25635 UNSPEC_TOCSLOT);
25636 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25637
25638 /* Can we optimize saving the TOC in the prologue or
25639 do we need to do it at every call? */
25640 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25641 cfun->machine->save_toc_in_prologue = true;
25642 else
25643 {
25644 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25645 rtx stack_toc_mem = gen_frame_mem (Pmode,
25646 gen_rtx_PLUS (Pmode, stack_ptr,
25647 stack_toc_offset));
25648 MEM_VOLATILE_P (stack_toc_mem) = 1;
25649 if (is_pltseq_longcall)
25650 {
25651 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25652 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25653 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25654 }
25655 else
25656 emit_move_insn (stack_toc_mem, toc_reg);
25657 }
25658 }
25659
25660 if (DEFAULT_ABI == ABI_ELFv2)
25661 {
25662 /* A function pointer in the ELFv2 ABI is just a plain address, but
25663 the ABI requires it to be loaded into r12 before the call. */
25664 func_addr = gen_rtx_REG (Pmode, 12);
25665 emit_move_insn (func_addr, func);
25666 abi_reg = func_addr;
25667 /* Indirect calls via CTR are strongly preferred over indirect
25668 calls via LR, so move the address there. Needed to mark
25669 this insn for linker plt sequence editing too. */
25670 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25671 if (is_pltseq_longcall)
25672 {
25673 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25674 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25675 emit_insn (gen_rtx_SET (func_addr, mark_func));
25676 v = gen_rtvec (2, func_addr, func_desc);
25677 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25678 }
25679 else
25680 emit_move_insn (func_addr, abi_reg);
25681 }
25682 else
25683 {
25684 /* A function pointer under AIX is a pointer to a data area whose
25685 first word contains the actual address of the function, whose
25686 second word contains a pointer to its TOC, and whose third word
25687 contains a value to place in the static chain register (r11).
25688 Note that if we load the static chain, our "trampoline" need
25689 not have any executable code. */
25690
25691 /* Load up address of the actual function. */
25692 func = force_reg (Pmode, func);
25693 func_addr = gen_reg_rtx (Pmode);
25694 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25695
25696 /* Indirect calls via CTR are strongly preferred over indirect
25697 calls via LR, so move the address there. */
25698 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25699 emit_move_insn (ctr_reg, func_addr);
25700 func_addr = ctr_reg;
25701
25702 /* Prepare to load the TOC of the called function. Note that the
25703 TOC load must happen immediately before the actual call so
25704 that unwinding the TOC registers works correctly. See the
25705 comment in frob_update_context. */
25706 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25707 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25708 gen_rtx_PLUS (Pmode, func,
25709 func_toc_offset));
25710 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25711
25712 /* If we have a static chain, load it up. But, if the call was
25713 originally direct, the 3rd word has not been written since no
25714 trampoline has been built, so we ought not to load it, lest we
25715 overwrite a static chain value. */
25716 if (!(GET_CODE (func_desc) == SYMBOL_REF
25717 && SYMBOL_REF_FUNCTION_P (func_desc))
25718 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25719 && !chain_already_loaded (get_current_sequence ()->next->last))
25720 {
25721 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25722 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25723 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25724 gen_rtx_PLUS (Pmode, func,
25725 func_sc_offset));
25726 emit_move_insn (sc_reg, func_sc_mem);
25727 abi_reg = sc_reg;
25728 }
25729 }
25730 }
25731 else
25732 {
25733 /* No TOC register needed for calls from PC-relative callers. */
25734 if (!rs6000_pcrel_p ())
25735 /* Direct calls use the TOC: for local calls, the callee will
25736 assume the TOC register is set; for non-local calls, the
25737 PLT stub needs the TOC register. */
25738 abi_reg = toc_reg;
25739 func_addr = func;
25740 }
25741
25742 /* Create the call. */
25743 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25744 if (value != NULL_RTX)
25745 call[0] = gen_rtx_SET (value, call[0]);
25746 call[1] = gen_rtx_USE (VOIDmode, cookie);
25747 n_call = 2;
25748
25749 if (toc_load)
25750 call[n_call++] = toc_load;
25751 if (toc_restore)
25752 call[n_call++] = toc_restore;
25753
25754 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25755
25756 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25757 insn = emit_call_insn (insn);
25758
25759 /* Mention all registers defined by the ABI to hold information
25760 as uses in CALL_INSN_FUNCTION_USAGE. */
25761 if (abi_reg)
25762 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25763 }
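
/* A sketch of what a 64-bit ELFv2 indirect call typically expands to
   (register numbers illustrative):

        std 2,24(1)     # save the TOC pointer in its reserved slot
        mr 12,9         # ELFv2 wants the target address in r12
        mtctr 12
        bctrl
        ld 2,24(1)      # restore the TOC pointer

   Under the AIX ABI the function descriptor is dereferenced instead,
   supplying the code address, the callee's TOC, and possibly a static
   chain.  */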
25764
25765 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25766
25767 void
25768 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25769 {
25770 rtx call[2];
25771 rtx insn;
25772 rtx r12 = NULL_RTX;
25773 rtx func_addr = func_desc;
25774
25775 gcc_assert (INTVAL (cookie) == 0);
25776
25777 if (global_tlsarg)
25778 tlsarg = global_tlsarg;
25779
25780 /* For ELFv2, r12 and CTR need to hold the function address
25781 for an indirect call. */
25782 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25783 {
25784 r12 = gen_rtx_REG (Pmode, 12);
25785 emit_move_insn (r12, func_desc);
25786 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25787 emit_move_insn (func_addr, r12);
25788 }
25789
25790 /* Create the call. */
25791 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25792 if (value != NULL_RTX)
25793 call[0] = gen_rtx_SET (value, call[0]);
25794
25795 call[1] = simple_return_rtx;
25796
25797 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25798 insn = emit_call_insn (insn);
25799
25800 /* Note use of the TOC register. */
25801 if (!rs6000_pcrel_p ())
25802 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25803 gen_rtx_REG (Pmode, TOC_REGNUM));
25804
25805 /* Note use of r12. */
25806 if (r12)
25807 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25808 }
25809
25810 /* Expand code to perform a call under the SYSV4 ABI. */
25811
25812 void
25813 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25814 {
25815 rtx func = func_desc;
25816 rtx func_addr;
25817 rtx call[4];
25818 rtx insn;
25819 rtx abi_reg = NULL_RTX;
25820 int n;
25821
25822 if (global_tlsarg)
25823 tlsarg = global_tlsarg;
25824
25825 /* Handle longcall attributes. */
25826 if ((INTVAL (cookie) & CALL_LONG) != 0
25827 && GET_CODE (func_desc) == SYMBOL_REF)
25828 {
25829 func = rs6000_longcall_ref (func_desc, tlsarg);
25830 /* If the longcall was implemented as an inline PLT call using
25831 PLT unspecs then func will be REG:r11. If not, func will be
25832 a pseudo reg. The inline PLT call sequence supports lazy
25833 linking (and longcalls to functions in dlopen'd libraries).
25834 The other styles of longcall don't. The lazy linking entry
25835 to the dynamic symbol resolver requires r11 be the function
25836 address (as it is for linker generated PLT stubs). Ensure
25837 r11 stays valid to the bctrl by marking r11 used by the call. */
25838 if (TARGET_PLTSEQ)
25839 abi_reg = func;
25840 }
25841
25842 /* Handle indirect calls. */
25843 if (GET_CODE (func) != SYMBOL_REF)
25844 {
25845 func = force_reg (Pmode, func);
25846
25847 /* Indirect calls via CTR are strongly preferred over indirect
25848 calls via LR, so move the address there. That can't be left
25849 to reload because we want to mark every instruction in an
25850 inline PLT call sequence with a reloc, enabling the linker to
25851 edit the sequence back to a direct call when that makes sense. */
25852 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25853 if (abi_reg)
25854 {
25855 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25856 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25857 emit_insn (gen_rtx_SET (func_addr, mark_func));
25858 v = gen_rtvec (2, func_addr, func_desc);
25859 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25860 }
25861 else
25862 emit_move_insn (func_addr, func);
25863 }
25864 else
25865 func_addr = func;
25866
25867 /* Create the call. */
25868 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25869 if (value != NULL_RTX)
25870 call[0] = gen_rtx_SET (value, call[0]);
25871
25872 call[1] = gen_rtx_USE (VOIDmode, cookie);
25873 n = 2;
25874 if (TARGET_SECURE_PLT
25875 && flag_pic
25876 && GET_CODE (func_addr) == SYMBOL_REF
25877 && !SYMBOL_REF_LOCAL_P (func_addr))
25878 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25879
25880 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25881
25882 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25883 insn = emit_call_insn (insn);
25884 if (abi_reg)
25885 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25886 }
25887
25888 /* Expand code to perform a sibling call under the SysV4 ABI. */
25889
25890 void
25891 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25892 {
25893 rtx func = func_desc;
25894 rtx func_addr;
25895 rtx call[3];
25896 rtx insn;
25897 rtx abi_reg = NULL_RTX;
25898
25899 if (global_tlsarg)
25900 tlsarg = global_tlsarg;
25901
25902 /* Handle longcall attributes. */
25903 if ((INTVAL (cookie) & CALL_LONG) != 0
25904 && GET_CODE (func_desc) == SYMBOL_REF)
25905 {
25906 func = rs6000_longcall_ref (func_desc, tlsarg);
25907 /* If the longcall was implemented as an inline PLT call using
25908 PLT unspecs then func will be REG:r11. If not, func will be
25909 a pseudo reg. The inline PLT call sequence supports lazy
25910 linking (and longcalls to functions in dlopen'd libraries).
25911 The other styles of longcall don't. The lazy linking entry
25912 to the dynamic symbol resolver requires r11 be the function
25913 address (as it is for linker generated PLT stubs). Ensure
25914 r11 stays valid to the bctr by marking r11 used by the call. */
25915 if (TARGET_PLTSEQ)
25916 abi_reg = func;
25917 }
25918
25919 /* Handle indirect calls. */
25920 if (GET_CODE (func) != SYMBOL_REF)
25921 {
25922 func = force_reg (Pmode, func);
25923
25924 /* Indirect sibcalls must go via CTR. That can't be left to
25925 reload because we want to mark every instruction in an inline
25926 PLT call sequence with a reloc, enabling the linker to edit
25927 the sequence back to a direct call when that makes sense. */
25928 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25929 if (abi_reg)
25930 {
25931 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25932 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25933 emit_insn (gen_rtx_SET (func_addr, mark_func));
25934 v = gen_rtvec (2, func_addr, func_desc);
25935 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25936 }
25937 else
25938 emit_move_insn (func_addr, func);
25939 }
25940 else
25941 func_addr = func;
25942
25943 /* Create the call. */
25944 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25945 if (value != NULL_RTX)
25946 call[0] = gen_rtx_SET (value, call[0]);
25947
25948 call[1] = gen_rtx_USE (VOIDmode, cookie);
25949 call[2] = simple_return_rtx;
25950
25951 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25952 insn = emit_call_insn (insn);
25953 if (abi_reg)
25954 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25955 }
25956
25957 #if TARGET_MACHO
25958
25959 /* Expand code to perform a call under the Darwin ABI.
25960 Modulo handling of mlongcall, this is much the same as sysv.
25961 If/when the longcall optimisation is removed, we could drop this
25962 code and use the sysv case (taking care to avoid the tls stuff).
25963
25964 We can use this for sibcalls too, if needed. */
25965
25966 void
25967 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25968 rtx cookie, bool sibcall)
25969 {
25970 rtx func = func_desc;
25971 rtx func_addr;
25972 rtx call[3];
25973 rtx insn;
25974 int cookie_val = INTVAL (cookie);
25975 bool make_island = false;
25976
25977 /* Handle longcall attributes; there are two cases for Darwin:
25978 1) Newer linkers are capable of synthesising any branch islands needed.
25979 2) We need a helper branch island synthesised by the compiler.
25980 The second case has mostly been retired and we don't use it for m64.
25981 In fact, it is only an optimisation; we could just indirect as sysv does,
25982 but we keep it for backwards compatibility for now.
25983 If we're going to use this, then we need to keep the CALL_LONG bit set,
25984 so that we can pick up the special insn form later. */
25985 if ((cookie_val & CALL_LONG) != 0
25986 && GET_CODE (func_desc) == SYMBOL_REF)
25987 {
25988 /* FIXME: the longcall opt should not hang off this flag, it is most
25989 likely incorrect for kernel-mode code-generation. */
25990 if (darwin_symbol_stubs && TARGET_32BIT)
25991 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
25992 else
25993 {
25994 /* The linker is capable of doing this, but the user explicitly
25995 asked for -mlongcall, so we'll do the 'normal' version. */
25996 func = rs6000_longcall_ref (func_desc, NULL_RTX);
25997 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
25998 }
25999 }
26000
26001 /* Handle indirect calls. */
26002 if (GET_CODE (func) != SYMBOL_REF)
26003 {
26004 func = force_reg (Pmode, func);
26005
26006 /* Indirect calls via CTR are strongly preferred over indirect
26007 calls via LR, and are required for indirect sibcalls, so move
26008 the address there. */
26009 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26010 emit_move_insn (func_addr, func);
26011 }
26012 else
26013 func_addr = func;
26014
26015 /* Create the call. */
26016 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26017 if (value != NULL_RTX)
26018 call[0] = gen_rtx_SET (value, call[0]);
26019
26020 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
26021
26022 if (sibcall)
26023 call[2] = simple_return_rtx;
26024 else
26025 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26026
26027 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26028 insn = emit_call_insn (insn);
26029 /* Now we have the debug info in the insn, we can set up the branch island
26030 if we're using one. */
26031 if (make_island)
26032 {
26033 tree funname = get_identifier (XSTR (func_desc, 0));
26034
26035 if (no_previous_def (funname))
26036 {
26037 rtx label_rtx = gen_label_rtx ();
26038 char *label_buf, temp_buf[256];
26039 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
26040 CODE_LABEL_NUMBER (label_rtx));
26041 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
26042 tree labelname = get_identifier (label_buf);
26043 add_compiler_branch_island (labelname, funname,
26044 insn_line ((const rtx_insn*)insn));
26045 }
26046 }
26047 }
26048 #endif
26049
26050 void
26051 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26052 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26053 {
26054 #if TARGET_MACHO
26055 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
26056 #else
26057 gcc_unreachable();
26058 #endif
26059 }
26060
26061
26062 void
26063 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26064 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26065 {
26066 #if TARGET_MACHO
26067 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
26068 #else
26069 gcc_unreachable();
26070 #endif
26071 }
26072
26073 /* Return whether we should generate PC-relative code for FNDECL. */
26074 bool
26075 rs6000_fndecl_pcrel_p (const_tree fndecl)
26076 {
26077 if (DEFAULT_ABI != ABI_ELFv2)
26078 return false;
26079
26080 struct cl_target_option *opts = target_opts_for_fn (fndecl);
26081
26082 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26083 && TARGET_CMODEL == CMODEL_MEDIUM);
26084 }
26085
26086 /* Return whether we should generate PC-relative code for *FN. */
26087 bool
26088 rs6000_function_pcrel_p (struct function *fn)
26089 {
26090 if (DEFAULT_ABI != ABI_ELFv2)
26091 return false;
26092
26093 /* Optimize the usual case. */
26094 if (fn == cfun)
26095 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26096 && TARGET_CMODEL == CMODEL_MEDIUM);
26097
26098 return rs6000_fndecl_pcrel_p (fn->decl);
26099 }
26100
26101 /* Return whether we should generate PC-relative code for the current
26102 function. */
26103 bool
26104 rs6000_pcrel_p ()
26105 {
26106 return (DEFAULT_ABI == ABI_ELFv2
26107 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26108 && TARGET_CMODEL == CMODEL_MEDIUM);
26109 }
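
/* In practice these predicates hold for ELFv2 code built with
   -mcpu=power10 (which turns on -mpcrel where the OS support exists)
   under the default medium code model; AIX, 32-bit, and ELFv1 code
   always takes the TOC-based paths.  */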
26110
26111 \f
26112 /* Given an address (ADDR), a mode (MODE), and what the format of the
26113 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26114 for the address. */
26115
26116 enum insn_form
26117 address_to_insn_form (rtx addr,
26118 machine_mode mode,
26119 enum non_prefixed_form non_prefixed_format)
26120 {
26121 /* Single register is easy. */
26122 if (REG_P (addr) || SUBREG_P (addr))
26123 return INSN_FORM_BASE_REG;
26124
26125 /* If the non-prefixed instruction format doesn't support offset addressing,
26126 make sure only indexed addressing is allowed.
26127
26128 We special case SDmode so that the register allocator does not try to move
26129 SDmode through GPR registers, but instead uses the 32-bit integer load and
26130 store instructions for the floating point registers. */
26131 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26132 {
26133 if (GET_CODE (addr) != PLUS)
26134 return INSN_FORM_BAD;
26135
26136 rtx op0 = XEXP (addr, 0);
26137 rtx op1 = XEXP (addr, 1);
26138 if (!REG_P (op0) && !SUBREG_P (op0))
26139 return INSN_FORM_BAD;
26140
26141 if (!REG_P (op1) && !SUBREG_P (op1))
26142 return INSN_FORM_BAD;
26143
26144 return INSN_FORM_X;
26145 }
26146
26147 /* Deal with update forms. */
26148 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26149 return INSN_FORM_UPDATE;
26150
26151 /* Handle PC-relative symbols and labels. Check for both local and
26152 external symbols. Assume labels are always local. TLS symbols
26153 are not PC-relative for rs6000. */
26154 if (TARGET_PCREL)
26155 {
26156 if (LABEL_REF_P (addr))
26157 return INSN_FORM_PCREL_LOCAL;
26158
26159 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26160 {
26161 if (!SYMBOL_REF_LOCAL_P (addr))
26162 return INSN_FORM_PCREL_EXTERNAL;
26163 else
26164 return INSN_FORM_PCREL_LOCAL;
26165 }
26166 }
26167
26168 if (GET_CODE (addr) == CONST)
26169 addr = XEXP (addr, 0);
26170
26171 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26172 if (GET_CODE (addr) == LO_SUM)
26173 return INSN_FORM_LO_SUM;
26174
26175 /* Everything below must be an offset address of some form. */
26176 if (GET_CODE (addr) != PLUS)
26177 return INSN_FORM_BAD;
26178
26179 rtx op0 = XEXP (addr, 0);
26180 rtx op1 = XEXP (addr, 1);
26181
26182 /* Check for indexed addresses. */
26183 if (REG_P (op1) || SUBREG_P (op1))
26184 {
26185 if (REG_P (op0) || SUBREG_P (op0))
26186 return INSN_FORM_X;
26187
26188 return INSN_FORM_BAD;
26189 }
26190
26191 if (!CONST_INT_P (op1))
26192 return INSN_FORM_BAD;
26193
26194 HOST_WIDE_INT offset = INTVAL (op1);
26195 if (!SIGNED_INTEGER_34BIT_P (offset))
26196 return INSN_FORM_BAD;
26197
26198 /* Check for local and external PC-relative addresses. Labels are always
26199 local. TLS symbols are not PC-relative for rs6000. */
26200 if (TARGET_PCREL)
26201 {
26202 if (LABEL_REF_P (op0))
26203 return INSN_FORM_PCREL_LOCAL;
26204
26205 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26206 {
26207 if (!SYMBOL_REF_LOCAL_P (op0))
26208 return INSN_FORM_PCREL_EXTERNAL;
26209 else
26210 return INSN_FORM_PCREL_LOCAL;
26211 }
26212 }
26213
26214 /* If it isn't PC-relative, the address must use a base register. */
26215 if (!REG_P (op0) && !SUBREG_P (op0))
26216 return INSN_FORM_BAD;
26217
26218 /* Large offsets must be prefixed. */
26219 if (!SIGNED_INTEGER_16BIT_P (offset))
26220 {
26221 if (TARGET_PREFIXED)
26222 return INSN_FORM_PREFIXED_NUMERIC;
26223
26224 return INSN_FORM_BAD;
26225 }
26226
26227 /* We have a 16-bit offset, see what default instruction format to use. */
26228 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26229 {
26230 unsigned size = GET_MODE_SIZE (mode);
26231
26232 /* On 64-bit systems, assume 64-bit integers need to use DS form
26233 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26234 (for LXV and STXV). TImode is problematic in that its normal usage
26235 is expected to be GPRs where it wants a DS instruction format, but if
26236 it goes into the vector registers, it wants a DQ instruction
26237 format. */
26238 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26239 non_prefixed_format = NON_PREFIXED_DS;
26240
26241 else if (TARGET_VSX && size >= 16
26242 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26243 non_prefixed_format = NON_PREFIXED_DQ;
26244
26245 else
26246 non_prefixed_format = NON_PREFIXED_D;
26247 }
26248
26249 /* Classify the D/DS/DQ-form addresses. */
26250 switch (non_prefixed_format)
26251 {
26252 /* Instruction format D, all 16 bits are valid. */
26253 case NON_PREFIXED_D:
26254 return INSN_FORM_D;
26255
26256 /* Instruction format DS, bottom 2 bits must be 0. */
26257 case NON_PREFIXED_DS:
26258 if ((offset & 3) == 0)
26259 return INSN_FORM_DS;
26260
26261 else if (TARGET_PREFIXED)
26262 return INSN_FORM_PREFIXED_NUMERIC;
26263
26264 else
26265 return INSN_FORM_BAD;
26266
26267 /* Instruction format DQ, bottom 4 bits must be 0. */
26268 case NON_PREFIXED_DQ:
26269 if ((offset & 15) == 0)
26270 return INSN_FORM_DQ;
26271
26272 else if (TARGET_PREFIXED)
26273 return INSN_FORM_PREFIXED_NUMERIC;
26274
26275 else
26276 return INSN_FORM_BAD;
26277
26278 default:
26279 break;
26280 }
26281
26282 return INSN_FORM_BAD;
26283 }
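
/* Some illustrative classifications (a sketch; register numbers are
   arbitrary):

     (reg 3)                            -> INSN_FORM_BASE_REG
     (plus (reg 3) (reg 4))             -> INSN_FORM_X
     (plus (reg 3) (const_int 16))      -> INSN_FORM_D, _DS or _DQ,
                                           depending on the default format
     (plus (reg 3) (const_int 6))       -> INSN_FORM_PREFIXED_NUMERIC for a
                                           DS-form default when prefixed
                                           insns exist, else INSN_FORM_BAD
     (plus (reg 3) (const_int 1048576)) -> INSN_FORM_PREFIXED_NUMERIC when
                                           TARGET_PREFIXED, else BAD.  */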
26284
26285 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26286 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26287 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26288 a D-form or DS-form instruction. X-form and base_reg are always
26289 allowed. */
26290 bool
26291 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26292 enum non_prefixed_form non_prefixed_format)
26293 {
26294 enum insn_form result_form;
26295
26296 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26297
26298 switch (non_prefixed_format)
26299 {
26300 case NON_PREFIXED_D:
26301 switch (result_form)
26302 {
26303 case INSN_FORM_X:
26304 case INSN_FORM_D:
26305 case INSN_FORM_DS:
26306 case INSN_FORM_BASE_REG:
26307 return true;
26308 default:
26309 return false;
26310 }
26311 break;
26312 case NON_PREFIXED_DS:
26313 switch (result_form)
26314 {
26315 case INSN_FORM_X:
26316 case INSN_FORM_DS:
26317 case INSN_FORM_BASE_REG:
26318 return true;
26319 default:
26320 return false;
26321 }
26322 break;
26323 default:
26324 break;
26325 }
26326 return false;
26327 }
26328
26329 /* Return true if a REG with a given MODE is loaded from or stored into a MEM
26330 location that uses a non-prefixed D/DS/DQ-form address. This is used to
26331 validate the load or store for the PCREL_OPT optimization, to make sure it
26332 is an instruction that can be optimized.
26333
26334 We need to specify the MODE separately from the REG to allow for loads that
26335 include zero/sign/float extension. */
26336
26337 bool
26338 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26339 {
26340 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26341 PCREL_OPT optimization. */
26342 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26343 if (non_prefixed == NON_PREFIXED_X)
26344 return false;
26345
26346 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26347 rtx addr = XEXP (mem, 0);
26348 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26349 return (iform == INSN_FORM_BASE_REG
26350 || iform == INSN_FORM_D
26351 || iform == INSN_FORM_DS
26352 || iform == INSN_FORM_DQ);
26353 }
26354
26355 /* Helper function to see if we're potentially looking at lfs/stfs.
26356 - PARALLEL containing a SET and a CLOBBER
26357 - stfs:
26358 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26359 - CLOBBER is a V4SF
26360 - lfs:
26361 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26362 - CLOBBER is a DI
26363 */
26364
26365 static bool
26366 is_lfs_stfs_insn (rtx_insn *insn)
26367 {
26368 rtx pattern = PATTERN (insn);
26369 if (GET_CODE (pattern) != PARALLEL)
26370 return false;
26371
26372 /* This should be a parallel with exactly one set and one clobber. */
26373 if (XVECLEN (pattern, 0) != 2)
26374 return false;
26375
26376 rtx set = XVECEXP (pattern, 0, 0);
26377 if (GET_CODE (set) != SET)
26378 return false;
26379
26380 rtx clobber = XVECEXP (pattern, 0, 1);
26381 if (GET_CODE (clobber) != CLOBBER)
26382 return false;
26383
26384 /* All we care about is that the destination of the SET is a mem:SI,
26385 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26386 should be a scratch:V4SF. */
26387
26388 rtx dest = SET_DEST (set);
26389 rtx src = SET_SRC (set);
26390 rtx scratch = SET_DEST (clobber);
26391
26392 if (GET_CODE (src) != UNSPEC)
26393 return false;
26394
26395 /* stfs case. */
26396 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26397 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26398 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26399 return true;
26400
26401 /* lfs case. */
26402 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26403 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26404 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26405 return true;
26406
26407 return false;
26408 }
26409
26410 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26411 instruction format (D/DS/DQ) used for offset memory. */
26412
26413 enum non_prefixed_form
26414 reg_to_non_prefixed (rtx reg, machine_mode mode)
26415 {
26416 /* If it isn't a register, use the defaults. */
26417 if (!REG_P (reg) && !SUBREG_P (reg))
26418 return NON_PREFIXED_DEFAULT;
26419
26420 unsigned int r = reg_or_subregno (reg);
26421
26422 /* If we have a pseudo, use the default instruction format. */
26423 if (!HARD_REGISTER_NUM_P (r))
26424 return NON_PREFIXED_DEFAULT;
26425
26426 unsigned size = GET_MODE_SIZE (mode);
26427
26428 /* FPR registers use the D instruction format for scalars, and the DQ format
26429 for vectors, IEEE 128-bit floating point, and 128-bit integers. Before
26430 power9, only indexed addressing was available for vectors. */
26431 if (FP_REGNO_P (r))
26432 {
26433 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26434 return NON_PREFIXED_D;
26435
26436 else if (size < 8)
26437 return NON_PREFIXED_X;
26438
26439 else if (TARGET_VSX && size >= 16
26440 && (VECTOR_MODE_P (mode)
26441 || VECTOR_ALIGNMENT_P (mode)
26442 || mode == TImode || mode == CTImode))
26443 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26444
26445 else
26446 return NON_PREFIXED_DEFAULT;
26447 }
26448
26449 /* Altivec registers use the DS instruction format for scalars, and the DQ
26450 format for vectors, IEEE 128-bit floating point, and 128-bit integers.
26451 Before power9, only indexed addressing was available. */
26452 else if (ALTIVEC_REGNO_P (r))
26453 {
26454 if (!TARGET_P9_VECTOR)
26455 return NON_PREFIXED_X;
26456
26457 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26458 return NON_PREFIXED_DS;
26459
26460 else if (size < 8)
26461 return NON_PREFIXED_X;
26462
26463 else if (TARGET_VSX && size >= 16
26464 && (VECTOR_MODE_P (mode)
26465 || VECTOR_ALIGNMENT_P (mode)
26466 || mode == TImode || mode == CTImode))
26467 return NON_PREFIXED_DQ;
26468
26469 else
26470 return NON_PREFIXED_DEFAULT;
26471 }
26472
26473 /* GPR registers use the DS instruction format for 64-bit items on 64-bit
26474 systems, and the D format otherwise. Assume that any other register, such
26475 as LR, CRs, etc. will go through the GPR registers for memory operations. */
26476 else if (TARGET_POWERPC64 && size >= 8)
26477 return NON_PREFIXED_DS;
26478
26479 return NON_PREFIXED_D;
26480 }
26481
26482 \f
26483 /* Whether a load instruction is a prefixed instruction. This is called from
26484 the prefixed attribute processing. */
26485
26486 bool
26487 prefixed_load_p (rtx_insn *insn)
26488 {
26489 /* Validate the insn to make sure it is a normal load insn. */
26490 extract_insn_cached (insn);
26491 if (recog_data.n_operands < 2)
26492 return false;
26493
26494 rtx reg = recog_data.operand[0];
26495 rtx mem = recog_data.operand[1];
26496
26497 if (!REG_P (reg) && !SUBREG_P (reg))
26498 return false;
26499
26500 if (!MEM_P (mem))
26501 return false;
26502
26503 /* Prefixed load instructions do not support update or indexed forms. */
26504 if (get_attr_indexed (insn) == INDEXED_YES
26505 || get_attr_update (insn) == UPDATE_YES)
26506 return false;
26507
26508 /* LWA uses the DS format instead of the D format that LWZ uses. */
26509 enum non_prefixed_form non_prefixed;
26510 machine_mode reg_mode = GET_MODE (reg);
26511 machine_mode mem_mode = GET_MODE (mem);
26512
26513 if (mem_mode == SImode && reg_mode == DImode
26514 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26515 non_prefixed = NON_PREFIXED_DS;
26516
26517 else
26518 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26519
26520 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26521 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26522 else
26523 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26524 }
26525
26526 /* Whether a store instruction is a prefixed instruction. This is called from
26527 the prefixed attribute processing. */
26528
26529 bool
26530 prefixed_store_p (rtx_insn *insn)
26531 {
26532 /* Validate the insn to make sure it is a normal store insn. */
26533 extract_insn_cached (insn);
26534 if (recog_data.n_operands < 2)
26535 return false;
26536
26537 rtx mem = recog_data.operand[0];
26538 rtx reg = recog_data.operand[1];
26539
26540 if (!REG_P (reg) && !SUBREG_P (reg))
26541 return false;
26542
26543 if (!MEM_P (mem))
26544 return false;
26545
26546 /* Prefixed store instructions do not support update or indexed forms. */
26547 if (get_attr_indexed (insn) == INDEXED_YES
26548 || get_attr_update (insn) == UPDATE_YES)
26549 return false;
26550
26551 machine_mode mem_mode = GET_MODE (mem);
26552 rtx addr = XEXP (mem, 0);
26553 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26554
26555 /* Need to make sure we aren't looking at a stfs which doesn't look
26556 like the other things reg_to_non_prefixed/address_is_prefixed
26557 looks for. */
26558 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26559 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26560 else
26561 return address_is_prefixed (addr, mem_mode, non_prefixed);
26562 }
26563
26564 /* Whether a load immediate or add instruction is a prefixed instruction. This
26565 is called from the prefixed attribute processing. */
26566
26567 bool
26568 prefixed_paddi_p (rtx_insn *insn)
26569 {
26570 rtx set = single_set (insn);
26571 if (!set)
26572 return false;
26573
26574 rtx dest = SET_DEST (set);
26575 rtx src = SET_SRC (set);
26576
26577 if (!REG_P (dest) && !SUBREG_P (dest))
26578 return false;
26579
26580 /* Is this a load immediate that can't be done with a simple ADDI or
26581 ADDIS? */
26582 if (CONST_INT_P (src))
26583 return (satisfies_constraint_eI (src)
26584 && !satisfies_constraint_I (src)
26585 && !satisfies_constraint_L (src));
26586
26587 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26588 ADDIS? */
26589 if (GET_CODE (src) == PLUS)
26590 {
26591 rtx op1 = XEXP (src, 1);
26592
26593 return (CONST_INT_P (op1)
26594 && satisfies_constraint_eI (op1)
26595 && !satisfies_constraint_I (op1)
26596 && !satisfies_constraint_L (op1));
26597 }
26598
26599 /* If not, is it a load of a PC-relative address? */
26600 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26601 return false;
26602
26603 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26604 return false;
26605
26606 enum insn_form iform = address_to_insn_form (src, Pmode,
26607 NON_PREFIXED_DEFAULT);
26608
26609 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26610 }
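
/* Example (a sketch): the constant 0x12345 does not fit a 16-bit signed
   immediate (constraint I) and is not a shifted 16-bit value
   (constraint L), but satisfies the 34-bit eI constraint, so it is
   loaded with the prefixed

        pli 9,0x12345           # extended mnemonic for paddi 9,0,0x12345

   whereas 0x70000 (7 << 16) still fits a single non-prefixed ADDIS.  */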
26611
26612 /* Whether an instruction is a prefixed XXSPLTI* instruction. This is called
26613 from the prefixed attribute processing. */
26614
26615 bool
26616 prefixed_xxsplti_p (rtx_insn *insn)
26617 {
26618 rtx set = single_set (insn);
26619 if (!set)
26620 return false;
26621
26622 rtx dest = SET_DEST (set);
26623 rtx src = SET_SRC (set);
26624 machine_mode mode = GET_MODE (dest);
26625
26626 if (!REG_P (dest) && !SUBREG_P (dest))
26627 return false;
26628
26629 if (GET_CODE (src) == UNSPEC)
26630 {
26631 int unspec = XINT (src, 1);
26632 return (unspec == UNSPEC_XXSPLTIW
26633 || unspec == UNSPEC_XXSPLTIDP
26634 || unspec == UNSPEC_XXSPLTI32DX);
26635 }
26636
26637 vec_const_128bit_type vsx_const;
26638 if (vec_const_128bit_to_bytes (src, mode, &vsx_const))
26639 {
26640 if (constant_generates_xxspltiw (&vsx_const))
26641 return true;
26642
26643 if (constant_generates_xxspltidp (&vsx_const))
26644 return true;
26645 }
26646
26647 return false;
26648 }
26649
26650 /* Whether the next instruction needs a 'p' prefix issued before the
26651 instruction is printed out. */
26652 static bool prepend_p_to_next_insn;
26653
26654 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26655 outputting the assembler code. On the PowerPC, we remember if the current
26656 insn is a prefixed insn where we need to emit a 'p' before the insn.
26657
26658 In addition, if the insn is part of the optimization of a PC-relative
26659 reference to an external label, this is recorded as well. */
26660 void
26661 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26662 {
26663 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26664 == MAYBE_PREFIXED_YES
26665 && get_attr_prefixed (insn) == PREFIXED_YES);
26666 return;
26667 }
26668
26669 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26670 We use it to emit a 'p' for prefixed insns that is set in
26671 FINAL_PRESCAN_INSN. */
26672 void
26673 rs6000_asm_output_opcode (FILE *stream)
26674 {
26675 if (prepend_p_to_next_insn)
26676 {
26677 fprintf (stream, "p");
26678
26679 /* Reset the flag in the case where there are separate insn lines in the
26680 sequence, so the 'p' is only emitted for the first line. This shows up
26681 when we are doing the PCREL_OPT optimization, in that the label created
26682 with %r<n> would have a leading 'p' printed. */
26683 prepend_p_to_next_insn = false;
26684 }
26685
26686 return;
26687 }
26688
26689 /* Emit the relocation to tie the next instruction to a previous instruction
26690 that loads up an external address. This is used to do the PCREL_OPT
26691 optimization. Note, the label is generated after the PLD of the GOT
26692 PC-relative address to allow the assembler to insert NOPs before the PLD
26693 instruction. The operand is a constant integer that is the label
26694 number. */
26695
26696 void
26697 output_pcrel_opt_reloc (rtx label_num)
26698 {
26699 rtx operands[1] = { label_num };
26700 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26701 operands);
26702 }
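
/* The resulting assembly ties the two instructions together roughly as
   follows (a sketch; the symbol and registers are illustrative):

        pld 9,ext_symbol@got@pcrel
     .Lpcrel0:
        ...
        .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
        lwa 3,0(9)

   allowing the linker to collapse the GOT load and the dependent memory
   access into a single PC-relative instruction when the symbol turns
   out to be local.  */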
26703
26704 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26705 should be adjusted to reflect any required changes. This macro is used when
26706 there is some systematic length adjustment required that would be difficult
26707 to express in the length attribute.
26708
26709 In the PowerPC, we use this to adjust the length of an instruction if one or
26710 more prefixed instructions are generated, using the attribute
26711 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26712 hardware requires that a prefixed instruction does not cross a 64-byte
26713 boundary. This means the compiler has to assume the length of the first
26714 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26715 already set for the non-prefixed instruction, we just need to update for the
26716 difference. */
26717
26718 int
26719 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26720 {
26721 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26722 {
26723 rtx pattern = PATTERN (insn);
26724 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26725 && get_attr_prefixed (insn) == PREFIXED_YES)
26726 {
26727 int num_prefixed = get_attr_max_prefixed_insns (insn);
26728 length += 4 * (num_prefixed + 1);
26729 }
26730 }
26731
26732 return length;
26733 }
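
/* For instance, an insn recorded with the base length of 4 that expands
   to one prefixed instruction becomes 4 + 4 * (1 + 1) = 12 bytes: 8 for
   the prefixed form itself plus 4 for the alignment NOP the assembler
   may insert to keep it inside a 64-byte block.  */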
26734
26735 \f
26736 #ifdef HAVE_GAS_HIDDEN
26737 # define USE_HIDDEN_LINKONCE 1
26738 #else
26739 # define USE_HIDDEN_LINKONCE 0
26740 #endif
26741
26742 /* Fills in the label name that should be used for a 476 link stack thunk. */
26743
26744 void
26745 get_ppc476_thunk_name (char name[32])
26746 {
26747 gcc_assert (TARGET_LINK_STACK);
26748
26749 if (USE_HIDDEN_LINKONCE)
26750 sprintf (name, "__ppc476.get_thunk");
26751 else
26752 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26753 }
26754
26755 /* This function emits the simple thunk routine that is used to preserve
26756 the link stack on the 476 cpu. */
26757
26758 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26759 static void
26760 rs6000_code_end (void)
26761 {
26762 char name[32];
26763 tree decl;
26764
26765 if (!TARGET_LINK_STACK)
26766 return;
26767
26768 get_ppc476_thunk_name (name);
26769
26770 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26771 build_function_type_list (void_type_node, NULL_TREE));
26772 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26773 NULL_TREE, void_type_node);
26774 TREE_PUBLIC (decl) = 1;
26775 TREE_STATIC (decl) = 1;
26776
26777 #if RS6000_WEAK
26778 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26779 {
26780 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26781 targetm.asm_out.unique_section (decl, 0);
26782 switch_to_section (get_named_section (decl, NULL, 0));
26783 DECL_WEAK (decl) = 1;
26784 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26785 targetm.asm_out.globalize_label (asm_out_file, name);
26786 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26787 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26788 }
26789 else
26790 #endif
26791 {
26792 switch_to_section (text_section);
26793 ASM_OUTPUT_LABEL (asm_out_file, name);
26794 }
26795
26796 DECL_INITIAL (decl) = make_node (BLOCK);
26797 current_function_decl = decl;
26798 allocate_struct_function (decl, false);
26799 init_function_start (decl);
26800 first_function_block_is_cold = false;
26801 /* Make sure unwind info is emitted for the thunk if needed. */
26802 final_start_function (emit_barrier (), asm_out_file, 1);
26803
26804 fputs ("\tblr\n", asm_out_file);
26805
26806 final_end_function ();
26807 init_insn_lengths ();
26808 free_after_compilation (cfun);
26809 set_cfun (NULL);
26810 current_function_decl = NULL;
26811 }
26812
26813 /* Add r30 to hard reg set if the prologue sets it up and it is not
26814 pic_offset_table_rtx. */
26815
26816 static void
26817 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26818 {
26819 if (!TARGET_SINGLE_PIC_BASE
26820 && TARGET_TOC
26821 && TARGET_MINIMAL_TOC
26822 && !constant_pool_empty_p ())
26823 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26824 if (cfun->machine->split_stack_argp_used)
26825 add_to_hard_reg_set (&set->set, Pmode, 12);
26826
26827 /* Make sure the hard reg set doesn't include r2, which was possibly added
26828 via PIC_OFFSET_TABLE_REGNUM. */
26829 if (TARGET_TOC)
26830 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26831 }
26832
26833 \f
26834 /* Helper function for rs6000_split_logical to emit a logical instruction after
26835 splitting the operation into single GPR registers.
26836
26837 DEST is the destination register.
26838 OP1 and OP2 are the input source registers.
26839 CODE is the base operation (AND, IOR, XOR, NOT).
26840 MODE is the machine mode.
26841 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26842 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26843 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26844
26845 static void
26846 rs6000_split_logical_inner (rtx dest,
26847 rtx op1,
26848 rtx op2,
26849 enum rtx_code code,
26850 machine_mode mode,
26851 bool complement_final_p,
26852 bool complement_op1_p,
26853 bool complement_op2_p)
26854 {
26855 rtx bool_rtx;
26856
26857 /* Optimize AND of 0/all-ones and IOR/XOR of 0. */
26858 if (op2 && CONST_INT_P (op2)
26859 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26860 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26861 {
26862 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26863 HOST_WIDE_INT value = INTVAL (op2) & mask;
26864
26865 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26866 if (code == AND)
26867 {
26868 if (value == 0)
26869 {
26870 emit_insn (gen_rtx_SET (dest, const0_rtx));
26871 return;
26872 }
26873
26874 else if (value == mask)
26875 {
26876 if (!rtx_equal_p (dest, op1))
26877 emit_insn (gen_rtx_SET (dest, op1));
26878 return;
26879 }
26880 }
26881
26882 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26883 into separate ORI/ORIS or XORI/XORIS instructions. */
26884 else if (code == IOR || code == XOR)
26885 {
26886 if (value == 0)
26887 {
26888 if (!rtx_equal_p (dest, op1))
26889 emit_insn (gen_rtx_SET (dest, op1));
26890 return;
26891 }
26892 }
26893 }
26894
26895 if (code == AND && mode == SImode
26896 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26897 {
26898 emit_insn (gen_andsi3 (dest, op1, op2));
26899 return;
26900 }
26901
26902 if (complement_op1_p)
26903 op1 = gen_rtx_NOT (mode, op1);
26904
26905 if (complement_op2_p)
26906 op2 = gen_rtx_NOT (mode, op2);
26907
26908 /* For canonical RTL, if only one arm is inverted it is the first. */
26909 if (!complement_op1_p && complement_op2_p)
26910 std::swap (op1, op2);
26911
26912 bool_rtx = ((code == NOT)
26913 ? gen_rtx_NOT (mode, op1)
26914 : gen_rtx_fmt_ee (code, mode, op1, op2));
26915
26916 if (complement_final_p)
26917 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26918
26919 emit_insn (gen_rtx_SET (dest, bool_rtx));
26920 }
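/* For example (a sketch of the canonical forms, not compiler output):
   CODE == AND with COMPLEMENT_OP2_P set yields the ANDC form

	(set dest (and:SI (not:SI op2) op1))

   with the inverted arm first, while CODE == AND with COMPLEMENT_FINAL_P
   set yields the NAND form

	(set dest (not:SI (and:SI op1 op2))).  */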
26921
26922 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26923 operations are split immediately during RTL generation to allow for more
26924 optimizations of the AND/IOR/XOR.
26925
26926 OPERANDS is an array containing the destination and two input operands.
26927 CODE is the base operation (AND, IOR, XOR, NOT).
26928 The operation is always DImode, split into SImode halves.
26929 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26930 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26931 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26934
26935 static void
26936 rs6000_split_logical_di (rtx operands[3],
26937 enum rtx_code code,
26938 bool complement_final_p,
26939 bool complement_op1_p,
26940 bool complement_op2_p)
26941 {
26942 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
26943 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26944 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
26945 enum hi_lo { hi = 0, lo = 1 };
26946 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26947 size_t i;
26948
26949 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26950 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26951 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26952 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26953
26954 if (code == NOT)
26955 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26956 else
26957 {
26958 if (!CONST_INT_P (operands[2]))
26959 {
26960 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26961 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26962 }
26963 else
26964 {
26965 HOST_WIDE_INT value = INTVAL (operands[2]);
26966 HOST_WIDE_INT value_hi_lo[2];
26967
26968 gcc_assert (!complement_final_p);
26969 gcc_assert (!complement_op1_p);
26970 gcc_assert (!complement_op2_p);
26971
26972 value_hi_lo[hi] = value >> 32;
26973 value_hi_lo[lo] = value & lower_32bits;
26974
26975 for (i = 0; i < 2; i++)
26976 {
26977 HOST_WIDE_INT sub_value = value_hi_lo[i];
26978
26979 if (sub_value & sign_bit)
26980 sub_value |= upper_32bits;
26981
26982 op2_hi_lo[i] = GEN_INT (sub_value);
26983
26984 /* If this is an AND instruction, check to see if we need to load
26985 the value in a register. */
26986 if (code == AND && sub_value != -1 && sub_value != 0
26987 && !and_operand (op2_hi_lo[i], SImode))
26988 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26989 }
26990 }
26991 }
26992
26993 for (i = 0; i < 2; i++)
26994 {
26995 /* Split large IOR/XOR operations. */
26996 if ((code == IOR || code == XOR)
26997 && CONST_INT_P (op2_hi_lo[i])
26998 && !complement_final_p
26999 && !complement_op1_p
27000 && !complement_op2_p
27001 && !logical_const_operand (op2_hi_lo[i], SImode))
27002 {
27003 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
27004 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
27005 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
27006 rtx tmp = gen_reg_rtx (SImode);
27007
27008 /* Make sure the constant is sign extended. */
27009 if ((hi_16bits & sign_bit) != 0)
27010 hi_16bits |= upper_32bits;
27011
27012 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
27013 code, SImode, false, false, false);
27014
27015 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
27016 code, SImode, false, false, false);
27017 }
27018 else
27019 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
27020 code, SImode, complement_final_p,
27021 complement_op1_p, complement_op2_p);
27022 }
27023
27024 return;
27025 }
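/* Worked example (schematic): on a 32-bit target,

	(set (reg:DI d) (ior:DI (reg:DI a) (const_int 0x12345678)))

   is split into SImode halves.  The high half is IOR with 0 and becomes a
   plain move; the low half's constant has both 16-bit halves nonzero, so
   it is split into an ORIS/ORI style pair:

	tmp   = lo(a) | 0x12340000	; oris
	lo(d) = tmp   | 0x00005678	; ori  */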
27026
27027 /* Split the insns that make up boolean operations operating on multiple GPR
27028 registers. The boolean MD patterns ensure that the inputs either are
27029 exactly the same as the output registers, or there is no overlap.
27030
27031 OPERANDS is an array containing the destination and two input operands.
27032 CODE is the base operation (AND, IOR, XOR, NOT).
27033 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27034 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27035 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27036
27037 void
27038 rs6000_split_logical (rtx operands[3],
27039 enum rtx_code code,
27040 bool complement_final_p,
27041 bool complement_op1_p,
27042 bool complement_op2_p)
27043 {
27044 machine_mode mode = GET_MODE (operands[0]);
27045 machine_mode sub_mode;
27046 rtx op0, op1, op2;
27047 int sub_size, regno0, regno1, nregs, i;
27048
27049 /* If this is DImode, use the specialized version that can run before
27050 register allocation. */
27051 if (mode == DImode && !TARGET_POWERPC64)
27052 {
27053 rs6000_split_logical_di (operands, code, complement_final_p,
27054 complement_op1_p, complement_op2_p);
27055 return;
27056 }
27057
27058 op0 = operands[0];
27059 op1 = operands[1];
27060 op2 = (code == NOT) ? NULL_RTX : operands[2];
27061 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
27062 sub_size = GET_MODE_SIZE (sub_mode);
27063 regno0 = REGNO (op0);
27064 regno1 = REGNO (op1);
27065
27066 gcc_assert (reload_completed);
27067 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27068 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27069
27070 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
27071 gcc_assert (nregs > 1);
27072
27073 if (op2 && REG_P (op2))
27074 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
27075
27076 for (i = 0; i < nregs; i++)
27077 {
27078 int offset = i * sub_size;
27079 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
27080 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
27081 rtx sub_op2 = ((code == NOT)
27082 ? NULL_RTX
27083 : simplify_subreg (sub_mode, op2, mode, offset));
27084
27085 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
27086 complement_final_p, complement_op1_p,
27087 complement_op2_p);
27088 }
27089
27090 return;
27091 }
27092
27093 /* Emit instructions to move SRC to DST. Called by splitters for
27094 multi-register moves. It will emit at most one instruction for
27095 each register that is accessed; that is, it won't emit li/lis pairs
27096 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27097 register. */
27098
27099 void
27100 rs6000_split_multireg_move (rtx dst, rtx src)
27101 {
27102 /* The register number of the first register being moved. */
27103 int reg;
27104 /* The mode that is to be moved. */
27105 machine_mode mode;
27106 /* The mode that the move is being done in, and its size. */
27107 machine_mode reg_mode;
27108 int reg_mode_size;
27109 /* The number of registers that will be moved. */
27110 int nregs;
27111
27112 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
27113 mode = GET_MODE (dst);
27114 nregs = hard_regno_nregs (reg, mode);
27115
27116 /* If we have a vector quad register for MMA, and this is a load or store,
27117 see if we can use vector paired load/stores. */
27118 if (mode == XOmode && TARGET_MMA
27119 && (MEM_P (dst) || MEM_P (src)))
27120 {
27121 reg_mode = OOmode;
27122 nregs /= 2;
27123 }
27124 /* If we have a vector pair/quad mode, split it into two/four separate
27125 vectors. */
27126 else if (mode == OOmode || mode == XOmode)
27127 reg_mode = V1TImode;
27128 else if (FP_REGNO_P (reg))
27129 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
27130 (TARGET_HARD_FLOAT ? DFmode : SFmode);
27131 else if (ALTIVEC_REGNO_P (reg))
27132 reg_mode = V16QImode;
27133 else
27134 reg_mode = word_mode;
27135 reg_mode_size = GET_MODE_SIZE (reg_mode);
27136
27137 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
27138
27139 /* TDmode residing in FP registers is special, since the ISA requires that
27140 the lower-numbered word of a register pair is always the most significant
27141 word, even in little-endian mode. This does not match the usual subreg
27142 semantics, so we cannot use simplify_gen_subreg in those cases. Access
27143 the appropriate constituent registers "by hand" in little-endian mode.
27144
27145 Note we do not need to check for destructive overlap here since TDmode
27146 can only reside in even/odd register pairs. */
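/* Schematic example: a TDmode value in f10:f11 keeps its most significant
   half in f10 even on little-endian, so the little-endian subreg at byte
   offset 0 (the least significant half) must be accessed as f11, i.e. as
   REGNO + nregs - 1 - i rather than REGNO + i.  */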
27147 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
27148 {
27149 rtx p_src, p_dst;
27150 int i;
27151
27152 for (i = 0; i < nregs; i++)
27153 {
27154 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27155 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27156 else
27157 p_src = simplify_gen_subreg (reg_mode, src, mode,
27158 i * reg_mode_size);
27159
27160 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27161 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27162 else
27163 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27164 i * reg_mode_size);
27165
27166 emit_insn (gen_rtx_SET (p_dst, p_src));
27167 }
27168
27169 return;
27170 }
27171
27172 /* The __vector_pair and __vector_quad modes are multi-register
27173 modes, so if we have to load or store the registers, we have to be
27174 careful to properly swap them if we're in little endian mode
27175 below. This means the last register gets the first memory
27176 location. We also need to be careful of using the right register
27177 numbers if we are splitting XO to OO. */
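/* Schematic example of the swap: storing an XOmode accumulator held in
   vs0..vs3 as two OOmode pairs on little-endian writes the vs2:vs3 pair to
   bytes 0..31 of the destination and the vs0:vs1 pair to bytes 32..63,
   i.e. the reverse of the big-endian order.  */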
27178 if (mode == OOmode || mode == XOmode)
27179 {
27180 nregs = hard_regno_nregs (reg, mode);
27181 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27182 if (MEM_P (dst))
27183 {
27184 unsigned offset = 0;
27185 unsigned size = GET_MODE_SIZE (reg_mode);
27186
27187 /* If we are reading an accumulator register, we have to
27188 deprime it before we can access it. */
27189 if (TARGET_MMA
27190 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27191 emit_insn (gen_mma_xxmfacc (src, src));
27192
27193 for (int i = 0; i < nregs; i += reg_mode_nregs)
27194 {
27195 unsigned subreg
27196 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27197 rtx dst2 = adjust_address (dst, reg_mode, offset);
27198 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27199 offset += size;
27200 emit_insn (gen_rtx_SET (dst2, src2));
27201 }
27202
27203 return;
27204 }
27205
27206 if (MEM_P (src))
27207 {
27208 unsigned offset = 0;
27209 unsigned size = GET_MODE_SIZE (reg_mode);
27210
27211 for (int i = 0; i < nregs; i += reg_mode_nregs)
27212 {
27213 unsigned subreg
27214 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27215 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27216 rtx src2 = adjust_address (src, reg_mode, offset);
27217 offset += size;
27218 emit_insn (gen_rtx_SET (dst2, src2));
27219 }
27220
27221 /* If we are writing an accumulator register, we have to
27222 prime it after we've written it. */
27223 if (TARGET_MMA
27224 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27225 emit_insn (gen_mma_xxmtacc (dst, dst));
27226
27227 return;
27228 }
27229
27230 if (GET_CODE (src) == UNSPEC
27231 || GET_CODE (src) == UNSPEC_VOLATILE)
27232 {
27233 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27234 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27235 gcc_assert (REG_P (dst));
27236 if (GET_MODE (src) == XOmode)
27237 gcc_assert (FP_REGNO_P (REGNO (dst)));
27238 if (GET_MODE (src) == OOmode)
27239 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27240
27241 int nvecs = XVECLEN (src, 0);
27242 for (int i = 0; i < nvecs; i++)
27243 {
27244 rtx op;
27245 int regno = reg + i;
27246
27247 if (WORDS_BIG_ENDIAN)
27248 {
27249 op = XVECEXP (src, 0, i);
27250
27251 /* If we are loading an even VSX register and the memory location
27252 is adjacent to the next register's memory location (if any),
27253 then we can load them both with one LXVP instruction. */
27254 if ((regno & 1) == 0)
27255 {
27256 rtx op2 = XVECEXP (src, 0, i + 1);
27257 if (adjacent_mem_locations (op, op2) == op)
27258 {
27259 op = adjust_address (op, OOmode, 0);
27260 /* Skip the next register, since we're going to
27261 load it together with this register. */
27262 i++;
27263 }
27264 }
27265 }
27266 else
27267 {
27268 op = XVECEXP (src, 0, nvecs - i - 1);
27269
27270 /* If we are loading an even VSX register and the memory location
27271 is adjacent to the next register's memory location (if any),
27272 then we can load them both with one LXVP instruction. */
27273 if ((regno & 1) == 0)
27274 {
27275 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27276 if (adjacent_mem_locations (op2, op) == op2)
27277 {
27278 op = adjust_address (op2, OOmode, 0);
27279 /* Skip the next register, since we're going to
27280 load it together with this register. */
27281 i++;
27282 }
27283 }
27284 }
27285
27286 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27287 emit_insn (gen_rtx_SET (dst_i, op));
27288 }
27289
27290 /* We are writing an accumulator register, so we have to
27291 prime it after we've written it. */
27292 if (GET_MODE (src) == XOmode)
27293 emit_insn (gen_mma_xxmtacc (dst, dst));
27294
27295 return;
27296 }
27297
27298 /* Register -> register moves can use common code. */
27299 }
27300
27301 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27302 {
27303 /* If we are reading an accumulator register, we have to
27304 deprime it before we can access it. */
27305 if (TARGET_MMA
27306 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27307 emit_insn (gen_mma_xxmfacc (src, src));
27308
27309 /* Move register range backwards, if we might have destructive
27310 overlap. */
27311 int i;
27312 /* XO/OO are opaque so cannot use subregs. */
27313 if (mode == OOmode || mode == XOmode)
27314 {
27315 for (i = nregs - 1; i >= 0; i--)
27316 {
27317 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27318 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27319 emit_insn (gen_rtx_SET (dst_i, src_i));
27320 }
27321 }
27322 else
27323 {
27324 for (i = nregs - 1; i >= 0; i--)
27325 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27326 i * reg_mode_size),
27327 simplify_gen_subreg (reg_mode, src, mode,
27328 i * reg_mode_size)));
27329 }
27330
27331 /* If we are writing an accumulator register, we have to
27332 prime it after we've written it. */
27333 if (TARGET_MMA
27334 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27335 emit_insn (gen_mma_xxmtacc (dst, dst));
27336 }
27337 else
27338 {
27339 int i;
27340 int j = -1;
27341 bool used_update = false;
27342 rtx restore_basereg = NULL_RTX;
27343
27344 if (MEM_P (src) && INT_REGNO_P (reg))
27345 {
27346 rtx breg;
27347
27348 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27349 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27350 {
27351 rtx delta_rtx;
27352 breg = XEXP (XEXP (src, 0), 0);
27353 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27354 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27355 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27356 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27357 src = replace_equiv_address (src, breg);
27358 }
27359 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27360 {
27361 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27362 {
27363 rtx basereg = XEXP (XEXP (src, 0), 0);
27364 if (TARGET_UPDATE)
27365 {
27366 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27367 emit_insn (gen_rtx_SET (ndst,
27368 gen_rtx_MEM (reg_mode,
27369 XEXP (src, 0))));
27370 used_update = true;
27371 }
27372 else
27373 emit_insn (gen_rtx_SET (basereg,
27374 XEXP (XEXP (src, 0), 1)));
27375 src = replace_equiv_address (src, basereg);
27376 }
27377 else
27378 {
27379 rtx basereg = gen_rtx_REG (Pmode, reg);
27380 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27381 src = replace_equiv_address (src, basereg);
27382 }
27383 }
27384
27385 breg = XEXP (src, 0);
27386 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27387 breg = XEXP (breg, 0);
27388
27389 /* If the base register we are using to address memory is
27390 also a destination reg, then change that register last. */
27391 if (REG_P (breg)
27392 && REGNO (breg) >= REGNO (dst)
27393 && REGNO (breg) < REGNO (dst) + nregs)
27394 j = REGNO (breg) - REGNO (dst);
27395 }
27396 else if (MEM_P (dst) && INT_REGNO_P (reg))
27397 {
27398 rtx breg;
27399
27400 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27401 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27402 {
27403 rtx delta_rtx;
27404 breg = XEXP (XEXP (dst, 0), 0);
27405 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27406 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27407 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27408
27409 /* We have to update the breg before doing the store.
27410 Use store with update, if available. */
27411
27412 if (TARGET_UPDATE)
27413 {
27414 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27415 emit_insn (TARGET_32BIT
27416 ? (TARGET_POWERPC64
27417 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27418 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27419 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27420 used_update = true;
27421 }
27422 else
27423 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27424 dst = replace_equiv_address (dst, breg);
27425 }
27426 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27427 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27428 {
27429 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27430 {
27431 rtx basereg = XEXP (XEXP (dst, 0), 0);
27432 if (TARGET_UPDATE)
27433 {
27434 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27435 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27436 XEXP (dst, 0)),
27437 nsrc));
27438 used_update = true;
27439 }
27440 else
27441 emit_insn (gen_rtx_SET (basereg,
27442 XEXP (XEXP (dst, 0), 1)));
27443 dst = replace_equiv_address (dst, basereg);
27444 }
27445 else
27446 {
27447 rtx basereg = XEXP (XEXP (dst, 0), 0);
27448 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27449 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27450 && REG_P (basereg)
27451 && REG_P (offsetreg)
27452 && REGNO (basereg) != REGNO (offsetreg));
27453 if (REGNO (basereg) == 0)
27454 {
27455 rtx tmp = offsetreg;
27456 offsetreg = basereg;
27457 basereg = tmp;
27458 }
27459 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27460 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27461 dst = replace_equiv_address (dst, basereg);
27462 }
27463 }
27464 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27465 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27466 }
27467
27468 /* If we are reading an accumulator register, we have to
27469 deprime it before we can access it. */
27470 if (TARGET_MMA && REG_P (src)
27471 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27472 emit_insn (gen_mma_xxmfacc (src, src));
27473
27474 for (i = 0; i < nregs; i++)
27475 {
27476 /* Calculate index to next subword. */
27477 ++j;
27478 if (j == nregs)
27479 j = 0;
27480
27481 /* If the compiler already emitted the move of the first word via
27482 store with update, there is no need to do anything. */
27483 if (j == 0 && used_update)
27484 continue;
27485
27486 /* XO/OO are opaque so cannot use subregs. */
27487 if (mode == OOmode || mode == XOmode)
27488 {
27489 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27490 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27491 emit_insn (gen_rtx_SET (dst_i, src_i));
27492 }
27493 else
27494 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27495 j * reg_mode_size),
27496 simplify_gen_subreg (reg_mode, src, mode,
27497 j * reg_mode_size)));
27498 }
27499
27500 /* If we are writing an accumulator register, we have to
27501 prime it after we've written it. */
27502 if (TARGET_MMA && REG_P (dst)
27503 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27504 emit_insn (gen_mma_xxmtacc (dst, dst));
27505
27506 if (restore_basereg != NULL_RTX)
27507 emit_insn (restore_basereg);
27508 }
27509 }
27510 \f
27511 /* Return true if the peephole2 can combine a load involving a combination of
27512 an addis instruction and a load with an offset that can be fused together on
27513 a power8. */
27514
27515 bool
27516 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27517 rtx addis_value, /* addis value. */
27518 rtx target, /* target register that is loaded. */
27519 rtx mem) /* bottom part of the memory addr. */
27520 {
27521 rtx addr;
27522 rtx base_reg;
27523
27524 /* Validate arguments. */
27525 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27526 return false;
27527
27528 if (!base_reg_operand (target, GET_MODE (target)))
27529 return false;
27530
27531 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27532 return false;
27533
27534 /* Allow sign/zero extension. */
27535 if (GET_CODE (mem) == ZERO_EXTEND
27536 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27537 mem = XEXP (mem, 0);
27538
27539 if (!MEM_P (mem))
27540 return false;
27541
27542 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27543 return false;
27544
27545 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27546 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27547 return false;
27548
27549 /* Validate that the register used to load the high value is either the
27550 register being loaded, or we can safely replace its use.
27551
27552 This function is only called from the peephole2 pass and we assume that
27553 there are 2 instructions in the peephole (addis and load), so we check
27554 that the target register is not used in the memory address and that the
27555 register holding the addis result is dead after the peephole. */
27556 if (REGNO (addis_reg) != REGNO (target))
27557 {
27558 if (reg_mentioned_p (target, mem))
27559 return false;
27560
27561 if (!peep2_reg_dead_p (2, addis_reg))
27562 return false;
27563
27564 /* If the target register being loaded is the stack pointer, we must
27565 avoid loading any other value into it, even temporarily. */
27566 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27567 return false;
27568 }
27569
27570 base_reg = XEXP (addr, 0);
27571 return REGNO (addis_reg) == REGNO (base_reg);
27572 }
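/* Concrete (schematic) example of a pair the peephole2 matches:

	addis 9,2,var@toc@ha	; addis_reg = r9, addis_value = r2 + high part
	lwz  10,var@toc@l(9)	; target = r10, memory address based on r9

   which is fusible provided r9 is dead after the load (or r9 == r10) and
   r10 does not appear in the memory address.  */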
27573
27574 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27575 sequence. We adjust the addis register to use the target register. If the
27576 load sign extends, we change the code to do a zero-extending load followed
27577 by an explicit sign extension, since the fusion only covers zero-extending
27578 loads.
27579
27580 The operands are:
27581 operands[0] register set with addis (to be replaced with target)
27582 operands[1] value set via addis
27583 operands[2] target register being loaded
27584 operands[3] D-form memory reference using operands[0]. */
27585
27586 void
27587 expand_fusion_gpr_load (rtx *operands)
27588 {
27589 rtx addis_value = operands[1];
27590 rtx target = operands[2];
27591 rtx orig_mem = operands[3];
27592 rtx new_addr, new_mem, orig_addr, offset;
27593 enum rtx_code plus_or_lo_sum;
27594 machine_mode target_mode = GET_MODE (target);
27595 machine_mode extend_mode = target_mode;
27596 machine_mode ptr_mode = Pmode;
27597 enum rtx_code extend = UNKNOWN;
27598
27599 if (GET_CODE (orig_mem) == ZERO_EXTEND
27600 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27601 {
27602 extend = GET_CODE (orig_mem);
27603 orig_mem = XEXP (orig_mem, 0);
27604 target_mode = GET_MODE (orig_mem);
27605 }
27606
27607 gcc_assert (MEM_P (orig_mem));
27608
27609 orig_addr = XEXP (orig_mem, 0);
27610 plus_or_lo_sum = GET_CODE (orig_addr);
27611 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27612
27613 offset = XEXP (orig_addr, 1);
27614 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27615 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27616
27617 if (extend != UNKNOWN)
27618 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27619
27620 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27621 UNSPEC_FUSION_GPR);
27622 emit_insn (gen_rtx_SET (target, new_mem));
27623
27624 if (extend == SIGN_EXTEND)
27625 {
27626 int sub_off = ((BYTES_BIG_ENDIAN)
27627 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27628 : 0);
27629 rtx sign_reg
27630 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27631
27632 emit_insn (gen_rtx_SET (target,
27633 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27634 }
27635
27636 return;
27637 }
27638
27639 /* Emit the addis instruction that will be part of a fused instruction
27640 sequence. */
27641
27642 void
27643 emit_fusion_addis (rtx target, rtx addis_value)
27644 {
27645 rtx fuse_ops[10];
27646 const char *addis_str = NULL;
27647
27648 /* Emit the addis instruction. */
27649 fuse_ops[0] = target;
27650 if (satisfies_constraint_L (addis_value))
27651 {
27652 fuse_ops[1] = addis_value;
27653 addis_str = "lis %0,%v1";
27654 }
27655
27656 else if (GET_CODE (addis_value) == PLUS)
27657 {
27658 rtx op0 = XEXP (addis_value, 0);
27659 rtx op1 = XEXP (addis_value, 1);
27660
27661 if (REG_P (op0) && CONST_INT_P (op1)
27662 && satisfies_constraint_L (op1))
27663 {
27664 fuse_ops[1] = op0;
27665 fuse_ops[2] = op1;
27666 addis_str = "addis %0,%1,%v2";
27667 }
27668 }
27669
27670 else if (GET_CODE (addis_value) == HIGH)
27671 {
27672 rtx value = XEXP (addis_value, 0);
27673 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27674 {
27675 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27676 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27677 if (TARGET_ELF)
27678 addis_str = "addis %0,%2,%1@toc@ha";
27679
27680 else if (TARGET_XCOFF)
27681 addis_str = "addis %0,%1@u(%2)";
27682
27683 else
27684 gcc_unreachable ();
27685 }
27686
27687 else if (GET_CODE (value) == PLUS)
27688 {
27689 rtx op0 = XEXP (value, 0);
27690 rtx op1 = XEXP (value, 1);
27691
27692 if (GET_CODE (op0) == UNSPEC
27693 && XINT (op0, 1) == UNSPEC_TOCREL
27694 && CONST_INT_P (op1))
27695 {
27696 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
27697 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
27698 fuse_ops[3] = op1;
27699 if (TARGET_ELF)
27700 addis_str = "addis %0,%2,%1+%3@toc@ha";
27701
27702 else if (TARGET_XCOFF)
27703 addis_str = "addis %0,%1+%3@u(%2)";
27704
27705 else
27706 gcc_unreachable ();
27707 }
27708 }
27709
27710 else if (satisfies_constraint_L (value))
27711 {
27712 fuse_ops[1] = value;
27713 addis_str = "lis %0,%v1";
27714 }
27715
27716 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
27717 {
27718 fuse_ops[1] = value;
27719 addis_str = "lis %0,%1@ha";
27720 }
27721 }
27722
27723 if (!addis_str)
27724 fatal_insn ("Could not generate addis value for fusion", addis_value);
27725
27726 output_asm_insn (addis_str, fuse_ops);
27727 }
27728
27729 /* Emit a D-form load or store instruction that is the second instruction
27730 of a fusion sequence. */
27731
27732 static void
27733 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
27734 {
27735 rtx fuse_ops[10];
27736 char insn_template[80];
27737
27738 fuse_ops[0] = load_reg;
27739 fuse_ops[1] = addis_reg;
27740
27741 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
27742 {
27743 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
27744 fuse_ops[2] = offset;
27745 output_asm_insn (insn_template, fuse_ops);
27746 }
27747
27748 else if (GET_CODE (offset) == UNSPEC
27749 && XINT (offset, 1) == UNSPEC_TOCREL)
27750 {
27751 if (TARGET_ELF)
27752 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
27753
27754 else if (TARGET_XCOFF)
27755 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27756
27757 else
27758 gcc_unreachable ();
27759
27760 fuse_ops[2] = XVECEXP (offset, 0, 0);
27761 output_asm_insn (insn_template, fuse_ops);
27762 }
27763
27764 else if (GET_CODE (offset) == PLUS
27765 && GET_CODE (XEXP (offset, 0)) == UNSPEC
27766 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
27767 && CONST_INT_P (XEXP (offset, 1)))
27768 {
27769 rtx tocrel_unspec = XEXP (offset, 0);
27770 if (TARGET_ELF)
27771 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
27772
27773 else if (TARGET_XCOFF)
27774 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
27775
27776 else
27777 gcc_unreachable ();
27778
27779 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
27780 fuse_ops[3] = XEXP (offset, 1);
27781 output_asm_insn (insn_template, fuse_ops);
27782 }
27783
27784 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
27785 {
27786 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27787
27788 fuse_ops[2] = offset;
27789 output_asm_insn (insn_template, fuse_ops);
27790 }
27791
27792 else
27793 fatal_insn ("Unable to generate load/store offset for fusion", offset);
27794
27795 return;
27796 }
27797
27798 /* Given an address, convert it into the addis and load offset parts. Addresses
27799 created during the peephole2 process look like:
27800 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27801 (unspec [(...)] UNSPEC_TOCREL)) */
27802
27803 static void
27804 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
27805 {
27806 rtx hi, lo;
27807
27808 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
27809 {
27810 hi = XEXP (addr, 0);
27811 lo = XEXP (addr, 1);
27812 }
27813 else
27814 gcc_unreachable ();
27815
27816 *p_hi = hi;
27817 *p_lo = lo;
27818 }
27819
27820 /* Return a string to fuse an addis instruction with a GPR load into the same
27821 register that the addis instruction set. The address that is used
27822 is the logical address that was formed during peephole2:
27823 (lo_sum (high) (low-part))
27824
27825 The code is complicated, so we call output_asm_insn directly, and just
27826 return "". */
27827
27828 const char *
27829 emit_fusion_gpr_load (rtx target, rtx mem)
27830 {
27831 rtx addis_value;
27832 rtx addr;
27833 rtx load_offset;
27834 const char *load_str = NULL;
27835 machine_mode mode;
27836
27837 if (GET_CODE (mem) == ZERO_EXTEND)
27838 mem = XEXP (mem, 0);
27839
27840 gcc_assert (REG_P (target) && MEM_P (mem));
27841
27842 addr = XEXP (mem, 0);
27843 fusion_split_address (addr, &addis_value, &load_offset);
27844
27845 /* Now emit the load instruction to the same register. */
27846 mode = GET_MODE (mem);
27847 switch (mode)
27848 {
27849 case E_QImode:
27850 load_str = "lbz";
27851 break;
27852
27853 case E_HImode:
27854 load_str = "lhz";
27855 break;
27856
27857 case E_SImode:
27858 case E_SFmode:
27859 load_str = "lwz";
27860 break;
27861
27862 case E_DImode:
27863 case E_DFmode:
27864 gcc_assert (TARGET_POWERPC64);
27865 load_str = "ld";
27866 break;
27867
27868 default:
27869 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
27870 }
27871
27872 /* Emit the addis instruction. */
27873 emit_fusion_addis (target, addis_value);
27874
27875 /* Emit the D-form load instruction. */
27876 emit_fusion_load (target, target, load_offset, load_str);
27877
27878 return "";
27879 }
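/* For a TOC-relative SImode load on ELF, the routine above emits,
   schematically,

	addis 10,2,var@toc@ha
	lwz   10,var@toc@l(10)

   i.e. both halves of the fused sequence target the same register.  */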
27880 \f
27881
27882 #ifdef RS6000_GLIBC_ATOMIC_FENV
27883 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
27884 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
27885 #endif
27886
27887 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
27888
27889 static void
27890 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27891 {
27892 if (!TARGET_HARD_FLOAT)
27893 {
27894 #ifdef RS6000_GLIBC_ATOMIC_FENV
27895 if (atomic_hold_decl == NULL_TREE)
27896 {
27897 atomic_hold_decl
27898 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27899 get_identifier ("__atomic_feholdexcept"),
27900 build_function_type_list (void_type_node,
27901 double_ptr_type_node,
27902 NULL_TREE));
27903 TREE_PUBLIC (atomic_hold_decl) = 1;
27904 DECL_EXTERNAL (atomic_hold_decl) = 1;
27905 }
27906
27907 if (atomic_clear_decl == NULL_TREE)
27908 {
27909 atomic_clear_decl
27910 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27911 get_identifier ("__atomic_feclearexcept"),
27912 build_function_type_list (void_type_node,
27913 NULL_TREE));
27914 TREE_PUBLIC (atomic_clear_decl) = 1;
27915 DECL_EXTERNAL (atomic_clear_decl) = 1;
27916 }
27917
27918 tree const_double = build_qualified_type (double_type_node,
27919 TYPE_QUAL_CONST);
27920 tree const_double_ptr = build_pointer_type (const_double);
27921 if (atomic_update_decl == NULL_TREE)
27922 {
27923 atomic_update_decl
27924 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27925 get_identifier ("__atomic_feupdateenv"),
27926 build_function_type_list (void_type_node,
27927 const_double_ptr,
27928 NULL_TREE));
27929 TREE_PUBLIC (atomic_update_decl) = 1;
27930 DECL_EXTERNAL (atomic_update_decl) = 1;
27931 }
27932
27933 tree fenv_var = create_tmp_var_raw (double_type_node);
27934 TREE_ADDRESSABLE (fenv_var) = 1;
27935 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
27936 build4 (TARGET_EXPR, double_type_node, fenv_var,
27937 void_node, NULL_TREE, NULL_TREE));
27938
27939 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
27940 *clear = build_call_expr (atomic_clear_decl, 0);
27941 *update = build_call_expr (atomic_update_decl, 1,
27942 fold_convert (const_double_ptr, fenv_addr));
27943 #endif
27944 return;
27945 }
27946
27947 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
27948 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
27949 tree call_mffs = build_call_expr (mffs, 0);
27950
27951 /* Generates the equivalent of feholdexcept (&fenv_var)
27952
27953 *fenv_var = __builtin_mffs ();
27954 double fenv_hold;
27955 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
27956 __builtin_mtfsf (0xff, fenv_hold); */
27957
27958 /* Mask to clear everything except for the rounding modes and non-IEEE
27959 arithmetic flag. */
27960 const unsigned HOST_WIDE_INT hold_exception_mask
27961 = HOST_WIDE_INT_C (0xffffffff00000007);
27962
27963 tree fenv_var = create_tmp_var_raw (double_type_node);
27964
27965 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
27966 NULL_TREE, NULL_TREE);
27967
27968 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
27969 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27970 build_int_cst (uint64_type_node,
27971 hold_exception_mask));
27972
27973 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27974 fenv_llu_and);
27975
27976 tree hold_mtfsf = build_call_expr (mtfsf, 2,
27977 build_int_cst (unsigned_type_node, 0xff),
27978 fenv_hold_mtfsf);
27979
27980 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
27981
27982 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
27983
27984 double fenv_clear = __builtin_mffs ();
27985 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
27986 __builtin_mtfsf (0xff, fenv_clear); */
27987
27988 /* Mask to clear everything except for the rounding modes and non-IEEE
27989 arithmetic flag. */
27990 const unsigned HOST_WIDE_INT clear_exception_mask
27991 = HOST_WIDE_INT_C (0xffffffff00000000);
27992
27993 tree fenv_clear = create_tmp_var_raw (double_type_node);
27994
27995 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
27996 call_mffs, NULL_TREE, NULL_TREE);
27997
27998 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
27999 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
28000 fenv_clean_llu,
28001 build_int_cst (uint64_type_node,
28002 clear_exception_mask));
28003
28004 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28005 fenv_clear_llu_and);
28006
28007 tree clear_mtfsf = build_call_expr (mtfsf, 2,
28008 build_int_cst (unsigned_type_node, 0xff),
28009 fenv_clear_mtfsf);
28010
28011 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
28012
28013 /* Generates the equivalent of feupdateenv (&fenv_var)
28014
28015 double old_fenv = __builtin_mffs ();
28016 double fenv_update;
28017 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
28018 (*(uint64_t*)fenv_var & 0x1ff80fff);
28019 __builtin_mtfsf (0xff, fenv_update); */
28020
28021 const unsigned HOST_WIDE_INT update_exception_mask
28022 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28023 const unsigned HOST_WIDE_INT new_exception_mask
28024 = HOST_WIDE_INT_C (0x1ff80fff);
28025
28026 tree old_fenv = create_tmp_var_raw (double_type_node);
28027 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
28028 call_mffs, NULL_TREE, NULL_TREE);
28029
28030 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
28031 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
28032 build_int_cst (uint64_type_node,
28033 update_exception_mask));
28034
28035 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28036 build_int_cst (uint64_type_node,
28037 new_exception_mask));
28038
28039 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
28040 old_llu_and, new_llu_and);
28041
28042 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28043 new_llu_mask);
28044
28045 tree update_mtfsf = build_call_expr (mtfsf, 2,
28046 build_int_cst (unsigned_type_node, 0xff),
28047 fenv_update_mtfsf);
28048
28049 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
28050 }
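/* The hold/clear/update trees built above implement the C11 requirement
   that atomic compound assignment on floating-point types raise exactly
   the exceptions of the successfully stored result; conceptually (a
   sketch, not generated code):

	_Atomic double x; double y;
	x += y;		// HOLD; do { load x; add y; } while (!CAS); UPDATE

   where HOLD saves and masks the FP exception state and UPDATE merges the
   exceptions of the final, successful iteration back in.  */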
28051
28052 void
28053 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
28054 {
28055 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28056
28057 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28058 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28059
28060 /* The destination of the vmrgew instruction layout is:
28061 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28062 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28063 vmrgew instruction will be correct. */
28064 if (BYTES_BIG_ENDIAN)
28065 {
28066 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
28067 GEN_INT (0)));
28068 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
28069 GEN_INT (3)));
28070 }
28071 else
28072 {
28073 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
28074 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
28075 }
28076
28077 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28078 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28079
28080 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
28081 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
28082
28083 if (BYTES_BIG_ENDIAN)
28084 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28085 else
28086 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28087 }
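/* Net effect (schematic), as used for the float2 built-ins:

	dst = { (float) src1[0], (float) src1[1],
		(float) src2[0], (float) src2[1] }

   with the xxpermdi/xvcvdpsp/vmrgew sequence producing the same element
   order on both endiannesses.  */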
28088
28089 void
28090 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
28091 {
28092 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28093
28094 rtx_tmp0 = gen_reg_rtx (V2DImode);
28095 rtx_tmp1 = gen_reg_rtx (V2DImode);
28096
28097 /* The destination of the vmrgew instruction layout is:
28098 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28099 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28100 vmrgew instruction will be correct. */
28101 if (BYTES_BIG_ENDIAN)
28102 {
28103 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
28104 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
28105 }
28106 else
28107 {
28108 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
28109 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
28110 }
28111
28112 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28113 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28114
28115 if (signed_convert)
28116 {
28117 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
28118 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
28119 }
28120 else
28121 {
28122 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
28123 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
28124 }
28125
28126 if (BYTES_BIG_ENDIAN)
28127 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28128 else
28129 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28130 }
28131
28132 void
28133 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
28134 rtx src2)
28135 {
28136 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28137
28138 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28139 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28140
28141 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
28142 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
28143
28144 rtx_tmp2 = gen_reg_rtx (V4SImode);
28145 rtx_tmp3 = gen_reg_rtx (V4SImode);
28146
28147 if (signed_convert)
28148 {
28149 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28150 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28151 }
28152 else
28153 {
28154 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28155 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28156 }
28157
28158 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
28159 }
28160
28161 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28162
28163 static bool
28164 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28165 optimization_type opt_type)
28166 {
28167 switch (op)
28168 {
28169 case rsqrt_optab:
28170 return (opt_type == OPTIMIZE_FOR_SPEED
28171 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28172
28173 default:
28174 return true;
28175 }
28176 }
28177
28178 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28179
28180 static HOST_WIDE_INT
28181 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28182 {
28183 if (TREE_CODE (exp) == STRING_CST
28184 && (STRICT_ALIGNMENT || !optimize_size))
28185 return MAX (align, BITS_PER_WORD);
28186 return align;
28187 }
28188
28189 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28190
28191 static HOST_WIDE_INT
28192 rs6000_starting_frame_offset (void)
28193 {
28194 if (FRAME_GROWS_DOWNWARD)
28195 return 0;
28196 return RS6000_STARTING_FRAME_OFFSET;
28197 }
28198 \f
28199
28200 /* Create an alias for a mangled name where we have changed the mangling (in
28201 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
28202 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
28203
28204 #if TARGET_ELF && RS6000_WEAK
28205 static void
28206 rs6000_globalize_decl_name (FILE * stream, tree decl)
28207 {
28208 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
28209
28210 targetm.asm_out.globalize_label (stream, name);
28211
28212 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
28213 {
28214 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
28215 const char *old_name;
28216
28217 ieee128_mangling_gcc_8_1 = true;
28218 lang_hooks.set_decl_assembler_name (decl);
28219 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
28220 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
28221 ieee128_mangling_gcc_8_1 = false;
28222
28223 if (strcmp (name, old_name) != 0)
28224 {
28225 fprintf (stream, "\t.weak %s\n", old_name);
28226 fprintf (stream, "\t.set %s,%s\n", old_name, name);
28227 }
28228 }
28229 }
28230 #endif
28231
28232 \f
28233 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
28234 function names from <foo>l to <foo>f128 if the default long double type is
28235 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28236 include file switches the names on systems that support long double as IEEE
28237 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28238 In the future, glibc will export names like __ieee128_sinf128 and we can
28239 switch to using those instead of using sinf128, which pollutes the user's
28240 namespace.
28241
28242 This will switch the names for Fortran math functions as well (which doesn't
28243 use math.h). However, Fortran needs other changes to the compiler and
28244 library before you can switch the real*16 type at compile time.
28245
28246 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28247 only do this transformation if the __float128 type is enabled. This
28248 prevents us from doing the transformation on older 32-bit ports that might
28249 have enabled using IEEE 128-bit floating point as the default long double
28250 type. */
28251
28252 static tree
28253 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28254 {
28255 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28256 && TREE_CODE (decl) == FUNCTION_DECL
28257 && DECL_IS_UNDECLARED_BUILTIN (decl)
28258 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28259 {
28260 size_t len = IDENTIFIER_LENGTH (id);
28261 const char *name = IDENTIFIER_POINTER (id);
28262 char *newname = NULL;
28263
28264 /* See if it is one of the built-in functions with an unusual name. */
28265 switch (DECL_FUNCTION_CODE (decl))
28266 {
28267 case BUILT_IN_DREML:
28268 newname = xstrdup ("__remainderieee128");
28269 break;
28270
28271 case BUILT_IN_GAMMAL:
28272 newname = xstrdup ("__lgammaieee128");
28273 break;
28274
28275 case BUILT_IN_GAMMAL_R:
28276 case BUILT_IN_LGAMMAL_R:
28277 newname = xstrdup ("__lgammaieee128_r");
28278 break;
28279
28280 case BUILT_IN_NEXTTOWARD:
28281 newname = xstrdup ("__nexttoward_to_ieee128");
28282 break;
28283
28284 case BUILT_IN_NEXTTOWARDF:
28285 newname = xstrdup ("__nexttowardf_to_ieee128");
28286 break;
28287
28288 case BUILT_IN_NEXTTOWARDL:
28289 newname = xstrdup ("__nexttowardieee128");
28290 break;
28291
28292 case BUILT_IN_POW10L:
28293 newname = xstrdup ("__exp10ieee128");
28294 break;
28295
28296 case BUILT_IN_SCALBL:
28297 newname = xstrdup ("__scalbieee128");
28298 break;
28299
28300 case BUILT_IN_SIGNIFICANDL:
28301 newname = xstrdup ("__significandieee128");
28302 break;
28303
28304 case BUILT_IN_SINCOSL:
28305 newname = xstrdup ("__sincosieee128");
28306 break;
28307
28308 default:
28309 break;
28310 }
28311
28312 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28313 if (!newname)
28314 {
28315 size_t printf_len = strlen ("printf");
28316 size_t scanf_len = strlen ("scanf");
28317
28318 if (len >= printf_len
28319 && strcmp (name + len - printf_len, "printf") == 0)
28320 newname = xasprintf ("__%sieee128", name);
28321
28322 else if (len >= scanf_len
28323 && strcmp (name + len - scanf_len, "scanf") == 0)
28324 newname = xasprintf ("__isoc99_%sieee128", name);
28325
28326 else if (name[len - 1] == 'l')
28327 {
28328 bool uses_ieee128_p = false;
28329 tree type = TREE_TYPE (decl);
28330 machine_mode ret_mode = TYPE_MODE (type);
28331
28332 /* See if the function returns an IEEE 128-bit floating point type or
28333 complex type. */
28334 if (ret_mode == TFmode || ret_mode == TCmode)
28335 uses_ieee128_p = true;
28336 else
28337 {
28338 function_args_iterator args_iter;
28339 tree arg;
28340
28341 /* See if the function passes an IEEE 128-bit floating point type
28342 or complex type. */
28343 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28344 {
28345 machine_mode arg_mode = TYPE_MODE (arg);
28346 if (arg_mode == TFmode || arg_mode == TCmode)
28347 {
28348 uses_ieee128_p = true;
28349 break;
28350 }
28351 }
28352 }
28353
28354 /* If we passed or returned an IEEE 128-bit floating point type,
28355 change the name. Use __<name>ieee128, instead of <name>l. */
28356 if (uses_ieee128_p)
28357 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28358 }
28359 }
28360
28361 if (newname)
28362 {
28363 if (TARGET_DEBUG_BUILTIN)
28364 fprintf (stderr, "Map %s => %s\n", name, newname);
28365
28366 id = get_identifier (newname);
28367 free (newname);
28368 }
28369 }
28370
28371 return id;
28372 }
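/* Illustrative mappings when long double is IEEE 128-bit:
   __builtin_sinl -> __sinieee128, printf -> __printfieee128,
   scanf -> __isoc99_scanfieee128.  */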
28373
28374 /* Predict whether the given loop in gimple will be transformed in the RTL
28375 doloop_optimize pass. */
28376
28377 static bool
28378 rs6000_predict_doloop_p (struct loop *loop)
28379 {
28380 gcc_assert (loop);
28381
28382 /* On rs6000, targetm.can_use_doloop_p is actually
28383 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28384 if (loop->inner != NULL)
28385 {
28386 if (dump_file && (dump_flags & TDF_DETAILS))
28387 fprintf (dump_file, "Predict doloop failure due to"
28388 " loop nesting.\n");
28389 return false;
28390 }
28391
28392 return true;
28393 }
28394
28395 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28396
28397 static machine_mode
28398 rs6000_preferred_doloop_mode (machine_mode)
28399 {
28400 return word_mode;
28401 }
28402
28403 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28404
28405 static bool
28406 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28407 {
28408 gcc_assert (MEM_P (mem));
28409
28410 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
28411 type addresses, so don't allow MEMs with those address types to be
28412 substituted as an equivalent expression. See PR93974 for details. */
28413 if (GET_CODE (XEXP (mem, 0)) == AND)
28414 return true;
28415
28416 return false;
28417 }
28418
28419 /* Implement TARGET_INVALID_CONVERSION. */
28420
28421 static const char *
28422 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28423 {
28424 /* Make sure we're working with the canonical types. */
28425 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28426 fromtype = TYPE_CANONICAL (fromtype);
28427 if (TYPE_CANONICAL (totype) != NULL_TREE)
28428 totype = TYPE_CANONICAL (totype);
28429
28430 machine_mode frommode = TYPE_MODE (fromtype);
28431 machine_mode tomode = TYPE_MODE (totype);
28432
28433 if (frommode != tomode)
28434 {
28435 /* Do not allow conversions to/from XOmode and OOmode types. */
28436 if (frommode == XOmode)
28437 return N_("invalid conversion from type %<__vector_quad%>");
28438 if (tomode == XOmode)
28439 return N_("invalid conversion to type %<__vector_quad%>");
28440 if (frommode == OOmode)
28441 return N_("invalid conversion from type %<__vector_pair%>");
28442 if (tomode == OOmode)
28443 return N_("invalid conversion to type %<__vector_pair%>");
28444 }
28445 else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
28446 {
28447 /* We really care about the modes of the base types. */
28448 frommode = TYPE_MODE (TREE_TYPE (fromtype));
28449 tomode = TYPE_MODE (TREE_TYPE (totype));
28450
28451 /* Do not allow conversions to/from XOmode and OOmode pointer
28452 types, except to/from void pointers. */
28453 if (frommode != tomode
28454 && frommode != VOIDmode
28455 && tomode != VOIDmode)
28456 {
28457 if (frommode == XOmode)
28458 return N_("invalid conversion from type %<* __vector_quad%>");
28459 if (tomode == XOmode)
28460 return N_("invalid conversion to type %<* __vector_quad%>");
28461 if (frommode == OOmode)
28462 return N_("invalid conversion from type %<* __vector_pair%>");
28463 if (tomode == OOmode)
28464 return N_("invalid conversion to type %<* __vector_pair%>");
28465 }
28466 }
28467
28468 /* Conversion allowed. */
28469 return NULL;
28470 }
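/* For example (a sketch, assuming MMA support is enabled):

	__vector_quad q;
	__vector_pair *p = (__vector_pair *) &q;   // rejected above
	void *v = &q;                              // OK: void * is exempt  */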
28471
28472 /* Convert a SFmode constant to the integer bit pattern. */
28473
28474 long
28475 rs6000_const_f32_to_i32 (rtx operand)
28476 {
28477 long value;
28478 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28479
28480 gcc_assert (GET_MODE (operand) == SFmode);
28481 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28482 return value;
28483 }
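/* For example, the SFmode constant 1.0f yields 0x3f800000, its IEEE
   single-precision bit pattern.  */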
28484
28485 void
28486 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28487 {
28488 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28489 inform (input_location,
28490 "the result for the xxspltidp instruction "
28491 "is undefined for subnormal input values");
28492 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28493 }
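/* For example, value 0x3f800000 (1.0f) splats cleanly, while 0x00400000
   (a single-precision subnormal) triggers the warning above, since
   XXSPLTIDP's result is undefined for subnormal inputs.  */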
28494
28495 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28496
28497 static bool
28498 rs6000_gen_pic_addr_diff_vec (void)
28499 {
28500 return rs6000_relative_jumptables;
28501 }
28502
28503 void
28504 rs6000_output_addr_vec_elt (FILE *file, int value)
28505 {
28506 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28507 char buf[100];
28508
28509 fprintf (file, "%s", directive);
28510 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28511 assemble_name (file, buf);
28512 fprintf (file, "\n");
28513 }
28514
28515 \f
28516 /* Copy an integer constant to the vector constant structure. */
28517
28518 static void
28519 constant_int_to_128bit_vector (rtx op,
28520 machine_mode mode,
28521 size_t byte_num,
28522 vec_const_128bit_type *info)
28523 {
28524 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28525 unsigned bitsize = GET_MODE_BITSIZE (mode);
28526
28527 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28528 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28529 }
28530
28531 /* Copy a floating point constant to the vector constant structure. */
28532
28533 static void
28534 constant_fp_to_128bit_vector (rtx op,
28535 machine_mode mode,
28536 size_t byte_num,
28537 vec_const_128bit_type *info)
28538 {
28539 unsigned bitsize = GET_MODE_BITSIZE (mode);
28540 unsigned num_words = bitsize / 32;
28541 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28542 long real_words[VECTOR_128BIT_WORDS];
28543
28544 /* Make sure we don't overflow the real_words array and that it is
28545 filled completely. */
28546 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28547
28548 real_to_target (real_words, rtype, mode);
28549
28550 /* Iterate over each 32-bit word in the floating point constant. The
28551 real_to_target function puts out words in target endian fashion. We need
28552 to arrange the order so that the bytes are written in big endian order. */
28553 for (unsigned num = 0; num < num_words; num++)
28554 {
28555 unsigned endian_num = (BYTES_BIG_ENDIAN
28556 ? num
28557 : num_words - 1 - num);
28558
28559 unsigned uvalue = real_words[endian_num];
28560 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28561 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28562 }
28563
28564 /* Mark that this constant involves floating point. */
28565 info->fp_constant_p = true;
28566 }
28567
28568 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28569 structure INFO.
28570
28571 Break out the constant out to bytes, half words, words, and double words.
28572 Return true if we have successfully converted the constant.
28573
28574 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28575 constants. Integer and floating point scalar constants are splatted to fill
28576 out the vector. */
28577
28578 bool
28579 vec_const_128bit_to_bytes (rtx op,
28580 machine_mode mode,
28581 vec_const_128bit_type *info)
28582 {
28583 /* Initialize the constant structure. */
28584 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28585
28586 /* Assume CONST_INTs are DImode. */
28587 if (mode == VOIDmode)
28588 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28589
28590 if (mode == VOIDmode)
28591 return false;
28592
28593 unsigned size = GET_MODE_SIZE (mode);
28594 bool splat_p = false;
28595
28596 if (size > VECTOR_128BIT_BYTES)
28597 return false;
28598
28599 /* Set up the bits. */
28600 switch (GET_CODE (op))
28601 {
28602 /* Integer constants, default to double word. */
28603 case CONST_INT:
28604 {
28605 constant_int_to_128bit_vector (op, mode, 0, info);
28606 splat_p = true;
28607 break;
28608 }
28609
28610 /* Floating point constants. */
28611 case CONST_DOUBLE:
28612 {
28613 /* Fail if the floating point constant is the wrong mode. */
28614 if (GET_MODE (op) != mode)
28615 return false;
28616
28617 /* SFmode constants stored as scalars are kept in DFmode format. */
28618 if (mode == SFmode)
28619 {
28620 mode = DFmode;
28621 size = GET_MODE_SIZE (DFmode);
28622 }
28623
28624 constant_fp_to_128bit_vector (op, mode, 0, info);
28625 splat_p = true;
28626 break;
28627 }
28628
28629 /* Vector constants, iterate over each element. On little endian
28630 systems, we have to reverse the element numbers. */
28631 case CONST_VECTOR:
28632 {
28633 /* Fail if the vector constant is the wrong mode or size. */
28634 if (GET_MODE (op) != mode
28635 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28636 return false;
28637
28638 machine_mode ele_mode = GET_MODE_INNER (mode);
28639 size_t ele_size = GET_MODE_SIZE (ele_mode);
28640 size_t nunits = GET_MODE_NUNITS (mode);
28641
28642 for (size_t num = 0; num < nunits; num++)
28643 {
28644 rtx ele = CONST_VECTOR_ELT (op, num);
28645 size_t byte_num = (BYTES_BIG_ENDIAN
28646 ? num
28647 : nunits - 1 - num) * ele_size;
28648
28649 if (CONST_INT_P (ele))
28650 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28651 else if (CONST_DOUBLE_P (ele))
28652 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28653 else
28654 return false;
28655 }
28656
28657 break;
28658 }
28659
28660 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28661 Since we are duplicating the element, we don't have to worry about
28662 endian issues. */
28663 case VEC_DUPLICATE:
28664 {
28665 /* Fail if the vector duplicate is the wrong mode or size. */
28666 if (GET_MODE (op) != mode
28667 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28668 return false;
28669
28670 machine_mode ele_mode = GET_MODE_INNER (mode);
28671 size_t ele_size = GET_MODE_SIZE (ele_mode);
28672 rtx ele = XEXP (op, 0);
28673 size_t nunits = GET_MODE_NUNITS (mode);
28674
28675 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
28676 return false;
28677
28678 for (size_t num = 0; num < nunits; num++)
28679 {
28680 size_t byte_num = num * ele_size;
28681
28682 if (CONST_INT_P (ele))
28683 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28684 else
28685 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28686 }
28687
28688 break;
28689 }
28690
28691 /* Anything else, just return failure. */
28692 default:
28693 return false;
28694 }
28695
28696 /* Splat the constant to fill 128 bits if desired. */
28697 if (splat_p && size < VECTOR_128BIT_BYTES)
28698 {
28699 if ((VECTOR_128BIT_BYTES % size) != 0)
28700 return false;
28701
28702 for (size_t offset = size;
28703 offset < VECTOR_128BIT_BYTES;
28704 offset += size)
28705 memcpy ((void *) &info->bytes[offset],
28706 (void *) &info->bytes[0],
28707 size);
28708 }
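/* For instance, a 4-byte SImode constant occupies bytes[0..3] and is
   copied to offsets 4, 8, and 12 here, so the structure describes the
   full 128-bit value that a word-splatting instruction would produce. */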
28709
28710 /* Remember original size. */
28711 info->original_size = size;
28712
28713 /* Determine if the bytes are all the same. */
28714 unsigned char first_byte = info->bytes[0];
28715 info->all_bytes_same = true;
28716 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
28717 if (first_byte != info->bytes[i])
28718 {
28719 info->all_bytes_same = false;
28720 break;
28721 }
28722
28723 /* Pack half words together & determine if all of the half words are the
28724 same. */
28725 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
28726 info->half_words[i] = ((info->bytes[i * 2] << 8)
28727 | info->bytes[(i * 2) + 1]);
28728
28729 unsigned short first_hword = info->half_words[0];
28730 info->all_half_words_same = true;
28731 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
28732 if (first_hword != info->half_words[i])
28733 {
28734 info->all_half_words_same = false;
28735 break;
28736 }
28737
28738 /* Pack words together & determine if all of the words are the same. */
28739 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
28740 info->words[i] = ((info->bytes[i * 4] << 24)
28741 | (info->bytes[(i * 4) + 1] << 16)
28742 | (info->bytes[(i * 4) + 2] << 8)
28743 | info->bytes[(i * 4) + 3]);
28744
28745 info->all_words_same
28746 = (info->words[0] == info->words[1]
28747 && info->words[0] == info->words[2]
28748 && info->words[0] == info->words[3]);
28750
28751 /* Pack double words together & determine if all of the double words are the
28752 same. */
28753 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
28754 {
28755 unsigned HOST_WIDE_INT d_word = 0;
28756 for (size_t j = 0; j < 8; j++)
28757 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
28758
28759 info->double_words[i] = d_word;
28760 }
28761
28762 info->all_double_words_same
28763 = (info->double_words[0] == info->double_words[1]);
28764
28765 return true;
28766 }
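/* Minimal usage sketch (hypothetical caller, not part of this file): a
   predicate that asks whether a 128-bit constant is loadable with a single
   power10 instruction could first convert the constant and then probe the
   constant_generates_* helpers defined below.  */
#if 0
static bool
can_load_with_single_insn (rtx op, machine_mode mode)
{
  vec_const_128bit_type vsx_const;

  if (!vec_const_128bit_to_bytes (op, mode, &vsx_const))
    return false;

  /* Each helper returns 0 if its instruction cannot be used.  */
  return (constant_generates_lxvkq (&vsx_const) != 0
          || constant_generates_xxspltiw (&vsx_const) != 0
          || constant_generates_xxspltidp (&vsx_const) != 0);
}
#endif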
28767
28768 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
28769 if the LXVKQ instruction cannot be used. Otherwise return the immediate
28770 value to be used with the LXVKQ instruction. */
28771
28772 unsigned
28773 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
28774 {
28775 /* The instruction is only supported if power10 code generation, IEEE
28776 128-bit floating point hardware, and VSX registers are all available. */
28777 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
28778 || !TARGET_VSX)
28779 return 0;
28780
28781 /* All of the constants that can be generated by LXVKQ have the bottom 3
28782 words equal to 0. */
28783 if (vsx_const->words[1] != 0
28784 || vsx_const->words[2] != 0
28785 || vsx_const->words[3] != 0)
28786 return 0;
28787
28788 /* See if we have a match for the first word. */
28789 switch (vsx_const->words[0])
28790 {
28791 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
28792 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
28793 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
28794 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
28795 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
28796 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
28797 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
28798 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
28799 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
28800 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
28801 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
28802 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
28803 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
28804 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
28805 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
28806 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
28807 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
28808 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
28809
28810 /* Anything else cannot be loaded. */
28811 default:
28812 break;
28813 }
28814
28815 return 0;
28816 }
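/* Worked example: IEEE 128-bit +1.0 has sign bit 0, biased exponent
   0x3FFF, and an all-zero mantissa, so words[0] is 0x3FFF0000 and
   words[1..3] are zero; the switch above maps it to the LXVKQ immediate 1.
   A value like 1.5 (words[0] == 0x3FFF8000) has no table entry, so 0 is
   returned. */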
28817
28818 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
28819 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
28820 value to be used with the XXSPLTIW instruction. */
28821
28822 unsigned
28823 constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
28824 {
28825 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
28826 return 0;
28827
28828 if (!vsx_const->all_words_same)
28829 return 0;
28830
28831 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
28832 if (vsx_const->all_bytes_same)
28833 return 0;
28834
28835 /* See if we can use VSPLTISH or VSPLTISW. */
28836 if (vsx_const->all_half_words_same)
28837 {
28838 unsigned short h_word = vsx_const->half_words[0];
28839 short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
28840 if (EASY_VECTOR_15 (sign_h_word))
28841 return 0;
28842 }
28843
28844 unsigned int word = vsx_const->words[0];
28845 int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
28846 if (EASY_VECTOR_15 (sign_word))
28847 return 0;
28848
28849 return vsx_const->words[0];
28850 }
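/* Standalone sketch of the sign extension idiom used above (an assumed
   equivalent, not taken from this file): XORing in the sign bit and then
   subtracting it converts an N-bit unsigned value to its two's complement
   signed interpretation without an implementation-defined narrowing
   cast.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned short h_word = 0xfffb;	/* 16-bit two's complement -5.  */
  short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;

  /* Prints -5.  That value is within the VSPLTISH range [-16, 15], so
     constant_generates_xxspltiw would return 0 in this case.  */
  printf ("%d\n", sign_h_word);
  return 0;
}
#endif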
28851
28852 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
28853 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
28854 value to be used with the XXSPLTIDP instruction. */
28855
28856 unsigned
28857 constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
28858 {
28859 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
28860 return 0;
28861
28862 /* Reject if the two 64-bit segments are not the same. */
28863 if (!vsx_const->all_double_words_same)
28864 return 0;
28865
28866 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
28867 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
28868 if (vsx_const->all_bytes_same
28869 || vsx_const->all_half_words_same
28870 || vsx_const->all_words_same)
28871 return 0;
28872
28873 unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
28874
28875 /* Avoid values that look like DFmode NaNs, except for the normal quiet
28876 NaN bit pattern and the signaling NaN bit pattern. Recognize infinity
28877 and negative infinity. */
28878
28879 /* Bit representation of DFmode normal quiet NaN. */
28880 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
28881
28882 /* Bit representation of DFmode normal signaling NaN. */
28883 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
28884
28885 /* Bit representation of DFmode positive infinity. */
28886 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
28887
28888 /* Bit representation of DFmode negative infinity. */
28889 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
28890
28891 if (value != RS6000_CONST_DF_NAN
28892 && value != RS6000_CONST_DF_NANS
28893 && value != RS6000_CONST_DF_INF
28894 && value != RS6000_CONST_DF_NEG_INF)
28895 {
28896 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
28897 the exponent, and 52 bits for the mantissa (not counting the hidden
28898 bit used for normal numbers). NaN values have the exponent set to all
28899 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
28900
28901 int df_exponent = (value >> 52) & 0x7ff;
28902 unsigned HOST_WIDE_INT
28903 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
28904
28905 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */
28906 return 0;
28907
28908 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
28909 the exponent all 0 bits, and the mantissa non-zero. If the value is
28910 subnormal, then the hidden bit in the mantissa is not set. */
28911 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */
28912 return 0;
28913 }
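/* Worked example: 0x7FF4000000000001 has the exponent field all 1s and a
   non-zero mantissa but matches neither canonical pattern above, so it is
   rejected as an "other NaN"; 0x0008000000000000 has a zero exponent field
   and a non-zero mantissa (a subnormal), so it is rejected as well. */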
28914
28915 /* Change the representation to a DFmode constant. */
28916 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
28917
28918 /* real_from_target takes the target words in target order. */
28919 if (!BYTES_BIG_ENDIAN)
28920 std::swap (df_words[0], df_words[1]);
28921
28922 REAL_VALUE_TYPE rv_type;
28923 real_from_target (&rv_type, df_words, DFmode);
28924
28925 const REAL_VALUE_TYPE *rv = &rv_type;
28926
28927 /* Validate that the number can be stored as an SFmode value. */
28928 if (!exact_real_truncate (SFmode, rv))
28929 return 0;
28930
28931 /* Validate that the number is not an SFmode subnormal value (exponent is
28932 0, mantissa field is non-zero), which is undefined for the XXSPLTIDP
28933 instruction. */
28934 long sf_value;
28935 real_to_target (&sf_value, rv, SFmode);
28936
28937 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
28938 and 23 bits for the mantissa. Subnormal numbers have the exponent all
28939 0 bits, and the mantissa non-zero. */
28940 long sf_exponent = (sf_value >> 23) & 0xFF;
28941 long sf_mantissa = sf_value & 0x7FFFFF;
28942
28943 if (sf_exponent == 0 && sf_mantissa != 0)
28944 return 0;
28945
28946 /* Return the immediate to be used. */
28947 return sf_value;
28948 }
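/* Worked example: a V2DF constant { 1.0, 1.0 } has both double words equal
   to 0x3FF0000000000000.  1.0 truncates exactly to the SFmode bit pattern
   0x3F800000, which is the XXSPLTIDP immediate returned.  By contrast, 1/3
   fails exact_real_truncate, and a tiny value such as 0x1p-140 truncates
   to an SFmode subnormal, so both of those return 0. */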
28949
28950 \f
28951 struct gcc_target targetm = TARGET_INITIALIZER;
28952
28953 #include "gt-rs6000.h"