/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B) ((A) < (B) ? (A) : (B))
#define max(A,B) ((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable; we branch to it so
   we can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,
  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
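
/* For example, -mrecip=divf sets RECIP_SF_DIV | RECIP_V4SF_DIV in the table
   above, so only the scalar SFmode and V4SFmode reciprocal divide estimates
   are generated, while -mrecip=all sets every bit in RECIP_ALL.  */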

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
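
/* For example, since the enum above orders GPR_REG_TYPE, VSX_REG_TYPE,
   ALTIVEC_REG_TYPE, and FPR_REG_TYPE consecutively, both range checks
   accept ALTIVEC_REG_TYPE, while neither accepts SPR_REG_TYPE or
   CR_REG_TYPE.  */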


/* Register classes we care about in secondary reload or when checking for
   legitimate addresses.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */

/* For each machine mode, the reload insns to use and the mask of valid
   addressing modes for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	/* INSN to reload for loading.  */
  enum insn_code reload_store;	/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;	/* INSN for fusing gpr ADDIS/loads.  */
				/* INSNs for fusing addi with loads
				   or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
				/* INSNs for fusing addis with loads
				   or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;		/* Scalar value can go in VMX.  */
  bool fused_toc;		/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
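
/* A minimal sketch (not in the original source): the same addr_mask test
   pattern extends to any of the RELOAD_REG_* bits above, e.g. reg+reg
   (indexed) addressing.  */
static inline bool
mode_supports_indexed_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_INDEXED)
	  != 0);
}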

\f
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

\f
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};
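
/* Illustrative expansion (a sketch using a hypothetical entry, not actual
   generated code): a line in rs6000-builtin.def such as

     RS6000_BUILTIN_2 (MY_BUILTIN, "__builtin_my_op", MASK, ATTR, CODE_FOR_my_op)

   expands under the definitions above to the initializer

     { "__builtin_my_op", CODE_FOR_my_op, MASK, ATTR },

   so rs6000_builtin_info gets one entry per builtin, in the same order as
   the rs6000_builtins enum values generated from the same .def file.  */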

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "ca",
  /* AltiVec registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
1322 "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15",
1323 "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
1324 "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
1325 };
1326 #endif
1327
1328 /* Table of valid machine attributes. */
1329
1330 static const struct attribute_spec rs6000_attribute_table[] =
1331 {
1332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
1333 affects_type_identity } */
1334 { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute,
1335 false },
1336 { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1337 false },
1338 { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1339 false },
1340 { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1341 false },
1342 { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1343 false },
1344 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1345 SUBTARGET_ATTRIBUTE_TABLE,
1346 #endif
1347 { NULL, 0, 0, false, false, false, NULL, false }
1348 };
1349 \f
1350 #ifndef TARGET_PROFILE_KERNEL
1351 #define TARGET_PROFILE_KERNEL 0
1352 #endif
1353
/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
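
/* A minimal usage sketch (hypothetical helper, not in the original source):
   OR together the VRSAVE bits for a contiguous range of AltiVec
   registers.  */
static inline unsigned int
altivec_reg_range_mask (int first_regno, int last_regno)
{
  unsigned int mask = 0;
  for (int regno = first_regno; regno <= last_regno; regno++)
    mask |= ALTIVEC_REG_BIT (regno);
  return mask;
}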
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
1648 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1649
1650 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1651 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1652
1653 #undef TARGET_LRA_P
1654 #define TARGET_LRA_P rs6000_lra_p
1655
1656 #undef TARGET_CAN_ELIMINATE
1657 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1658
1659 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1660 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1661
1662 #undef TARGET_TRAMPOLINE_INIT
1663 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1664
1665 #undef TARGET_FUNCTION_VALUE
1666 #define TARGET_FUNCTION_VALUE rs6000_function_value
1667
1668 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1669 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1670
1671 #undef TARGET_OPTION_SAVE
1672 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1673
1674 #undef TARGET_OPTION_RESTORE
1675 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1676
1677 #undef TARGET_OPTION_PRINT
1678 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1679
1680 #undef TARGET_CAN_INLINE_P
1681 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1682
1683 #undef TARGET_SET_CURRENT_FUNCTION
1684 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1685
1686 #undef TARGET_LEGITIMATE_CONSTANT_P
1687 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1688
1689 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1690 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1691
1692 #undef TARGET_CAN_USE_DOLOOP_P
1693 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1694
1695 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1696 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1697
1698 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1699 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1700 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1701 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1702 #undef TARGET_UNWIND_WORD_MODE
1703 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1704
1705 #undef TARGET_OFFLOAD_OPTIONS
1706 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1707
1708 #undef TARGET_C_MODE_FOR_SUFFIX
1709 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1710
1711 #undef TARGET_INVALID_BINARY_OP
1712 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1713 \f
1714
1715 /* Processor table. */
1716 struct rs6000_ptt
1717 {
1718 const char *const name; /* Canonical processor name. */
1719 const enum processor_type processor; /* Processor type enum value. */
1720 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1721 };
1722
1723 static struct rs6000_ptt const processor_target_table[] =
1724 {
1725 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1726 #include "rs6000-cpus.def"
1727 #undef RS6000_CPU
1728 };
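/* Illustrative expansion of the table above (not literal contents of
   rs6000-cpus.def): an entry along the lines of

       RS6000_CPU ("power8", PROCESSOR_POWER8, <mask flags>)

   expands through the RS6000_CPU macro into the initializer

       { "power8", PROCESSOR_POWER8, <mask flags> },

   so processor_target_table gets one { name, processor, target_enable }
   row per CPU listed in the .def file.  */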
1729
1730 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1731 name is invalid. */
1732
1733 static int
1734 rs6000_cpu_name_lookup (const char *name)
1735 {
1736 size_t i;
1737
1738 if (name != NULL)
1739 {
1740 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1741 if (! strcmp (name, processor_target_table[i].name))
1742 return (int)i;
1743 }
1744
1745 return -1;
1746 }
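/* Usage sketch (assuming "power8" is one of the table entries):
   rs6000_cpu_name_lookup ("power8") returns the index of that row in
   processor_target_table, while an unknown or NULL name returns -1.  */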
1747
1748 \f
1749 /* Return number of consecutive hard regs needed starting at reg REGNO
1750 to hold something of mode MODE.
1751 This is ordinarily the length in words of a value of mode MODE
1752 but can be less for certain modes in special long registers.
1753
1754 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1755 scalar instructions. The upper 32 bits are only available to the
1756 SIMD instructions.
1757
1758 POWER and PowerPC GPRs hold 32 bits worth;
1759 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1760
1761 static int
1762 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1763 {
1764 unsigned HOST_WIDE_INT reg_size;
1765
1766 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1767 128-bit floating point that can go in vector registers, which has VSX
1768 memory addressing. */
1769 if (FP_REGNO_P (regno))
1770 reg_size = (VECTOR_MEM_VSX_P (mode)
1771 ? UNITS_PER_VSX_WORD
1772 : UNITS_PER_FP_WORD);
1773
1774 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1775 reg_size = UNITS_PER_SPE_WORD;
1776
1777 else if (ALTIVEC_REGNO_P (regno))
1778 reg_size = UNITS_PER_ALTIVEC_WORD;
1779
1780 /* The value returned for SCmode in the E500 double case is 2 for
1781 ABI compatibility; storing an SCmode value in a single register
1782 would require function_arg and rs6000_spe_function_arg to handle
1783 SCmode so as to pass the value correctly in a pair of
1784 registers. */
1785 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1786 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1787 reg_size = UNITS_PER_FP_WORD;
1788
1789 else
1790 reg_size = UNITS_PER_WORD;
1791
1792 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1793 }
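/* Worked example of the rounding division above: V4SImode is 16 bytes, so
   in an AltiVec register (reg_size == UNITS_PER_ALTIVEC_WORD == 16) it
   needs (16 + 16 - 1) / 16 == 1 register, while in a 64-bit GPR
   (reg_size == UNITS_PER_WORD == 8) it would need (16 + 8 - 1) / 8 == 2
   consecutive registers.  */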
1794
1795 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1796 MODE. */
1797 static int
1798 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1799 {
1800 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1801
1802 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1803 register pairs, and PTImode is used where we need to deal with such
1804 operations. Don't allow quad words in the argument or frame
1805 pointer registers, just registers 0..31. */
1806 if (mode == PTImode)
1807 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1808 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1809 && ((regno & 1) == 0));
1810
1811 /* VSX registers that overlap the FPRs are larger (128 bits) than the FPRs
1812 on non-VSX implementations. Don't allow an item to be split between an FP
1813 register and an Altivec register. Allow TImode in all VSX registers if the
1814 user asked for it. */
1815 if (TARGET_VSX && VSX_REGNO_P (regno)
1816 && (VECTOR_MEM_VSX_P (mode)
1817 || FLOAT128_VECTOR_P (mode)
1818 || reg_addr[mode].scalar_in_vmx_p
1819 || (TARGET_VSX_TIMODE && mode == TImode)
1820 || (TARGET_VADDUQM && mode == V1TImode)))
1821 {
1822 if (FP_REGNO_P (regno))
1823 return FP_REGNO_P (last_regno);
1824
1825 if (ALTIVEC_REGNO_P (regno))
1826 {
1827 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1828 return 0;
1829
1830 return ALTIVEC_REGNO_P (last_regno);
1831 }
1832 }
1833
1834 /* The GPRs can hold any mode, but values bigger than one register
1835 cannot go past R31. */
1836 if (INT_REGNO_P (regno))
1837 return INT_REGNO_P (last_regno);
1838
1839 /* The float registers (except for VSX vector modes) can only hold floating
1840 modes and DImode. */
1841 if (FP_REGNO_P (regno))
1842 {
1843 if (FLOAT128_VECTOR_P (mode))
1844 return false;
1845
1846 if (SCALAR_FLOAT_MODE_P (mode)
1847 && (mode != TDmode || (regno % 2) == 0)
1848 && FP_REGNO_P (last_regno))
1849 return 1;
1850
1851 if (GET_MODE_CLASS (mode) == MODE_INT
1852 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1853 return 1;
1854
1855 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1856 && PAIRED_VECTOR_MODE (mode))
1857 return 1;
1858
1859 return 0;
1860 }
1861
1862 /* The CR register can only hold CC modes. */
1863 if (CR_REGNO_P (regno))
1864 return GET_MODE_CLASS (mode) == MODE_CC;
1865
1866 if (CA_REGNO_P (regno))
1867 return mode == Pmode || mode == SImode;
1868
1869 /* AltiVec modes can only go in AltiVec registers. */
1870 if (ALTIVEC_REGNO_P (regno))
1871 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1872 || mode == V1TImode);
1873
1874 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1875 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1876 return 1;
1877
1878 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1879 registers, and the value must fit within the register set. */
1880
1881 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1882 }
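/* For example, the checks above accept CCmode only in the CR registers,
   and accept DFmode in GPRs and FPRs (and also in Altivec registers when
   reg_addr[DFmode].scalar_in_vmx_p is set under TARGET_VSX).  */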
1883
1884 /* Print interesting facts about registers. */
1885 static void
1886 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1887 {
1888 int r, m;
1889
1890 for (r = first_regno; r <= last_regno; ++r)
1891 {
1892 const char *comma = "";
1893 int len;
1894
1895 if (first_regno == last_regno)
1896 fprintf (stderr, "%s:\t", reg_name);
1897 else
1898 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1899
1900 len = 8;
1901 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1902 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1903 {
1904 if (len > 70)
1905 {
1906 fprintf (stderr, ",\n\t");
1907 len = 8;
1908 comma = "";
1909 }
1910
1911 if (rs6000_hard_regno_nregs[m][r] > 1)
1912 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1913 rs6000_hard_regno_nregs[m][r]);
1914 else
1915 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1916
1917 comma = ", ";
1918 }
1919
1920 if (call_used_regs[r])
1921 {
1922 if (len > 70)
1923 {
1924 fprintf (stderr, ",\n\t");
1925 len = 8;
1926 comma = "";
1927 }
1928
1929 len += fprintf (stderr, "%s%s", comma, "call-used");
1930 comma = ", ";
1931 }
1932
1933 if (fixed_regs[r])
1934 {
1935 if (len > 70)
1936 {
1937 fprintf (stderr, ",\n\t");
1938 len = 8;
1939 comma = "";
1940 }
1941
1942 len += fprintf (stderr, "%s%s", comma, "fixed");
1943 comma = ", ";
1944 }
1945
1946 if (len > 70)
1947 {
1948 fprintf (stderr, ",\n\t");
1949 comma = "";
1950 }
1951
1952 len += fprintf (stderr, "%sreg-class = %s", comma,
1953 reg_class_names[(int)rs6000_regno_regclass[r]]);
1954 comma = ", ";
1955
1956 if (len > 70)
1957 {
1958 fprintf (stderr, ",\n\t");
1959 comma = "";
1960 }
1961
1962 fprintf (stderr, "%sregno = %d\n", comma, r);
1963 }
1964 }
1965
1966 static const char *
1967 rs6000_debug_vector_unit (enum rs6000_vector v)
1968 {
1969 const char *ret;
1970
1971 switch (v)
1972 {
1973 case VECTOR_NONE: ret = "none"; break;
1974 case VECTOR_ALTIVEC: ret = "altivec"; break;
1975 case VECTOR_VSX: ret = "vsx"; break;
1976 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1977 case VECTOR_PAIRED: ret = "paired"; break;
1978 case VECTOR_SPE: ret = "spe"; break;
1979 case VECTOR_OTHER: ret = "other"; break;
1980 default: ret = "unknown"; break;
1981 }
1982
1983 return ret;
1984 }
1985
1986 /* Inner function printing just the address mask for a particular reload
1987 register class. */
1988 DEBUG_FUNCTION char *
1989 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1990 {
1991 static char ret[8];
1992 char *p = ret;
1993
1994 if ((mask & RELOAD_REG_VALID) != 0)
1995 *p++ = 'v';
1996 else if (keep_spaces)
1997 *p++ = ' ';
1998
1999 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2000 *p++ = 'm';
2001 else if (keep_spaces)
2002 *p++ = ' ';
2003
2004 if ((mask & RELOAD_REG_INDEXED) != 0)
2005 *p++ = 'i';
2006 else if (keep_spaces)
2007 *p++ = ' ';
2008
2009 if ((mask & RELOAD_REG_OFFSET) != 0)
2010 *p++ = 'o';
2011 else if (keep_spaces)
2012 *p++ = ' ';
2013
2014 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2015 *p++ = '+';
2016 else if (keep_spaces)
2017 *p++ = ' ';
2018
2019 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2020 *p++ = '+';
2021 else if (keep_spaces)
2022 *p++ = ' ';
2023
2024 if ((mask & RELOAD_REG_AND_M16) != 0)
2025 *p++ = '&';
2026 else if (keep_spaces)
2027 *p++ = ' ';
2028
2029 *p = '\0';
2030
2031 return ret;
2032 }
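/* Example of the flag string built above: a mask with RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and RELOAD_REG_OFFSET set and keep_spaces == true
   yields "v io   " (one column per flag in the v/m/i/o/+/+/& order used
   above), while keep_spaces == false compresses it to "vio".  */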
2033
2034 /* Print the address masks in a human readable fashion. */
2035 DEBUG_FUNCTION void
2036 rs6000_debug_print_mode (ssize_t m)
2037 {
2038 ssize_t rc;
2039 int spaces = 0;
2040 bool fuse_extra_p;
2041
2042 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2043 for (rc = 0; rc < N_RELOAD_REG; rc++)
2044 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2045 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2046
2047 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2048 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2049 fprintf (stderr, " Reload=%c%c",
2050 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2051 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2052 else
2053 spaces += sizeof (" Reload=sl") - 1;
2054
2055 if (reg_addr[m].scalar_in_vmx_p)
2056 {
2057 fprintf (stderr, "%*s Upper=y", spaces, "");
2058 spaces = 0;
2059 }
2060 else
2061 spaces += sizeof (" Upper=y") - 1;
2062
2063 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2064 || reg_addr[m].fused_toc);
2065 if (!fuse_extra_p)
2066 {
2067 for (rc = 0; rc < N_RELOAD_REG; rc++)
2068 {
2069 if (rc != RELOAD_REG_ANY)
2070 {
2071 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2073 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2074 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2075 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2076 {
2077 fuse_extra_p = true;
2078 break;
2079 }
2080 }
2081 }
2082 }
2083
2084 if (fuse_extra_p)
2085 {
2086 fprintf (stderr, "%*s Fuse:", spaces, "");
2087 spaces = 0;
2088
2089 for (rc = 0; rc < N_RELOAD_REG; rc++)
2090 {
2091 if (rc != RELOAD_REG_ANY)
2092 {
2093 char load, store;
2094
2095 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2096 load = 'l';
2097 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2098 load = 'L';
2099 else
2100 load = '-';
2101
2102 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2103 store = 's';
2104 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2105 store = 'S';
2106 else
2107 store = '-';
2108
2109 if (load == '-' && store == '-')
2110 spaces += 5;
2111 else
2112 {
2113 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2114 reload_reg_map[rc].name[0], load, store);
2115 spaces = 0;
2116 }
2117 }
2118 }
2119
2120 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2121 {
2122 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2123 spaces = 0;
2124 }
2125 else
2126 spaces += sizeof (" P8gpr") - 1;
2127
2128 if (reg_addr[m].fused_toc)
2129 {
2130 fprintf (stderr, "%*sToc", (spaces + 1), "");
2131 spaces = 0;
2132 }
2133 else
2134 spaces += sizeof (" Toc") - 1;
2135 }
2136 else
2137 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2138
2139 if (rs6000_vector_unit[m] != VECTOR_NONE
2140 || rs6000_vector_mem[m] != VECTOR_NONE)
2141 {
2142 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2143 spaces, "",
2144 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2145 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2146 }
2147
2148 fputs ("\n", stderr);
2149 }
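/* An illustrative (not literal) line of the output produced above:

   Mode: DF    GPR: v io++  FPR: v io++  VMX:         ANY: v io++  Reload=sl

   i.e. one rs6000_debug_addr_mask column group per reload register class,
   followed by the optional Reload/Upper/Fuse/vector annotations.  */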
2150
2151 #define DEBUG_FMT_ID "%-32s= "
2152 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2153 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2154 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
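/* For instance, fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2") prints "abi"
   left-justified and padded with spaces to 32 columns, then "= ELFv2",
   so the '=' signs of successive lines align in column 33.  */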
2155
2156 /* Print various interesting information with -mdebug=reg. */
2157 static void
2158 rs6000_debug_reg_global (void)
2159 {
2160 static const char *const tf[2] = { "false", "true" };
2161 const char *nl = (const char *)0;
2162 int m;
2163 size_t m1, m2, v;
2164 char costly_num[20];
2165 char nop_num[20];
2166 char flags_buffer[40];
2167 const char *costly_str;
2168 const char *nop_str;
2169 const char *trace_str;
2170 const char *abi_str;
2171 const char *cmodel_str;
2172 struct cl_target_option cl_opts;
2173
2174 /* Modes we want tieable information on. */
2175 static const machine_mode print_tieable_modes[] = {
2176 QImode,
2177 HImode,
2178 SImode,
2179 DImode,
2180 TImode,
2181 PTImode,
2182 SFmode,
2183 DFmode,
2184 TFmode,
2185 IFmode,
2186 KFmode,
2187 SDmode,
2188 DDmode,
2189 TDmode,
2190 V8QImode,
2191 V4HImode,
2192 V2SImode,
2193 V16QImode,
2194 V8HImode,
2195 V4SImode,
2196 V2DImode,
2197 V1TImode,
2198 V32QImode,
2199 V16HImode,
2200 V8SImode,
2201 V4DImode,
2202 V2TImode,
2203 V2SFmode,
2204 V4SFmode,
2205 V2DFmode,
2206 V8SFmode,
2207 V4DFmode,
2208 CCmode,
2209 CCUNSmode,
2210 CCEQmode,
2211 };
2212
2213 /* Virtual regs we are interested in. */
2214 static const struct {
2215 int regno; /* register number. */
2216 const char *name; /* register name. */
2217 } virtual_regs[] = {
2218 { STACK_POINTER_REGNUM, "stack pointer:" },
2219 { TOC_REGNUM, "toc: " },
2220 { STATIC_CHAIN_REGNUM, "static chain: " },
2221 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2222 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2223 { ARG_POINTER_REGNUM, "arg pointer: " },
2224 { FRAME_POINTER_REGNUM, "frame pointer:" },
2225 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2226 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2227 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2228 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2229 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2230 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2231 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2232 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2233 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2234 };
2235
2236 fputs ("\nHard register information:\n", stderr);
2237 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2238 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2239 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2240 LAST_ALTIVEC_REGNO,
2241 "vs");
2242 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2243 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2244 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2245 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2246 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2247 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2248 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2249 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2250
2251 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2252 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2253 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2254
2255 fprintf (stderr,
2256 "\n"
2257 "d reg_class = %s\n"
2258 "f reg_class = %s\n"
2259 "v reg_class = %s\n"
2260 "wa reg_class = %s\n"
2261 "wd reg_class = %s\n"
2262 "wf reg_class = %s\n"
2263 "wg reg_class = %s\n"
2264 "wh reg_class = %s\n"
2265 "wi reg_class = %s\n"
2266 "wj reg_class = %s\n"
2267 "wk reg_class = %s\n"
2268 "wl reg_class = %s\n"
2269 "wm reg_class = %s\n"
2270 "wp reg_class = %s\n"
2271 "wq reg_class = %s\n"
2272 "wr reg_class = %s\n"
2273 "ws reg_class = %s\n"
2274 "wt reg_class = %s\n"
2275 "wu reg_class = %s\n"
2276 "wv reg_class = %s\n"
2277 "ww reg_class = %s\n"
2278 "wx reg_class = %s\n"
2279 "wy reg_class = %s\n"
2280 "wz reg_class = %s\n"
2281 "\n",
2282 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2283 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2284 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2285 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2286 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2287 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2288 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2289 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2290 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2291 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2292 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2293 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2294 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2295 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2296 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2297 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2298 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2299 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2300 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2301 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2302 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2303 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2304 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2305 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2306
2307 nl = "\n";
2308 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2309 rs6000_debug_print_mode (m);
2310
2311 fputs ("\n", stderr);
2312
2313 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2314 {
2315 machine_mode mode1 = print_tieable_modes[m1];
2316 bool first_time = true;
2317
2318 nl = (const char *)0;
2319 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2320 {
2321 machine_mode mode2 = print_tieable_modes[m2];
2322 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2323 {
2324 if (first_time)
2325 {
2326 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2327 nl = "\n";
2328 first_time = false;
2329 }
2330
2331 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2332 }
2333 }
2334
2335 if (!first_time)
2336 fputs ("\n", stderr);
2337 }
2338
2339 if (nl)
2340 fputs (nl, stderr);
2341
2342 if (rs6000_recip_control)
2343 {
2344 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2345
2346 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2347 if (rs6000_recip_bits[m])
2348 {
2349 fprintf (stderr,
2350 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2351 GET_MODE_NAME (m),
2352 (RS6000_RECIP_AUTO_RE_P (m)
2353 ? "auto"
2354 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2355 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2356 ? "auto"
2357 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2358 }
2359
2360 fputs ("\n", stderr);
2361 }
2362
2363 if (rs6000_cpu_index >= 0)
2364 {
2365 const char *name = processor_target_table[rs6000_cpu_index].name;
2366 HOST_WIDE_INT flags
2367 = processor_target_table[rs6000_cpu_index].target_enable;
2368
2369 sprintf (flags_buffer, "-mcpu=%s flags", name);
2370 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2371 }
2372 else
2373 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2374
2375 if (rs6000_tune_index >= 0)
2376 {
2377 const char *name = processor_target_table[rs6000_tune_index].name;
2378 HOST_WIDE_INT flags
2379 = processor_target_table[rs6000_tune_index].target_enable;
2380
2381 sprintf (flags_buffer, "-mtune=%s flags", name);
2382 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2383 }
2384 else
2385 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2386
2387 cl_target_option_save (&cl_opts, &global_options);
2388 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2389 rs6000_isa_flags);
2390
2391 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2392 rs6000_isa_flags_explicit);
2393
2394 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2395 rs6000_builtin_mask);
2396
2397 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2398
2399 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2400 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2401
2402 switch (rs6000_sched_costly_dep)
2403 {
2404 case max_dep_latency:
2405 costly_str = "max_dep_latency";
2406 break;
2407
2408 case no_dep_costly:
2409 costly_str = "no_dep_costly";
2410 break;
2411
2412 case all_deps_costly:
2413 costly_str = "all_deps_costly";
2414 break;
2415
2416 case true_store_to_load_dep_costly:
2417 costly_str = "true_store_to_load_dep_costly";
2418 break;
2419
2420 case store_to_load_dep_costly:
2421 costly_str = "store_to_load_dep_costly";
2422 break;
2423
2424 default:
2425 costly_str = costly_num;
2426 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2427 break;
2428 }
2429
2430 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2431
2432 switch (rs6000_sched_insert_nops)
2433 {
2434 case sched_finish_regroup_exact:
2435 nop_str = "sched_finish_regroup_exact";
2436 break;
2437
2438 case sched_finish_pad_groups:
2439 nop_str = "sched_finish_pad_groups";
2440 break;
2441
2442 case sched_finish_none:
2443 nop_str = "sched_finish_none";
2444 break;
2445
2446 default:
2447 nop_str = nop_num;
2448 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2449 break;
2450 }
2451
2452 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2453
2454 switch (rs6000_sdata)
2455 {
2456 default:
2457 case SDATA_NONE:
2458 break;
2459
2460 case SDATA_DATA:
2461 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2462 break;
2463
2464 case SDATA_SYSV:
2465 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2466 break;
2467
2468 case SDATA_EABI:
2469 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2470 break;
2471
2472 }
2473
2474 switch (rs6000_traceback)
2475 {
2476 case traceback_default: trace_str = "default"; break;
2477 case traceback_none: trace_str = "none"; break;
2478 case traceback_part: trace_str = "part"; break;
2479 case traceback_full: trace_str = "full"; break;
2480 default: trace_str = "unknown"; break;
2481 }
2482
2483 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2484
2485 switch (rs6000_current_cmodel)
2486 {
2487 case CMODEL_SMALL: cmodel_str = "small"; break;
2488 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2489 case CMODEL_LARGE: cmodel_str = "large"; break;
2490 default: cmodel_str = "unknown"; break;
2491 }
2492
2493 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2494
2495 switch (rs6000_current_abi)
2496 {
2497 case ABI_NONE: abi_str = "none"; break;
2498 case ABI_AIX: abi_str = "aix"; break;
2499 case ABI_ELFv2: abi_str = "ELFv2"; break;
2500 case ABI_V4: abi_str = "V4"; break;
2501 case ABI_DARWIN: abi_str = "darwin"; break;
2502 default: abi_str = "unknown"; break;
2503 }
2504
2505 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2506
2507 if (rs6000_altivec_abi)
2508 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2509
2510 if (rs6000_spe_abi)
2511 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2512
2513 if (rs6000_darwin64_abi)
2514 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2515
2516 if (rs6000_float_gprs)
2517 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2518
2519 fprintf (stderr, DEBUG_FMT_S, "fprs",
2520 (TARGET_FPRS ? "true" : "false"));
2521
2522 fprintf (stderr, DEBUG_FMT_S, "single_float",
2523 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2524
2525 fprintf (stderr, DEBUG_FMT_S, "double_float",
2526 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2527
2528 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2529 (TARGET_SOFT_FLOAT ? "true" : "false"));
2530
2531 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2532 (TARGET_E500_SINGLE ? "true" : "false"));
2533
2534 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2535 (TARGET_E500_DOUBLE ? "true" : "false"));
2536
2537 if (TARGET_LINK_STACK)
2538 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2539
2540 if (targetm.lra_p ())
2541 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2542
2543 if (TARGET_P8_FUSION)
2544 {
2545 char options[80];
2546
2547 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2548 if (TARGET_TOC_FUSION)
2549 strcat (options, ", toc");
2550
2551 if (TARGET_P8_FUSION_SIGN)
2552 strcat (options, ", sign");
2553
2554 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2555 }
2556
2557 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2558 TARGET_SECURE_PLT ? "secure" : "bss");
2559 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2560 aix_struct_return ? "aix" : "sysv");
2561 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2562 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2563 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2564 tf[!!rs6000_align_branch_targets]);
2565 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2566 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2567 rs6000_long_double_type_size);
2568 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2569 (int)rs6000_sched_restricted_insns_priority);
2570 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2571 (int)END_BUILTINS);
2572 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2573 (int)RS6000_BUILTIN_COUNT);
2574
2575 if (TARGET_VSX)
2576 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2577 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2578 }
2579
2580 \f
2581 /* Update the addr mask bits in reg_addr to help secondary reload and the
2582 legitimate address support figure out the appropriate addressing to
2583 use. */
2584
2585 static void
2586 rs6000_setup_reg_addr_masks (void)
2587 {
2588 ssize_t rc, reg, m, nregs;
2589 addr_mask_type any_addr_mask, addr_mask;
2590
2591 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2592 {
2593 machine_mode m2 = (machine_mode)m;
2594 unsigned short msize = GET_MODE_SIZE (m2);
2595
2596 /* SDmode is special in that we want to access it only via REG+REG
2597 addressing on power7 and above, since we want to use the LFIWZX and
2598 STFIWX instructions to load and store it. */
2599 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2600
2601 any_addr_mask = 0;
2602 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2603 {
2604 addr_mask = 0;
2605 reg = reload_reg_map[rc].reg;
2606
2607 /* Can mode values go in the GPR/FPR/Altivec registers? */
2608 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2609 {
2610 nregs = rs6000_hard_regno_nregs[m][reg];
2611 addr_mask |= RELOAD_REG_VALID;
2612
2613 /* Indicate if the mode takes more than 1 physical register. If
2614 it takes a single register, indicate it can do REG+REG
2615 addressing. */
2616 if (nregs > 1 || m == BLKmode)
2617 addr_mask |= RELOAD_REG_MULTIPLE;
2618 else
2619 addr_mask |= RELOAD_REG_INDEXED;
2620
2621 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2622 addressing. Restrict addressing on SPE for 64-bit types
2623 because of the SUBREG hackery used to address 64-bit floats in
2624 '32-bit' GPRs. If we allow scalars into Altivec registers,
2625 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2626
2627 if (TARGET_UPDATE
2628 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2629 && msize <= 8
2630 && !VECTOR_MODE_P (m2)
2631 && !FLOAT128_VECTOR_P (m2)
2632 && !COMPLEX_MODE_P (m2)
2633 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2634 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2635 && !(TARGET_E500_DOUBLE && msize == 8))
2636 {
2637 addr_mask |= RELOAD_REG_PRE_INCDEC;
2638
2639 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2640 we don't allow PRE_MODIFY for some multi-register
2641 operations. */
2642 switch (m)
2643 {
2644 default:
2645 addr_mask |= RELOAD_REG_PRE_MODIFY;
2646 break;
2647
2648 case DImode:
2649 if (TARGET_POWERPC64)
2650 addr_mask |= RELOAD_REG_PRE_MODIFY;
2651 break;
2652
2653 case DFmode:
2654 case DDmode:
2655 if (TARGET_DF_INSN)
2656 addr_mask |= RELOAD_REG_PRE_MODIFY;
2657 break;
2658 }
2659 }
2660 }
2661
2662 /* GPR and FPR registers can do REG+OFFSET addressing, except
2663 possibly for SDmode. */
2664 if ((addr_mask != 0) && !indexed_only_p
2665 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2666 addr_mask |= RELOAD_REG_OFFSET;
2667
2668 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2669 addressing on 128-bit types. */
2670 if (rc == RELOAD_REG_VMX && msize == 16
2671 && (addr_mask & RELOAD_REG_VALID) != 0)
2672 addr_mask |= RELOAD_REG_AND_M16;
2673
2674 reg_addr[m].addr_mask[rc] = addr_mask;
2675 any_addr_mask |= addr_mask;
2676 }
2677
2678 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2679 }
2680 }
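/* As an illustration of the masks computed above (the exact bits vary with
   the target flags): on a 64-bit configuration with -mupdate (the
   default), DImode in the GPR reload class ends up with RELOAD_REG_VALID
   | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET plus the PRE_INCDEC and
   PRE_MODIFY bits, while a 16-byte vector mode in the VMX class gets
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_AND_M16 but no
   RELOAD_REG_OFFSET.  */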
2681
2682 \f
2683 /* Initialize the various global tables that are based on register size. */
2684 static void
2685 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2686 {
2687 ssize_t r, m, c;
2688 int align64;
2689 int align32;
2690
2691 /* Precalculate REGNO_REG_CLASS. */
2692 rs6000_regno_regclass[0] = GENERAL_REGS;
2693 for (r = 1; r < 32; ++r)
2694 rs6000_regno_regclass[r] = BASE_REGS;
2695
2696 for (r = 32; r < 64; ++r)
2697 rs6000_regno_regclass[r] = FLOAT_REGS;
2698
2699 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2700 rs6000_regno_regclass[r] = NO_REGS;
2701
2702 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2703 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2704
2705 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2706 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2707 rs6000_regno_regclass[r] = CR_REGS;
2708
2709 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2710 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2711 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2712 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2713 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2714 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2715 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2716 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2717 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2718 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2719 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2720 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2721
2722 /* Precalculate register class to simpler reload register class. We don't
2723 need all of the register classes that are combinations of different
2724 classes, just the simple ones that have constraint letters. */
2725 for (c = 0; c < N_REG_CLASSES; c++)
2726 reg_class_to_reg_type[c] = NO_REG_TYPE;
2727
2728 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2729 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2730 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2731 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2732 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2733 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2734 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2735 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2736 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2737 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2738 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2739 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2740
2741 if (TARGET_VSX)
2742 {
2743 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2744 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2745 }
2746 else
2747 {
2748 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2749 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2750 }
2751
2752 /* Precalculate the valid memory formats as well as the vector information;
2753 this must be set up before the rs6000_hard_regno_nregs_internal calls
2754 below. */
2755 gcc_assert ((int)VECTOR_NONE == 0);
2756 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2757 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2758
2759 gcc_assert ((int)CODE_FOR_nothing == 0);
2760 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2761
2762 gcc_assert ((int)NO_REGS == 0);
2763 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2764
2765 /* The VSX hardware allows native alignment for vectors; control whether
2766 the compiler assumes native alignment or still uses 128-bit alignment. */
2767 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2768 {
2769 align64 = 64;
2770 align32 = 32;
2771 }
2772 else
2773 {
2774 align64 = 128;
2775 align32 = 128;
2776 }
2777
2778 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2779 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2780 if (TARGET_FLOAT128)
2781 {
2782 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2783 rs6000_vector_align[KFmode] = 128;
2784
2785 if (FLOAT128_IEEE_P (TFmode))
2786 {
2787 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2788 rs6000_vector_align[TFmode] = 128;
2789 }
2790 }
2791
2792 /* V2DF mode, VSX only. */
2793 if (TARGET_VSX)
2794 {
2795 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2796 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2797 rs6000_vector_align[V2DFmode] = align64;
2798 }
2799
2800 /* V4SF mode, either VSX or Altivec. */
2801 if (TARGET_VSX)
2802 {
2803 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2804 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2805 rs6000_vector_align[V4SFmode] = align32;
2806 }
2807 else if (TARGET_ALTIVEC)
2808 {
2809 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2810 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2811 rs6000_vector_align[V4SFmode] = align32;
2812 }
2813
2814 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2815 and stores. */
2816 if (TARGET_ALTIVEC)
2817 {
2818 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2819 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2820 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2821 rs6000_vector_align[V4SImode] = align32;
2822 rs6000_vector_align[V8HImode] = align32;
2823 rs6000_vector_align[V16QImode] = align32;
2824
2825 if (TARGET_VSX)
2826 {
2827 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2828 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2829 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2830 }
2831 else
2832 {
2833 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2834 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2835 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2836 }
2837 }
2838
2839 /* V2DImode: full arithmetic needs the ISA 2.07 vector unit; allow it under
2840 VSX for insert/splat/extract. Altivec lacks 64-bit integer support. */
2841 if (TARGET_VSX)
2842 {
2843 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2844 rs6000_vector_unit[V2DImode]
2845 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2846 rs6000_vector_align[V2DImode] = align64;
2847
2848 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2849 rs6000_vector_unit[V1TImode]
2850 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2851 rs6000_vector_align[V1TImode] = 128;
2852 }
2853
2854 /* DFmode, see if we want to use the VSX unit. Memory is handled
2855 differently, so don't set rs6000_vector_mem. */
2856 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2857 {
2858 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2859 rs6000_vector_align[DFmode] = 64;
2860 }
2861
2862 /* SFmode, see if we want to use the VSX unit. */
2863 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2864 {
2865 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2866 rs6000_vector_align[SFmode] = 32;
2867 }
2868
2869 /* Allow TImode in VSX register and set the VSX memory macros. */
2870 if (TARGET_VSX && TARGET_VSX_TIMODE)
2871 {
2872 rs6000_vector_mem[TImode] = VECTOR_VSX;
2873 rs6000_vector_align[TImode] = align64;
2874 }
2875
2876 /* TODO add SPE and paired floating point vector support. */
2877
2878 /* Register class constraints for the constraints that depend on compile
2879 switches. When the VSX code was added, different constraints were added
2880 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2881 of the VSX registers are used. The register classes for scalar floating
2882 point types are set based on whether we allow that type into the upper
2883 (Altivec) registers. GCC has register classes to target the Altivec
2884 registers for load/store operations, to select using a VSX memory
2885 operation instead of the traditional floating point operation. The
2886 constraints are:
2887
2888 d - Register class to use with traditional DFmode instructions.
2889 f - Register class to use with traditional SFmode instructions.
2890 v - Altivec register.
2891 wa - Any VSX register.
2892 wc - Reserved to represent individual CR bits (used in LLVM).
2893 wd - Preferred register class for V2DFmode.
2894 wf - Preferred register class for V4SFmode.
2895 wg - Float register for power6x move insns.
2896 wh - FP register for direct move instructions.
2897 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2898 wj - FP or VSX register to hold 64-bit integers for direct moves.
2899 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2900 wl - Float register if we can do 32-bit signed int loads.
2901 wm - VSX register for ISA 2.07 direct move operations.
2902 wn - always NO_REGS.
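wp - VSX register to use for IEEE 128-bit TFmode (-mabi=ieeelongdouble).
wq - VSX register to use for IEEE 128-bit KFmode.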
2903 wr - GPR if 64-bit mode is permitted.
2904 ws - Register class to do ISA 2.06 DF operations.
2905 wt - VSX register for TImode in VSX registers.
2906 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2907 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2908 ww - Register class to do SF conversions in with VSX operations.
2909 wx - Float register if we can do 32-bit int stores.
2910 wy - Register class to do ISA 2.07 SF operations.
2911 wz - Float register if we can do 32-bit unsigned int loads. */
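/* A hypothetical machine-description use of these constraints, for
   illustration only (not a pattern from rs6000.md): a V2DF register move
   might pair the "wd" constraint with VSX register operands, e.g.

       (define_insn "*illustrative_v2df_move"
         [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd")
               (match_operand:V2DF 1 "vsx_register_operand" "wd"))]
         "TARGET_VSX"
         "xxlor %x0,%x1,%x1")

   so register allocation is steered to whatever class the code below
   assigns to RS6000_CONSTRAINT_wd.  */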
2912
2913 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2914 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2915
2916 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2917 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2918
2919 if (TARGET_VSX)
2920 {
2921 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2922 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2923 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2924 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2925
2926 if (TARGET_VSX_TIMODE)
2927 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2928
2929 if (TARGET_UPPER_REGS_DF) /* DFmode */
2930 {
2931 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2932 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2933 }
2934 else
2935 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2936 }
2937
2938 /* Add conditional constraints based on various options, to allow us to
2939 collapse multiple insn patterns. */
2940 if (TARGET_ALTIVEC)
2941 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2942
2943 if (TARGET_MFPGPR) /* DFmode */
2944 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2945
2946 if (TARGET_LFIWAX)
2947 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2948
2949 if (TARGET_DIRECT_MOVE)
2950 {
2951 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2952 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2953 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2954 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2955 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2956 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2957 }
2958
2959 if (TARGET_POWERPC64)
2960 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2961
2962 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2963 {
2964 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2965 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2966 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2967 }
2968 else if (TARGET_P8_VECTOR)
2969 {
2970 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2971 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2972 }
2973 else if (TARGET_VSX)
2974 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2975
2976 if (TARGET_STFIWX)
2977 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2978
2979 if (TARGET_LFIWZX)
2980 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2981
2982 if (TARGET_FLOAT128)
2983 {
2984 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
2985 if (FLOAT128_IEEE_P (TFmode))
2986 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
2987 }
2988
2989 /* Set up the reload helper and direct move functions. */
2990 if (TARGET_VSX || TARGET_ALTIVEC)
2991 {
2992 if (TARGET_64BIT)
2993 {
2994 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2995 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2996 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2997 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2998 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2999 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3000 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3001 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3002 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3003 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3004 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3005 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3006 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3007 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3008 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3009 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3010 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3011 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3012 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3013 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3014 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3015 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3016
3017 if (FLOAT128_IEEE_P (TFmode))
3018 {
3019 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3020 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3021 }
3022
3023 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3024 available. */
3025 if (TARGET_NO_SDMODE_STACK)
3026 {
3027 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3028 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3029 }
3030
3031 if (TARGET_VSX_TIMODE)
3032 {
3033 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3034 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3035 }
3036
3037 if (TARGET_DIRECT_MOVE)
3038 {
3039 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3040 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3041 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3042 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3043 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3044 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3045 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3046 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3047 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3048
3049 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3050 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3051 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3052 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3053 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3054 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3055 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3056 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3057 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3058 }
3059 }
3060 else
3061 {
3062 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3063 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3064 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3065 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3066 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3067 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3068 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3069 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3070 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3071 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3072 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3073 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3074 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3075 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3076 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3077 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3078 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3079 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3080 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3081 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3082 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3083 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3084
3085 if (FLOAT128_IEEE_P (TFmode))
3086 {
3087 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3088 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3089 }
3090
3091 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3092 available. */
3093 if (TARGET_NO_SDMODE_STACK)
3094 {
3095 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3096 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3097 }
3098
3099 if (TARGET_VSX_TIMODE)
3100 {
3101 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3102 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3103 }
3104
3105 if (TARGET_DIRECT_MOVE)
3106 {
3107 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3108 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3109 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3110 }
3111 }
3112
3113 if (TARGET_UPPER_REGS_DF)
3114 reg_addr[DFmode].scalar_in_vmx_p = true;
3115
3116 if (TARGET_UPPER_REGS_SF)
3117 reg_addr[SFmode].scalar_in_vmx_p = true;
3118 }
3119
3120 /* Setup the fusion operations. */
3121 if (TARGET_P8_FUSION)
3122 {
3123 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3124 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3125 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3126 if (TARGET_64BIT)
3127 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3128 }
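/* The insns registered above let us keep an addis and its dependent load
   adjacent so that power8 can fuse them; schematically (an illustrative
   sketch, not literal compiler output):

       addis 9,2,.LC0@toc@ha
       lwz 10,.LC0@toc@l(9)

   The fusion_gpr_load_<mode> insns are used to keep such pairs
   together.  */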
3129
3130 if (TARGET_P9_FUSION)
3131 {
3132 struct fuse_insns {
3133 enum machine_mode mode; /* mode of the fused type. */
3134 enum machine_mode pmode; /* pointer mode. */
3135 enum rs6000_reload_reg_type rtype; /* register type. */
3136 enum insn_code load; /* load insn. */
3137 enum insn_code store; /* store insn. */
3138 };
3139
3140 static const struct fuse_insns addis_insns[] = {
3141 { SFmode, DImode, RELOAD_REG_FPR,
3142 CODE_FOR_fusion_fpr_di_sf_load,
3143 CODE_FOR_fusion_fpr_di_sf_store },
3144
3145 { SFmode, SImode, RELOAD_REG_FPR,
3146 CODE_FOR_fusion_fpr_si_sf_load,
3147 CODE_FOR_fusion_fpr_si_sf_store },
3148
3149 { DFmode, DImode, RELOAD_REG_FPR,
3150 CODE_FOR_fusion_fpr_di_df_load,
3151 CODE_FOR_fusion_fpr_di_df_store },
3152
3153 { DFmode, SImode, RELOAD_REG_FPR,
3154 CODE_FOR_fusion_fpr_si_df_load,
3155 CODE_FOR_fusion_fpr_si_df_store },
3156
3157 { DImode, DImode, RELOAD_REG_FPR,
3158 CODE_FOR_fusion_fpr_di_di_load,
3159 CODE_FOR_fusion_fpr_di_di_store },
3160
3161 { DImode, SImode, RELOAD_REG_FPR,
3162 CODE_FOR_fusion_fpr_si_di_load,
3163 CODE_FOR_fusion_fpr_si_di_store },
3164
3165 { QImode, DImode, RELOAD_REG_GPR,
3166 CODE_FOR_fusion_gpr_di_qi_load,
3167 CODE_FOR_fusion_gpr_di_qi_store },
3168
3169 { QImode, SImode, RELOAD_REG_GPR,
3170 CODE_FOR_fusion_gpr_si_qi_load,
3171 CODE_FOR_fusion_gpr_si_qi_store },
3172
3173 { HImode, DImode, RELOAD_REG_GPR,
3174 CODE_FOR_fusion_gpr_di_hi_load,
3175 CODE_FOR_fusion_gpr_di_hi_store },
3176
3177 { HImode, SImode, RELOAD_REG_GPR,
3178 CODE_FOR_fusion_gpr_si_hi_load,
3179 CODE_FOR_fusion_gpr_si_hi_store },
3180
3181 { SImode, DImode, RELOAD_REG_GPR,
3182 CODE_FOR_fusion_gpr_di_si_load,
3183 CODE_FOR_fusion_gpr_di_si_store },
3184
3185 { SImode, SImode, RELOAD_REG_GPR,
3186 CODE_FOR_fusion_gpr_si_si_load,
3187 CODE_FOR_fusion_gpr_si_si_store },
3188
3189 { SFmode, DImode, RELOAD_REG_GPR,
3190 CODE_FOR_fusion_gpr_di_sf_load,
3191 CODE_FOR_fusion_gpr_di_sf_store },
3192
3193 { SFmode, SImode, RELOAD_REG_GPR,
3194 CODE_FOR_fusion_gpr_si_sf_load,
3195 CODE_FOR_fusion_gpr_si_sf_store },
3196
3197 { DImode, DImode, RELOAD_REG_GPR,
3198 CODE_FOR_fusion_gpr_di_di_load,
3199 CODE_FOR_fusion_gpr_di_di_store },
3200
3201 { DFmode, DImode, RELOAD_REG_GPR,
3202 CODE_FOR_fusion_gpr_di_df_load,
3203 CODE_FOR_fusion_gpr_di_df_store },
3204 };
3205
3206 enum machine_mode cur_pmode = Pmode;
3207 size_t i;
3208
3209 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3210 {
3211 enum machine_mode xmode = addis_insns[i].mode;
3212 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3213
3214 if (addis_insns[i].pmode != cur_pmode)
3215 continue;
3216
3217 if (rtype == RELOAD_REG_FPR
3218 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3219 continue;
3220
3221 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3222 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3223 }
3224 }
3225
3226 /* Note which types we support for fusing a TOC setup plus a memory insn.
3227 We only do fused TOCs for medium/large code models. */
3228 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3229 && (TARGET_CMODEL != CMODEL_SMALL))
3230 {
3231 reg_addr[QImode].fused_toc = true;
3232 reg_addr[HImode].fused_toc = true;
3233 reg_addr[SImode].fused_toc = true;
3234 reg_addr[DImode].fused_toc = true;
3235 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3236 {
3237 if (TARGET_SINGLE_FLOAT)
3238 reg_addr[SFmode].fused_toc = true;
3239 if (TARGET_DOUBLE_FLOAT)
3240 reg_addr[DFmode].fused_toc = true;
3241 }
3242 }
3243
3244 /* Precalculate HARD_REGNO_NREGS. */
3245 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3246 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3247 rs6000_hard_regno_nregs[m][r]
3248 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3249
3250 /* Precalculate HARD_REGNO_MODE_OK. */
3251 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3252 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3253 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3254 rs6000_hard_regno_mode_ok_p[m][r] = true;
3255
3256 /* Precalculate CLASS_MAX_NREGS sizes. */
3257 for (c = 0; c < LIM_REG_CLASSES; ++c)
3258 {
3259 int reg_size;
3260
3261 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3262 reg_size = UNITS_PER_VSX_WORD;
3263
3264 else if (c == ALTIVEC_REGS)
3265 reg_size = UNITS_PER_ALTIVEC_WORD;
3266
3267 else if (c == FLOAT_REGS)
3268 reg_size = UNITS_PER_FP_WORD;
3269
3270 else
3271 reg_size = UNITS_PER_WORD;
3272
3273 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3274 {
3275 machine_mode m2 = (machine_mode)m;
3276 int reg_size2 = reg_size;
3277
3278 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3279 in VSX.  */
3280 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3281 reg_size2 = UNITS_PER_FP_WORD;
3282
3283 rs6000_class_max_nregs[m][c]
3284 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3285 }
3286 }
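      /* A worked example of the formula above (illustrative numbers): DImode is
         8 bytes, so in 4-byte GPRs on a 32-bit target it needs
         (8 + 4 - 1) / 4 = 2 registers, while a 16-byte vector mode in a
         16-byte VSX register needs only 1.  */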
3287
3288 if (TARGET_E500_DOUBLE)
3289 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3290
3291 /* Calculate the modes for which to automatically generate code to use the
3292 reciprocal divide and square root instructions.  In the future, possibly
3293 automatically generate the instructions even if the user did not specify
3294 -mrecip.  The older machines' double precision reciprocal sqrt estimate is
3295 not accurate enough.  */
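   /* Background sketch (the actual expansion lives elsewhere): a reciprocal
      estimate x0 of 1/d is refined with Newton-Raphson steps,
      x1 = x0 * (2 - d * x0), roughly doubling the accurate bits per step,
      which is why the less accurate estimates on older machines are not
      worth auto-generating for double precision.  */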
3296 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3297 if (TARGET_FRES)
3298 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3299 if (TARGET_FRE)
3300 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3301 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3302 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3303 if (VECTOR_UNIT_VSX_P (V2DFmode))
3304 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3305
3306 if (TARGET_FRSQRTES)
3307 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3308 if (TARGET_FRSQRTE)
3309 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3310 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3311 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3312 if (VECTOR_UNIT_VSX_P (V2DFmode))
3313 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3314
3315 if (rs6000_recip_control)
3316 {
3317 if (!flag_finite_math_only)
3318 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3319 if (flag_trapping_math)
3320 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3321 if (!flag_reciprocal_math)
3322 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3323 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3324 {
3325 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3326 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3327 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3328
3329 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3330 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3331 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3332
3333 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3334 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3335 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3336
3337 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3338 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3339 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3340
3341 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3342 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3343 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3344
3345 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3346 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3347 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3348
3349 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3350 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3351 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3352
3353 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3354 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3355 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3356 }
3357 }
3358
3359 /* Update the addr mask bits in reg_addr to help secondary reload and the
3360 legitimate address support figure out the appropriate addressing to
3361 use.  */
3362 rs6000_setup_reg_addr_masks ();
3363
3364 if (global_init_p || TARGET_DEBUG_TARGET)
3365 {
3366 if (TARGET_DEBUG_REG)
3367 rs6000_debug_reg_global ();
3368
3369 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3370 fprintf (stderr,
3371 "SImode variable mult cost = %d\n"
3372 "SImode constant mult cost = %d\n"
3373 "SImode short constant mult cost = %d\n"
3374 "DImode multipliciation cost = %d\n"
3375 "SImode division cost = %d\n"
3376 "DImode division cost = %d\n"
3377 "Simple fp operation cost = %d\n"
3378 "DFmode multiplication cost = %d\n"
3379 "SFmode division cost = %d\n"
3380 "DFmode division cost = %d\n"
3381 "cache line size = %d\n"
3382 "l1 cache size = %d\n"
3383 "l2 cache size = %d\n"
3384 "simultaneous prefetches = %d\n"
3385 "\n",
3386 rs6000_cost->mulsi,
3387 rs6000_cost->mulsi_const,
3388 rs6000_cost->mulsi_const9,
3389 rs6000_cost->muldi,
3390 rs6000_cost->divsi,
3391 rs6000_cost->divdi,
3392 rs6000_cost->fp,
3393 rs6000_cost->dmul,
3394 rs6000_cost->sdiv,
3395 rs6000_cost->ddiv,
3396 rs6000_cost->cache_line_size,
3397 rs6000_cost->l1_cache_size,
3398 rs6000_cost->l2_cache_size,
3399 rs6000_cost->simultaneous_prefetches);
3400 }
3401 }
3402
3403 #if TARGET_MACHO
3404 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3405
3406 static void
3407 darwin_rs6000_override_options (void)
3408 {
3409 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3410 off.  */
3411 rs6000_altivec_abi = 1;
3412 TARGET_ALTIVEC_VRSAVE = 1;
3413 rs6000_current_abi = ABI_DARWIN;
3414
3415 if (DEFAULT_ABI == ABI_DARWIN
3416 && TARGET_64BIT)
3417 darwin_one_byte_bool = 1;
3418
3419 if (TARGET_64BIT && ! TARGET_POWERPC64)
3420 {
3421 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3422 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3423 }
3424 if (flag_mkernel)
3425 {
3426 rs6000_default_long_calls = 1;
3427 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3428 }
3429
3430 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3431 Altivec. */
3432 if (!flag_mkernel && !flag_apple_kext
3433 && TARGET_64BIT
3434 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3435 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3436
3437 /* Unless the user (not the configurer) has explicitly overridden
3438 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3439 G4 unless targeting the kernel.  */
3440 if (!flag_mkernel
3441 && !flag_apple_kext
3442 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3443 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3444 && ! global_options_set.x_rs6000_cpu_index)
3445 {
3446 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3447 }
3448 }
3449 #endif
3450
3451 /* If not otherwise specified by a target, make 'long double' equivalent to
3452 'double'. */
3453
3454 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3455 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3456 #endif
3457
3458 /* Return the builtin mask of the various options that could affect which
3459 builtins are enabled.  In the past we used target_flags, but we've run out of
3460 bits, and some options like SPE and PAIRED are no longer in
3461 target_flags.  */
3462
3463 HOST_WIDE_INT
3464 rs6000_builtin_mask_calculate (void)
3465 {
3466 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3467 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3468 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3469 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3470 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3471 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3472 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3473 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3474 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3475 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3476 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3477 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3478 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3479 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3480 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3481 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
3482 }
3483
3484 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3485 to clobber the XER[CA] bit because clobbering that bit without telling
3486 the compiler worked just fine with versions of GCC before GCC 5, and
3487 breaking a lot of older code in ways that are hard to track down is
3488 not such a great idea. */
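   /* For example (illustrative asm), older code such as

          asm ("addic %0,%1,-1" : "=r" (x) : "r" (y));

      modifies XER[CA] without declaring it; the implicit clobber added below
      keeps such asm statements working.  */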
3489
3490 static rtx_insn *
3491 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3492 vec<const char *> &/*constraints*/,
3493 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3494 {
3495 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3496 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3497 return NULL;
3498 }
3499
3500 /* Override command line options. Mostly we process the processor type and
3501 sometimes adjust other TARGET_ options. */
3502
3503 static bool
3504 rs6000_option_override_internal (bool global_init_p)
3505 {
3506 bool ret = true;
3507 bool have_cpu = false;
3508
3509 /* The default cpu requested at configure time, if any. */
3510 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3511
3512 HOST_WIDE_INT set_masks;
3513 int cpu_index;
3514 int tune_index;
3515 struct cl_target_option *main_target_opt
3516 = ((global_init_p || target_option_default_node == NULL)
3517 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3518
3519 /* Print defaults. */
3520 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3521 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3522
3523 /* Remember the explicit arguments. */
3524 if (global_init_p)
3525 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3526
3527 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3528 library functions, so warn about it. The flag may be useful for
3529 performance studies from time to time though, so don't disable it
3530 entirely. */
3531 if (global_options_set.x_rs6000_alignment_flags
3532 && rs6000_alignment_flags == MASK_ALIGN_POWER
3533 && DEFAULT_ABI == ABI_DARWIN
3534 && TARGET_64BIT)
3535 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3536 " it is incompatible with the installed C and C++ libraries");
3537
3538 /* Numerous experiments show that IRA-based loop pressure
3539 calculation works better for RTL loop invariant motion on targets
3540 with enough (>= 32) registers.  It is an expensive optimization,
3541 so it is enabled only for peak performance.  */
3542 if (optimize >= 3 && global_init_p
3543 && !global_options_set.x_flag_ira_loop_pressure)
3544 flag_ira_loop_pressure = 1;
3545
3546 /* Set the pointer size. */
3547 if (TARGET_64BIT)
3548 {
3549 rs6000_pmode = (int)DImode;
3550 rs6000_pointer_size = 64;
3551 }
3552 else
3553 {
3554 rs6000_pmode = (int)SImode;
3555 rs6000_pointer_size = 32;
3556 }
3557
3558 /* Some OSs don't support saving the high part of 64-bit registers on context
3559 switch. Other OSs don't support saving Altivec registers. On those OSs,
3560 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3561 if the user wants either, the user must explicitly specify them and we
3562 won't interfere with the user's specification. */
3563
3564 set_masks = POWERPC_MASKS;
3565 #ifdef OS_MISSING_POWERPC64
3566 if (OS_MISSING_POWERPC64)
3567 set_masks &= ~OPTION_MASK_POWERPC64;
3568 #endif
3569 #ifdef OS_MISSING_ALTIVEC
3570 if (OS_MISSING_ALTIVEC)
3571 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3572 #endif
3573
3574 /* Don't let the processor default override masks that were given explicitly.  */
3575 set_masks &= ~rs6000_isa_flags_explicit;
3576
3577 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
3578 the cpu in a target attribute or pragma, but did not specify a tuning
3579 option, use the cpu for the tuning option rather than the option specified
3580 with -mtune on the command line.  Process a '--with-cpu' configuration
3581 request as an implicit -mcpu.  */
3582 if (rs6000_cpu_index >= 0)
3583 {
3584 cpu_index = rs6000_cpu_index;
3585 have_cpu = true;
3586 }
3587 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3588 {
3589 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3590 have_cpu = true;
3591 }
3592 else if (implicit_cpu)
3593 {
3594 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3595 have_cpu = true;
3596 }
3597 else
3598 {
3599 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3600 const char *default_cpu = ((!TARGET_POWERPC64)
3601 ? "powerpc"
3602 : ((BYTES_BIG_ENDIAN)
3603 ? "powerpc64"
3604 : "powerpc64le"));
3605
3606 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3607 have_cpu = false;
3608 }
3609
3610 gcc_assert (cpu_index >= 0);
3611
3612 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3613 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3614 with those from the cpu, except for options that were explicitly set. If
3615 we don't have a cpu, do not override the target bits set in
3616 TARGET_DEFAULT. */
3617 if (have_cpu)
3618 {
3619 rs6000_isa_flags &= ~set_masks;
3620 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3621 & set_masks);
3622 }
3623 else
3624 {
3625 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3626 POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
3627 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  Since we switched
3628 to using rs6000_isa_flags, we need to do the initialization here.
3629
3630 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3631 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3632 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3633 : processor_target_table[cpu_index].target_enable);
3634 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3635 }
3636
3637 if (rs6000_tune_index >= 0)
3638 tune_index = rs6000_tune_index;
3639 else if (have_cpu)
3640 {
3641 /* Until power9 tuning is available, use power8 tuning if -mcpu=power9. */
3642 if (processor_target_table[cpu_index].processor != PROCESSOR_POWER9)
3643 rs6000_tune_index = tune_index = cpu_index;
3644 else
3645 {
3646 size_t i;
3647 tune_index = -1;
3648 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3649 if (processor_target_table[i].processor == PROCESSOR_POWER8)
3650 {
3651 rs6000_tune_index = tune_index = i;
3652 break;
3653 }
3654 }
3655 }
3656 else
3657 {
3658 size_t i;
3659 enum processor_type tune_proc
3660 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3661
3662 tune_index = -1;
3663 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3664 if (processor_target_table[i].processor == tune_proc)
3665 {
3666 rs6000_tune_index = tune_index = i;
3667 break;
3668 }
3669 }
3670
3671 gcc_assert (tune_index >= 0);
3672 rs6000_cpu = processor_target_table[tune_index].processor;
3673
3674 /* Pick defaults for SPE-related control flags.  Do this early to make sure
3675 that the TARGET_ macros are representative ASAP. */
3676 {
3677 int spe_capable_cpu =
3678 (rs6000_cpu == PROCESSOR_PPC8540
3679 || rs6000_cpu == PROCESSOR_PPC8548);
3680
3681 if (!global_options_set.x_rs6000_spe_abi)
3682 rs6000_spe_abi = spe_capable_cpu;
3683
3684 if (!global_options_set.x_rs6000_spe)
3685 rs6000_spe = spe_capable_cpu;
3686
3687 if (!global_options_set.x_rs6000_float_gprs)
3688 rs6000_float_gprs =
3689 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3690 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3691 : 0);
3692 }
3693
3694 if (global_options_set.x_rs6000_spe_abi
3695 && rs6000_spe_abi
3696 && !TARGET_SPE_ABI)
3697 error ("not configured for SPE ABI");
3698
3699 if (global_options_set.x_rs6000_spe
3700 && rs6000_spe
3701 && !TARGET_SPE)
3702 error ("not configured for SPE instruction set");
3703
3704 if (main_target_opt != NULL
3705 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3706 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3707 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3708 error ("target attribute or pragma changes SPE ABI");
3709
3710 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3711 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3712 || rs6000_cpu == PROCESSOR_PPCE5500)
3713 {
3714 if (TARGET_ALTIVEC)
3715 error ("AltiVec not supported in this target");
3716 if (TARGET_SPE)
3717 error ("SPE not supported in this target");
3718 }
3719 if (rs6000_cpu == PROCESSOR_PPCE6500)
3720 {
3721 if (TARGET_SPE)
3722 error ("SPE not supported in this target");
3723 }
3724
3725 /* Disable Cell microcode if we are optimizing for the Cell
3726 and not optimizing for size. */
3727 if (rs6000_gen_cell_microcode == -1)
3728 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3729 && !optimize_size);
3730
3731 /* If we are optimizing big endian systems for space and it's OK to
3732 use instructions that would be microcoded on the Cell, use the
3733 load/store multiple and string instructions. */
3734 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3735 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3736 | OPTION_MASK_STRING);
3737
3738 /* Don't allow -mmultiple or -mstring on little endian systems
3739 unless the cpu is a 750, because the hardware doesn't support the
3740 instructions used in little endian mode, and using them causes an
3741 alignment trap.  The 750 does not cause an alignment trap (except
3742 when the target is unaligned).  */
3743
3744 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3745 {
3746 if (TARGET_MULTIPLE)
3747 {
3748 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3749 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3750 warning (0, "-mmultiple is not supported on little endian systems");
3751 }
3752
3753 if (TARGET_STRING)
3754 {
3755 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3756 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3757 warning (0, "-mstring is not supported on little endian systems");
3758 }
3759 }
3760
3761 /* If little-endian, default to -mstrict-align on older processors.
3762 Testing for htm matches power8 and later. */
3763 if (!BYTES_BIG_ENDIAN
3764 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3765 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3766
3767 /* -maltivec={le,be} implies -maltivec. */
3768 if (rs6000_altivec_element_order != 0)
3769 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3770
3771 /* Disallow -maltivec=le in big endian mode for now. This is not
3772 known to be useful for anyone. */
3773 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3774 {
3775 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3776 rs6000_altivec_element_order = 0;
3777 }
3778
3779 /* Add some warnings for VSX. */
3780 if (TARGET_VSX)
3781 {
3782 const char *msg = NULL;
3783 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3784 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3785 {
3786 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3787 msg = N_("-mvsx requires hardware floating point");
3788 else
3789 {
3790 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3791 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3792 }
3793 }
3794 else if (TARGET_PAIRED_FLOAT)
3795 msg = N_("-mvsx and -mpaired are incompatible");
3796 else if (TARGET_AVOID_XFORM > 0)
3797 msg = N_("-mvsx needs indexed addressing");
3798 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3799 & OPTION_MASK_ALTIVEC))
3800 {
3801 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3802 msg = N_("-mvsx and -mno-altivec are incompatible");
3803 else
3804 msg = N_("-mno-altivec disables vsx");
3805 }
3806
3807 if (msg)
3808 {
3809 warning (0, msg);
3810 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3811 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3812 }
3813 }
3814
3815 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3816 the -mcpu setting to enable options that conflict. */
3817 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3818 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3819 | OPTION_MASK_ALTIVEC
3820 | OPTION_MASK_VSX)) != 0)
3821 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3822 | OPTION_MASK_DIRECT_MOVE)
3823 & ~rs6000_isa_flags_explicit);
3824
3825 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3826 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3827
3828 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3829 unless the user explicitly used the -mno-<option> to disable the code. */
3830 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM || TARGET_P9_MINMAX)
3831 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3832 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3833 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3834 else if (TARGET_VSX)
3835 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3836 else if (TARGET_POPCNTD)
3837 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3838 else if (TARGET_DFP)
3839 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3840 else if (TARGET_CMPB)
3841 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3842 else if (TARGET_FPRND)
3843 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3844 else if (TARGET_POPCNTB)
3845 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3846 else if (TARGET_ALTIVEC)
3847 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
3848
3849 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3850 {
3851 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3852 error ("-mcrypto requires -maltivec");
3853 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3854 }
3855
3856 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3857 {
3858 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3859 error ("-mdirect-move requires -mvsx");
3860 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3861 }
3862
3863 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3864 {
3865 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3866 error ("-mpower8-vector requires -maltivec");
3867 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3868 }
3869
3870 if (TARGET_P8_VECTOR && !TARGET_VSX)
3871 {
3872 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3873 error ("-mpower8-vector requires -mvsx");
3874 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3875 }
3876
3877 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3878 {
3879 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3880 error ("-mvsx-timode requires -mvsx");
3881 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3882 }
3883
3884 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3885 {
3886 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3887 error ("-mhard-dfp requires -mhard-float");
3888 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3889 }
3890
3891 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3892 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3893 the individual option. */
3894 if (TARGET_UPPER_REGS > 0)
3895 {
3896 if (TARGET_VSX
3897 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3898 {
3899 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3900 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3901 }
3902 if (TARGET_P8_VECTOR
3903 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3904 {
3905 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3906 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3907 }
3908 }
3909 else if (TARGET_UPPER_REGS == 0)
3910 {
3911 if (TARGET_VSX
3912 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3913 {
3914 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3915 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3916 }
3917 if (TARGET_P8_VECTOR
3918 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3919 {
3920 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3921 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3922 }
3923 }
3924
3925 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3926 {
3927 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3928 error ("-mupper-regs-df requires -mvsx");
3929 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3930 }
3931
3932 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3933 {
3934 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3935 error ("-mupper-regs-sf requires -mpower8-vector");
3936 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3937 }
3938
3939 /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
3940 silently turn off quad memory mode.  */
3941 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3942 {
3943 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3944 warning (0, N_("-mquad-memory requires 64-bit mode"));
3945
3946 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3947 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3948
3949 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3950 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3951 }
3952
3953 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3954 the words are reversed, but atomic operations can still be done by
3955 swapping the words.  */
3956 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3957 {
3958 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3959 warning (0, N_("-mquad-memory is not available in little endian mode"));
3960
3961 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3962 }
3963
3964 /* Assume that if the user asked for normal quad memory instructions, they
3965 want the atomic versions as well, unless they explicitly told us not to
3966 use quad word atomic instructions.  */
3967 if (TARGET_QUAD_MEMORY
3968 && !TARGET_QUAD_MEMORY_ATOMIC
3969 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3970 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3971
3972 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3973 generating power8 instructions. */
3974 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3975 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3976 & OPTION_MASK_P8_FUSION);
3977
3978 /* Setting additional fusion flags turns on base fusion. */
3979 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
3980 {
3981 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3982 {
3983 if (TARGET_P8_FUSION_SIGN)
3984 error ("-mpower8-fusion-sign requires -mpower8-fusion");
3985
3986 if (TARGET_TOC_FUSION)
3987 error ("-mtoc-fusion requires -mpower8-fusion");
3988
3989 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3990 }
3991 else
3992 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3993 }
3994
3995 /* Power9 fusion is a superset of power8 fusion.  */
3996 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
3997 {
3998 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3999 {
4000 error ("-mpower9-fusion requires -mpower8-fusion");
4001 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4002 }
4003 else
4004 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4005 }
4006
4007 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4008 generating power9 instructions. */
4009 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4010 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4011 & OPTION_MASK_P9_FUSION);
4012
4013 /* Power8 does not fuse sign-extended loads with the addis.  If we are
4014 optimizing at high levels for speed, convert a sign-extended load into a
4015 zero-extending load and an explicit sign extension.  */
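   /* Illustration (hypothetical instruction choice): addis feeding a
      sign-extending lha cannot fuse, but addis feeding a zero-extending lhz
      can, so the lhz is emitted followed by an extsh to restore the sign
      extension.  */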
4016 if (TARGET_P8_FUSION
4017 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4018 && optimize_function_for_speed_p (cfun)
4019 && optimize >= 3)
4020 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4021
4022 /* TOC fusion requires 64-bit mode and the medium/large code model.  */
4023 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4024 {
4025 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4026 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4027 warning (0, N_("-mtoc-fusion requires 64-bit"));
4028 }
4029
4030 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4031 {
4032 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4033 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4034 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4035 }
4036
4037 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4038 model. */
4039 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4040 && (TARGET_CMODEL != CMODEL_SMALL)
4041 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4042 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4043
4044 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4045 if (TARGET_P9_DFORM && !TARGET_P9_VECTOR)
4046 {
4047 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4048 error ("-mpower9-dform requires -mpower9-vector");
4049 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4050 }
4051
4052 if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_DF)
4053 {
4054 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4055 error ("-mpower9-dform requires -mupper-regs-df");
4056 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4057 }
4058
4059 if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_SF)
4060 {
4061 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4062 error ("-mpower9-dform requires -mupper-regs-sf");
4063 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4064 }
4065
4066 /* ISA 3.0 vector instructions include ISA 2.07. */
4067 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4068 {
4069 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4070 error ("-mpower9-vector requires -mpower8-vector");
4071 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4072 }
4073
4074 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4075 support.  If we only have ISA 2.06 support, and the user did not specify
4076 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4077 but we don't enable the full vectorization support.  */
4078 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4079 TARGET_ALLOW_MOVMISALIGN = 1;
4080
4081 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4082 {
4083 if (TARGET_ALLOW_MOVMISALIGN > 0)
4084 error ("-mallow-movmisalign requires -mvsx");
4085
4086 TARGET_ALLOW_MOVMISALIGN = 0;
4087 }
4088
4089 /* Determine when unaligned vector accesses are permitted, and when
4090 they are preferred over masked Altivec loads. Note that if
4091 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4092 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4093 not true. */
4094 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4095 {
4096 if (!TARGET_VSX)
4097 {
4098 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4099 error ("-mefficient-unaligned-vsx requires -mvsx");
4100
4101 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4102 }
4103
4104 else if (!TARGET_ALLOW_MOVMISALIGN)
4105 {
4106 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4107 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4108
4109 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4110 }
4111 }
4112
4113 /* __float128 requires VSX support. */
4114 if (TARGET_FLOAT128 && !TARGET_VSX)
4115 {
4116 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) != 0)
4117 error ("-mfloat128 requires VSX support");
4118
4119 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128 | OPTION_MASK_FLOAT128_HW);
4120 }
4121
4122 /* IEEE 128-bit floating point hardware instructions imply enabling
4123 __float128. */
4124 if (TARGET_FLOAT128_HW
4125 && (rs6000_isa_flags & (OPTION_MASK_P9_VECTOR
4126 | OPTION_MASK_DIRECT_MOVE
4127 | OPTION_MASK_UPPER_REGS_DF
4128 | OPTION_MASK_UPPER_REGS_SF)) == 0)
4129 {
4130 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4131 error ("-mfloat128-hardware requires full ISA 3.0 support");
4132
4133 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4134 }
4135
4136 else if (TARGET_P9_VECTOR && !TARGET_FLOAT128_HW
4137 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) == 0)
4138 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4139
4140 if (TARGET_FLOAT128_HW
4141 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) == 0)
4142 rs6000_isa_flags |= OPTION_MASK_FLOAT128;
4143
4144 /* Print the options after updating the defaults. */
4145 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4146 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4147
4148 /* E500mc does "better" if we inline more aggressively. Respect the
4149 user's opinion, though. */
4150 if (rs6000_block_move_inline_limit == 0
4151 && (rs6000_cpu == PROCESSOR_PPCE500MC
4152 || rs6000_cpu == PROCESSOR_PPCE500MC64
4153 || rs6000_cpu == PROCESSOR_PPCE5500
4154 || rs6000_cpu == PROCESSOR_PPCE6500))
4155 rs6000_block_move_inline_limit = 128;
4156
4157 /* store_one_arg depends on expand_block_move to handle at least the
4158 size of reg_parm_stack_space. */
4159 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4160 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4161
4162 if (global_init_p)
4163 {
4164 /* If the appropriate debug option is enabled, replace the target hooks
4165 with debug versions that call the real version and then print
4166 debugging information.  */
4167 if (TARGET_DEBUG_COST)
4168 {
4169 targetm.rtx_costs = rs6000_debug_rtx_costs;
4170 targetm.address_cost = rs6000_debug_address_cost;
4171 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4172 }
4173
4174 if (TARGET_DEBUG_ADDR)
4175 {
4176 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4177 targetm.legitimize_address = rs6000_debug_legitimize_address;
4178 rs6000_secondary_reload_class_ptr
4179 = rs6000_debug_secondary_reload_class;
4180 rs6000_secondary_memory_needed_ptr
4181 = rs6000_debug_secondary_memory_needed;
4182 rs6000_cannot_change_mode_class_ptr
4183 = rs6000_debug_cannot_change_mode_class;
4184 rs6000_preferred_reload_class_ptr
4185 = rs6000_debug_preferred_reload_class;
4186 rs6000_legitimize_reload_address_ptr
4187 = rs6000_debug_legitimize_reload_address;
4188 rs6000_mode_dependent_address_ptr
4189 = rs6000_debug_mode_dependent_address;
4190 }
4191
4192 if (rs6000_veclibabi_name)
4193 {
4194 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4195 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4196 else
4197 {
4198 error ("unknown vectorization library ABI type (%s) for "
4199 "-mveclibabi= switch", rs6000_veclibabi_name);
4200 ret = false;
4201 }
4202 }
4203 }
4204
4205 if (!global_options_set.x_rs6000_long_double_type_size)
4206 {
4207 if (main_target_opt != NULL
4208 && (main_target_opt->x_rs6000_long_double_type_size
4209 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4210 error ("target attribute or pragma changes long double size");
4211 else
4212 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4213 }
4214
4215 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4216 if (!global_options_set.x_rs6000_ieeequad)
4217 rs6000_ieeequad = 1;
4218 #endif
4219
4220 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4221 target attribute or pragma which automatically enables both options,
4222 unless the altivec ABI was set. This is set by default for 64-bit, but
4223 not for 32-bit. */
4224 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4225 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4226 | OPTION_MASK_FLOAT128)
4227 & ~rs6000_isa_flags_explicit);
4228
4229 /* Enable Altivec ABI for AIX -maltivec. */
4230 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4231 {
4232 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4233 error ("target attribute or pragma changes AltiVec ABI");
4234 else
4235 rs6000_altivec_abi = 1;
4236 }
4237
4238 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4239 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4240 be explicitly overridden in either case. */
4241 if (TARGET_ELF)
4242 {
4243 if (!global_options_set.x_rs6000_altivec_abi
4244 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4245 {
4246 if (main_target_opt != NULL
4247 && !main_target_opt->x_rs6000_altivec_abi)
4248 error ("target attribute or pragma changes AltiVec ABI");
4249 else
4250 rs6000_altivec_abi = 1;
4251 }
4252 }
4253
4254 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4255 So far, the only darwin64 targets are also MACH-O. */
4256 if (TARGET_MACHO
4257 && DEFAULT_ABI == ABI_DARWIN
4258 && TARGET_64BIT)
4259 {
4260 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4261 error ("target attribute or pragma changes darwin64 ABI");
4262 else
4263 {
4264 rs6000_darwin64_abi = 1;
4265 /* Default to natural alignment, for better performance. */
4266 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4267 }
4268 }
4269
4270 /* Place FP constants in the constant pool instead of the TOC
4271 if section anchors are enabled.  */
4272 if (flag_section_anchors
4273 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4274 TARGET_NO_FP_IN_TOC = 1;
4275
4276 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4277 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4278
4279 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4280 SUBTARGET_OVERRIDE_OPTIONS;
4281 #endif
4282 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4283 SUBSUBTARGET_OVERRIDE_OPTIONS;
4284 #endif
4285 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4286 SUB3TARGET_OVERRIDE_OPTIONS;
4287 #endif
4288
4289 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4290 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4291
4292 /* For the E500 family of cores, reset the single/double FP flags to let us
4293 check that they remain constant across attributes or pragmas.  Also,
4294 clear a possible request for string instructions, which are not supported
4295 and which we might have silently enabled above for -Os.
4296
4297 For other families, clear ISEL in case it was set implicitly.
4298 */
4299
4300 switch (rs6000_cpu)
4301 {
4302 case PROCESSOR_PPC8540:
4303 case PROCESSOR_PPC8548:
4304 case PROCESSOR_PPCE500MC:
4305 case PROCESSOR_PPCE500MC64:
4306 case PROCESSOR_PPCE5500:
4307 case PROCESSOR_PPCE6500:
4308
4309 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4310 rs6000_double_float = TARGET_E500_DOUBLE;
4311
4312 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4313
4314 break;
4315
4316 default:
4317
4318 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4319 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4320
4321 break;
4322 }
4323
4324 if (main_target_opt)
4325 {
4326 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4327 error ("target attribute or pragma changes single precision floating "
4328 "point");
4329 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4330 error ("target attribute or pragma changes double precision floating "
4331 "point");
4332 }
4333
4334 /* Detect invalid option combinations with E500. */
4335 CHECK_E500_OPTIONS;
4336
4337 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4338 && rs6000_cpu != PROCESSOR_POWER5
4339 && rs6000_cpu != PROCESSOR_POWER6
4340 && rs6000_cpu != PROCESSOR_POWER7
4341 && rs6000_cpu != PROCESSOR_POWER8
4342 && rs6000_cpu != PROCESSOR_POWER9
4343 && rs6000_cpu != PROCESSOR_PPCA2
4344 && rs6000_cpu != PROCESSOR_CELL
4345 && rs6000_cpu != PROCESSOR_PPC476);
4346 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4347 || rs6000_cpu == PROCESSOR_POWER5
4348 || rs6000_cpu == PROCESSOR_POWER7
4349 || rs6000_cpu == PROCESSOR_POWER8
4350 || rs6000_cpu == PROCESSOR_POWER9);
4351 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4352 || rs6000_cpu == PROCESSOR_POWER5
4353 || rs6000_cpu == PROCESSOR_POWER6
4354 || rs6000_cpu == PROCESSOR_POWER7
4355 || rs6000_cpu == PROCESSOR_POWER8
4356 || rs6000_cpu == PROCESSOR_POWER9
4357 || rs6000_cpu == PROCESSOR_PPCE500MC
4358 || rs6000_cpu == PROCESSOR_PPCE500MC64
4359 || rs6000_cpu == PROCESSOR_PPCE5500
4360 || rs6000_cpu == PROCESSOR_PPCE6500);
4361
4362 /* Allow debug switches to override the above settings. These are set to -1
4363 in rs6000.opt to indicate the user hasn't directly set the switch. */
4364 if (TARGET_ALWAYS_HINT >= 0)
4365 rs6000_always_hint = TARGET_ALWAYS_HINT;
4366
4367 if (TARGET_SCHED_GROUPS >= 0)
4368 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4369
4370 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4371 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4372
4373 rs6000_sched_restricted_insns_priority
4374 = (rs6000_sched_groups ? 1 : 0);
4375
4376 /* Handle -msched-costly-dep option. */
4377 rs6000_sched_costly_dep
4378 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4379
4380 if (rs6000_sched_costly_dep_str)
4381 {
4382 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4383 rs6000_sched_costly_dep = no_dep_costly;
4384 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4385 rs6000_sched_costly_dep = all_deps_costly;
4386 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4387 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4388 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4389 rs6000_sched_costly_dep = store_to_load_dep_costly;
4390 else
4391 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4392 atoi (rs6000_sched_costly_dep_str));
4393 }
4394
4395 /* Handle -minsert-sched-nops option. */
4396 rs6000_sched_insert_nops
4397 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4398
4399 if (rs6000_sched_insert_nops_str)
4400 {
4401 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4402 rs6000_sched_insert_nops = sched_finish_none;
4403 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4404 rs6000_sched_insert_nops = sched_finish_pad_groups;
4405 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4406 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4407 else
4408 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4409 atoi (rs6000_sched_insert_nops_str));
4410 }
4411
4412 if (global_init_p)
4413 {
4414 #ifdef TARGET_REGNAMES
4415 /* If the user desires alternate register names, copy in the
4416 alternate names now. */
4417 if (TARGET_REGNAMES)
4418 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4419 #endif
4420
4421 /* Set aix_struct_return last, after the ABI is determined.
4422 If -maix-struct-return or -msvr4-struct-return was explicitly
4423 used, don't override with the ABI default. */
4424 if (!global_options_set.x_aix_struct_return)
4425 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4426
4427 #if 0
4428 /* IBM XL compiler defaults to unsigned bitfields. */
4429 if (TARGET_XL_COMPAT)
4430 flag_signed_bitfields = 0;
4431 #endif
4432
4433 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4434 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4435
4436 if (TARGET_TOC)
4437 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4438
4439 /* We can only guarantee the availability of DI pseudo-ops when
4440 assembling for 64-bit targets. */
4441 if (!TARGET_64BIT)
4442 {
4443 targetm.asm_out.aligned_op.di = NULL;
4444 targetm.asm_out.unaligned_op.di = NULL;
4445 }
4446
4447
4448 /* Set branch target alignment, if not optimizing for size. */
4449 if (!optimize_size)
4450 {
4451 /* Cell wants to be aligned 8-byte for dual issue.  Titan wants to be
4452 aligned 8-byte to avoid misprediction by the branch predictor.  */
4453 if (rs6000_cpu == PROCESSOR_TITAN
4454 || rs6000_cpu == PROCESSOR_CELL)
4455 {
4456 if (align_functions <= 0)
4457 align_functions = 8;
4458 if (align_jumps <= 0)
4459 align_jumps = 8;
4460 if (align_loops <= 0)
4461 align_loops = 8;
4462 }
4463 if (rs6000_align_branch_targets)
4464 {
4465 if (align_functions <= 0)
4466 align_functions = 16;
4467 if (align_jumps <= 0)
4468 align_jumps = 16;
4469 if (align_loops <= 0)
4470 {
4471 can_override_loop_align = 1;
4472 align_loops = 16;
4473 }
4474 }
4475 if (align_jumps_max_skip <= 0)
4476 align_jumps_max_skip = 15;
4477 if (align_loops_max_skip <= 0)
4478 align_loops_max_skip = 15;
4479 }
4480
4481 /* Arrange to save and restore machine status around nested functions. */
4482 init_machine_status = rs6000_init_machine_status;
4483
4484 /* We should always be splitting complex arguments, but we can't break
4485 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4486 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4487 targetm.calls.split_complex_arg = NULL;
4488 }
4489
4490 /* Initialize rs6000_cost with the appropriate target costs. */
4491 if (optimize_size)
4492 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4493 else
4494 switch (rs6000_cpu)
4495 {
4496 case PROCESSOR_RS64A:
4497 rs6000_cost = &rs64a_cost;
4498 break;
4499
4500 case PROCESSOR_MPCCORE:
4501 rs6000_cost = &mpccore_cost;
4502 break;
4503
4504 case PROCESSOR_PPC403:
4505 rs6000_cost = &ppc403_cost;
4506 break;
4507
4508 case PROCESSOR_PPC405:
4509 rs6000_cost = &ppc405_cost;
4510 break;
4511
4512 case PROCESSOR_PPC440:
4513 rs6000_cost = &ppc440_cost;
4514 break;
4515
4516 case PROCESSOR_PPC476:
4517 rs6000_cost = &ppc476_cost;
4518 break;
4519
4520 case PROCESSOR_PPC601:
4521 rs6000_cost = &ppc601_cost;
4522 break;
4523
4524 case PROCESSOR_PPC603:
4525 rs6000_cost = &ppc603_cost;
4526 break;
4527
4528 case PROCESSOR_PPC604:
4529 rs6000_cost = &ppc604_cost;
4530 break;
4531
4532 case PROCESSOR_PPC604e:
4533 rs6000_cost = &ppc604e_cost;
4534 break;
4535
4536 case PROCESSOR_PPC620:
4537 rs6000_cost = &ppc620_cost;
4538 break;
4539
4540 case PROCESSOR_PPC630:
4541 rs6000_cost = &ppc630_cost;
4542 break;
4543
4544 case PROCESSOR_CELL:
4545 rs6000_cost = &ppccell_cost;
4546 break;
4547
4548 case PROCESSOR_PPC750:
4549 case PROCESSOR_PPC7400:
4550 rs6000_cost = &ppc750_cost;
4551 break;
4552
4553 case PROCESSOR_PPC7450:
4554 rs6000_cost = &ppc7450_cost;
4555 break;
4556
4557 case PROCESSOR_PPC8540:
4558 case PROCESSOR_PPC8548:
4559 rs6000_cost = &ppc8540_cost;
4560 break;
4561
4562 case PROCESSOR_PPCE300C2:
4563 case PROCESSOR_PPCE300C3:
4564 rs6000_cost = &ppce300c2c3_cost;
4565 break;
4566
4567 case PROCESSOR_PPCE500MC:
4568 rs6000_cost = &ppce500mc_cost;
4569 break;
4570
4571 case PROCESSOR_PPCE500MC64:
4572 rs6000_cost = &ppce500mc64_cost;
4573 break;
4574
4575 case PROCESSOR_PPCE5500:
4576 rs6000_cost = &ppce5500_cost;
4577 break;
4578
4579 case PROCESSOR_PPCE6500:
4580 rs6000_cost = &ppce6500_cost;
4581 break;
4582
4583 case PROCESSOR_TITAN:
4584 rs6000_cost = &titan_cost;
4585 break;
4586
4587 case PROCESSOR_POWER4:
4588 case PROCESSOR_POWER5:
4589 rs6000_cost = &power4_cost;
4590 break;
4591
4592 case PROCESSOR_POWER6:
4593 rs6000_cost = &power6_cost;
4594 break;
4595
4596 case PROCESSOR_POWER7:
4597 rs6000_cost = &power7_cost;
4598 break;
4599
4600 case PROCESSOR_POWER8:
4601 rs6000_cost = &power8_cost;
4602 break;
4603
4604 case PROCESSOR_POWER9:
4605 rs6000_cost = &power9_cost;
4606 break;
4607
4608 case PROCESSOR_PPCA2:
4609 rs6000_cost = &ppca2_cost;
4610 break;
4611
4612 default:
4613 gcc_unreachable ();
4614 }
4615
4616 if (global_init_p)
4617 {
4618 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4619 rs6000_cost->simultaneous_prefetches,
4620 global_options.x_param_values,
4621 global_options_set.x_param_values);
4622 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4623 global_options.x_param_values,
4624 global_options_set.x_param_values);
4625 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4626 rs6000_cost->cache_line_size,
4627 global_options.x_param_values,
4628 global_options_set.x_param_values);
4629 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4630 global_options.x_param_values,
4631 global_options_set.x_param_values);
4632
4633 /* Increase loop peeling limits based on performance analysis. */
4634 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4635 global_options.x_param_values,
4636 global_options_set.x_param_values);
4637 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4638 global_options.x_param_values,
4639 global_options_set.x_param_values);
4640
4641 /* If using typedef char *va_list, signal that
4642 __builtin_va_start (&ap, 0) can be optimized to
4643 ap = __builtin_next_arg (0). */
4644 if (DEFAULT_ABI != ABI_V4)
4645 targetm.expand_builtin_va_start = NULL;
4646 }
4647
4648 /* Set up single/double float flags.
4649 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4650 then set both flags.  */
4651 if (TARGET_HARD_FLOAT && TARGET_FPRS
4652 && rs6000_single_float == 0 && rs6000_double_float == 0)
4653 rs6000_single_float = rs6000_double_float = 1;
4654
4655 /* If not explicitly specified via option, decide whether to generate indexed
4656 load/store instructions. */
4657 if (TARGET_AVOID_XFORM == -1)
4658 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4659 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4660 need indexed accesses and the type used is the scalar type of the element
4661 being loaded or stored. */
4662 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4663 && !TARGET_ALTIVEC);
4664
4665 /* Set the -mrecip options. */
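   /* The value is a comma-separated list of keywords, each optionally negated
      with '!'; e.g. -mrecip=all,!rsqrtd enables every estimate except the
      double precision reciprocal square root (a sketch of the syntax; the
      full keyword list is in recip_options).  */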
4666 if (rs6000_recip_name)
4667 {
4668 char *p = ASTRDUP (rs6000_recip_name);
4669 char *q;
4670 unsigned int mask, i;
4671 bool invert;
4672
4673 while ((q = strtok (p, ",")) != NULL)
4674 {
4675 p = NULL;
4676 if (*q == '!')
4677 {
4678 invert = true;
4679 q++;
4680 }
4681 else
4682 invert = false;
4683
4684 if (!strcmp (q, "default"))
4685 mask = ((TARGET_RECIP_PRECISION)
4686 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4687 else
4688 {
4689 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4690 if (!strcmp (q, recip_options[i].string))
4691 {
4692 mask = recip_options[i].mask;
4693 break;
4694 }
4695
4696 if (i == ARRAY_SIZE (recip_options))
4697 {
4698 error ("unknown option for -mrecip=%s", q);
4699 invert = false;
4700 mask = 0;
4701 ret = false;
4702 }
4703 }
4704
4705 if (invert)
4706 rs6000_recip_control &= ~mask;
4707 else
4708 rs6000_recip_control |= mask;
4709 }
4710 }
4711
4712 /* Set the builtin mask of the various options used that could affect which
4713 builtins were used. In the past we used target_flags, but we've run out
4714 of bits, and some options like SPE and PAIRED are no longer in
4715 target_flags. */
4716 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4717 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4718 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4719 rs6000_builtin_mask);
4720
4721 /* Initialize all of the registers. */
4722 rs6000_init_hard_regno_mode_ok (global_init_p);
4723
4724 /* Save the initial options in case the user uses function-specific options.  */
4725 if (global_init_p)
4726 target_option_default_node = target_option_current_node
4727 = build_target_option_node (&global_options);
4728
4729 /* If not explicitly specified via option, decide whether to generate the
4730 extra blr's required to preserve the link stack on some cpus (e.g., 476).  */
4731 if (TARGET_LINK_STACK == -1)
4732 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4733
4734 return ret;
4735 }
4736
4737 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4738 define the target cpu type. */
4739
4740 static void
4741 rs6000_option_override (void)
4742 {
4743 (void) rs6000_option_override_internal (true);
4744
4745 /* Register machine-specific passes. This needs to be done at start-up.
4746 It's convenient to do it here (like i386 does). */
4747 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4748
4749 struct register_pass_info analyze_swaps_info
4750 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4751
4752 register_pass (&analyze_swaps_info);
4753 }
4754
4755 \f
4756 /* Implement targetm.vectorize.builtin_mask_for_load. */
4757 static tree
4758 rs6000_builtin_mask_for_load (void)
4759 {
4760 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4761 if ((TARGET_ALTIVEC && !TARGET_VSX)
4762 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4763 return altivec_builtin_mask_for_load;
4764 else
4765 return 0;
4766 }
4767
4768 /* Implement LOOP_ALIGN. */
4769 int
4770 rs6000_loop_align (rtx label)
4771 {
4772 basic_block bb;
4773 int ninsns;
4774
4775 /* Don't override loop alignment if -falign-loops was specified. */
4776 if (!can_override_loop_align)
4777 return align_loops_log;
4778
4779 bb = BLOCK_FOR_INSN (label);
4780 ninsns = num_loop_insns (bb->loop_father);
4781
4782 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default.  */
4783 if (ninsns > 4 && ninsns <= 8
4784 && (rs6000_cpu == PROCESSOR_POWER4
4785 || rs6000_cpu == PROCESSOR_POWER5
4786 || rs6000_cpu == PROCESSOR_POWER6
4787 || rs6000_cpu == PROCESSOR_POWER7
4788 || rs6000_cpu == PROCESSOR_POWER8
4789 || rs6000_cpu == PROCESSOR_POWER9))
4790 return 5;
4791 else
4792 return align_loops_log;
4793 }
4794
4795 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4796 static int
4797 rs6000_loop_align_max_skip (rtx_insn *label)
4798 {
4799 return (1 << rs6000_loop_align (label)) - 1;
4800 }
4801
4802 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4803 after applying N iterations.  This routine does not determine
4804 how many iterations are required to reach the desired alignment.  */
4805
4806 static bool
4807 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4808 {
4809 if (is_packed)
4810 return false;
4811
4812 if (TARGET_32BIT)
4813 {
4814 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4815 return true;
4816
4817 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4818 return true;
4819
4820 return false;
4821 }
4822 else
4823 {
4824 if (TARGET_MACHO)
4825 return false;
4826
4827 /* Assume that all other types are naturally aligned.  CHECKME!  */
4828 return true;
4829 }
4830 }
4831
4832 /* Return true if the vector misalignment factor is supported by the
4833 target. */
4834 static bool
4835 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4836 const_tree type,
4837 int misalignment,
4838 bool is_packed)
4839 {
4840 if (TARGET_VSX)
4841 {
4842 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4843 return true;
4844
4845 /* Return if movmisalign pattern is not supported for this mode. */
4846 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4847 return false;
4848
4849 if (misalignment == -1)
4850 {
4851 /* Misalignment factor is unknown at compile time but we know
4852 it's word aligned. */
4853 if (rs6000_vector_alignment_reachable (type, is_packed))
4854 {
4855 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4856
4857 if (element_size == 64 || element_size == 32)
4858 return true;
4859 }
4860
4861 return false;
4862 }
4863
4864 /* VSX supports word-aligned vectors.  */
4865 if (misalignment % 4 == 0)
4866 return true;
4867 }
4868 return false;
4869 }
4870
4871 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4872 static int
4873 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4874 tree vectype, int misalign)
4875 {
4876 unsigned elements;
4877 tree elem_type;
4878
4879 switch (type_of_cost)
4880 {
4881 case scalar_stmt:
4882 case scalar_load:
4883 case scalar_store:
4884 case vector_stmt:
4885 case vector_load:
4886 case vector_store:
4887 case vec_to_scalar:
4888 case scalar_to_vec:
4889 case cond_branch_not_taken:
4890 return 1;
4891
4892 case vec_perm:
4893 if (TARGET_VSX)
4894 return 3;
4895 else
4896 return 1;
4897
4898 case vec_promote_demote:
4899 if (TARGET_VSX)
4900 return 4;
4901 else
4902 return 1;
4903
4904 case cond_branch_taken:
4905 return 3;
4906
4907 case unaligned_load:
4908 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4909 return 1;
4910
4911 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4912 {
4913 elements = TYPE_VECTOR_SUBPARTS (vectype);
4914 if (elements == 2)
4915 /* Double word aligned. */
4916 return 2;
4917
4918 if (elements == 4)
4919 {
4920 switch (misalign)
4921 {
4922 case 8:
4923 /* Double word aligned. */
4924 return 2;
4925
4926 case -1:
4927 /* Unknown misalignment. */
4928 case 4:
4929 case 12:
4930 /* Word aligned. */
4931 return 22;
4932
4933 default:
4934 gcc_unreachable ();
4935 }
4936 }
4937 }
4938
4939 if (TARGET_ALTIVEC)
4940 /* Misaligned loads are not supported. */
4941 gcc_unreachable ();
4942
4943 return 2;
4944
4945 case unaligned_store:
4946 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4947 return 1;
4948
4949 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4950 {
4951 elements = TYPE_VECTOR_SUBPARTS (vectype);
4952 if (elements == 2)
4953 /* Double word aligned. */
4954 return 2;
4955
4956 if (elements == 4)
4957 {
4958 switch (misalign)
4959 {
4960 case 8:
4961 /* Double word aligned. */
4962 return 2;
4963
4964 case -1:
4965 /* Unknown misalignment. */
4966 case 4:
4967 case 12:
4968 /* Word aligned. */
4969 return 23;
4970
4971 default:
4972 gcc_unreachable ();
4973 }
4974 }
4975 }
4976
4977 if (TARGET_ALTIVEC)
4978 /* Misaligned stores are not supported. */
4979 gcc_unreachable ();
4980
4981 return 2;
4982
4983 case vec_construct:
4984 elements = TYPE_VECTOR_SUBPARTS (vectype);
4985 elem_type = TREE_TYPE (vectype);
4986 /* 32-bit floating-point values loaded into registers are stored as
4987 double precision, so we need n/2 converts in addition to the usual
4988 n/2 merges to construct a vector of short floats from them. */
4989 if (SCALAR_FLOAT_TYPE_P (elem_type)
4990 && TYPE_PRECISION (elem_type) == 32)
4991 return elements + 1;
4992 else
4993 return elements / 2 + 1;
4994
4995 default:
4996 gcc_unreachable ();
4997 }
4998 }
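
/* Editorial worked example (illustrative only): an unaligned_load of a
   V4SF vector, with TARGET_VSX and TARGET_ALLOW_MOVMISALIGN set but not
   TARGET_EFFICIENT_UNALIGNED_VSX, takes the elements == 4 switch above;
   misalign == 8 is doubleword aligned and costs 2, while misalign == 4
   or 12 (merely word aligned) costs 22.  */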
4999
5000 /* Implement targetm.vectorize.preferred_simd_mode. */
5001
5002 static machine_mode
5003 rs6000_preferred_simd_mode (machine_mode mode)
5004 {
5005 if (TARGET_VSX)
5006 switch (mode)
5007 {
5008 case DFmode:
5009 return V2DFmode;
5010 default:;
5011 }
5012 if (TARGET_ALTIVEC || TARGET_VSX)
5013 switch (mode)
5014 {
5015 case SFmode:
5016 return V4SFmode;
5017 case TImode:
5018 return V1TImode;
5019 case DImode:
5020 return V2DImode;
5021 case SImode:
5022 return V4SImode;
5023 case HImode:
5024 return V8HImode;
5025 case QImode:
5026 return V16QImode;
5027 default:;
5028 }
5029 if (TARGET_SPE)
5030 switch (mode)
5031 {
5032 case SFmode:
5033 return V2SFmode;
5034 case SImode:
5035 return V2SImode;
5036 default:;
5037 }
5038 if (TARGET_PAIRED_FLOAT
5039 && mode == SFmode)
5040 return V2SFmode;
5041 return word_mode;
5042 }
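
/* Editorial note (illustrative only): with -mvsx, vectorizing a loop over
   "double" queries this hook with DFmode and gets V2DFmode back; with
   AltiVec but no VSX, DFmode matches no case and word_mode is returned,
   so such a loop is not vectorized.  */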
5043
5044 typedef struct _rs6000_cost_data
5045 {
5046 struct loop *loop_info;
5047 unsigned cost[3];
5048 } rs6000_cost_data;
5049
5050 /* Test for likely overcommitment of vector hardware resources. If a
5051 loop iteration is relatively large, and too large a percentage of
5052 instructions in the loop are vectorized, the cost model may not
5053 adequately reflect delays from unavailable vector resources.
5054 Penalize the loop body cost for this case. */
5055
5056 static void
5057 rs6000_density_test (rs6000_cost_data *data)
5058 {
5059 const int DENSITY_PCT_THRESHOLD = 85;
5060 const int DENSITY_SIZE_THRESHOLD = 70;
5061 const int DENSITY_PENALTY = 10;
5062 struct loop *loop = data->loop_info;
5063 basic_block *bbs = get_loop_body (loop);
5064 int nbbs = loop->num_nodes;
5065 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5066 int i, density_pct;
5067
5068 for (i = 0; i < nbbs; i++)
5069 {
5070 basic_block bb = bbs[i];
5071 gimple_stmt_iterator gsi;
5072
5073 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5074 {
5075 gimple *stmt = gsi_stmt (gsi);
5076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5077
5078 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5079 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5080 not_vec_cost++;
5081 }
5082 }
5083
5084 free (bbs);
5085 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5086
5087 if (density_pct > DENSITY_PCT_THRESHOLD
5088 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5089 {
5090 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_NOTE, vect_location,
5093 "density %d%%, cost %d exceeds threshold, penalizing "
5094 "loop body cost by %d%%", density_pct,
5095 vec_cost + not_vec_cost, DENSITY_PENALTY);
5096 }
5097 }
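
/* Editorial worked example (illustrative only): with vec_cost == 90 and
   not_vec_cost == 10, density_pct is 90 * 100 / 100 == 90, which exceeds
   the 85% threshold, and the size 90 + 10 == 100 exceeds 70, so the body
   cost is raised to 90 * 110 / 100 == 99.  */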
5098
5099 /* Implement targetm.vectorize.init_cost. */
5100
5101 static void *
5102 rs6000_init_cost (struct loop *loop_info)
5103 {
5104 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5105 data->loop_info = loop_info;
5106 data->cost[vect_prologue] = 0;
5107 data->cost[vect_body] = 0;
5108 data->cost[vect_epilogue] = 0;
5109 return data;
5110 }
5111
5112 /* Implement targetm.vectorize.add_stmt_cost. */
5113
5114 static unsigned
5115 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5116 struct _stmt_vec_info *stmt_info, int misalign,
5117 enum vect_cost_model_location where)
5118 {
5119 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5120 unsigned retval = 0;
5121
5122 if (flag_vect_cost_model)
5123 {
5124 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5125 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5126 misalign);
5127 /* Statements in an inner loop relative to the loop being
5128 vectorized are weighted more heavily. The value here is
5129 arbitrary and could potentially be improved with analysis. */
5130 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5131 count *= 50; /* FIXME. */
5132
5133 retval = (unsigned) (count * stmt_cost);
5134 cost_data->cost[where] += retval;
5135 }
5136
5137 return retval;
5138 }
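
/* Editorial worked example (illustrative only): with the cost model
   enabled, a vector_stmt (base cost 1) occurring twice in an inner loop
   is recorded as 2 * 50 * 1 == 100 units in cost_data->cost[vect_body]
   because of the count *= 50 weighting above.  */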
5139
5140 /* Implement targetm.vectorize.finish_cost. */
5141
5142 static void
5143 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5144 unsigned *body_cost, unsigned *epilogue_cost)
5145 {
5146 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5147
5148 if (cost_data->loop_info)
5149 rs6000_density_test (cost_data);
5150
5151 *prologue_cost = cost_data->cost[vect_prologue];
5152 *body_cost = cost_data->cost[vect_body];
5153 *epilogue_cost = cost_data->cost[vect_epilogue];
5154 }
5155
5156 /* Implement targetm.vectorize.destroy_cost_data. */
5157
5158 static void
5159 rs6000_destroy_cost_data (void *data)
5160 {
5161 free (data);
5162 }
5163
5164 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5165 library with vectorized intrinsics. */
5166
5167 static tree
5168 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
5169 {
5170 char name[32];
5171 const char *suffix = NULL;
5172 tree fntype, new_fndecl, bdecl = NULL_TREE;
5173 int n_args = 1;
5174 const char *bname;
5175 machine_mode el_mode, in_mode;
5176 int n, in_n;
5177
5178 /* Libmass is suitable for unsafe math only, as it does not correctly
5179 support parts of IEEE (such as denormals) with the required precision.
5180 Only support it if we have VSX, so that the simd d2 or f4 functions
5181 can be used. XXX: Add variable-length support. */
5182 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5183 return NULL_TREE;
5184
5185 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5186 n = TYPE_VECTOR_SUBPARTS (type_out);
5187 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5188 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5189 if (el_mode != in_mode
5190 || n != in_n)
5191 return NULL_TREE;
5192
5193 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
5194 {
5195 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
5196 switch (fn)
5197 {
5198 case BUILT_IN_ATAN2:
5199 case BUILT_IN_HYPOT:
5200 case BUILT_IN_POW:
5201 n_args = 2;
5202 /* fall through */
5203
5204 case BUILT_IN_ACOS:
5205 case BUILT_IN_ACOSH:
5206 case BUILT_IN_ASIN:
5207 case BUILT_IN_ASINH:
5208 case BUILT_IN_ATAN:
5209 case BUILT_IN_ATANH:
5210 case BUILT_IN_CBRT:
5211 case BUILT_IN_COS:
5212 case BUILT_IN_COSH:
5213 case BUILT_IN_ERF:
5214 case BUILT_IN_ERFC:
5215 case BUILT_IN_EXP2:
5216 case BUILT_IN_EXP:
5217 case BUILT_IN_EXPM1:
5218 case BUILT_IN_LGAMMA:
5219 case BUILT_IN_LOG10:
5220 case BUILT_IN_LOG1P:
5221 case BUILT_IN_LOG2:
5222 case BUILT_IN_LOG:
5223 case BUILT_IN_SIN:
5224 case BUILT_IN_SINH:
5225 case BUILT_IN_SQRT:
5226 case BUILT_IN_TAN:
5227 case BUILT_IN_TANH:
5228 bdecl = builtin_decl_implicit (fn);
5229 suffix = "d2"; /* pow -> powd2 */
5230 if (el_mode != DFmode
5231 || n != 2
5232 || !bdecl)
5233 return NULL_TREE;
5234 break;
5235
5236 case BUILT_IN_ATAN2F:
5237 case BUILT_IN_HYPOTF:
5238 case BUILT_IN_POWF:
5239 n_args = 2;
5240 /* fall through */
5241
5242 case BUILT_IN_ACOSF:
5243 case BUILT_IN_ACOSHF:
5244 case BUILT_IN_ASINF:
5245 case BUILT_IN_ASINHF:
5246 case BUILT_IN_ATANF:
5247 case BUILT_IN_ATANHF:
5248 case BUILT_IN_CBRTF:
5249 case BUILT_IN_COSF:
5250 case BUILT_IN_COSHF:
5251 case BUILT_IN_ERFF:
5252 case BUILT_IN_ERFCF:
5253 case BUILT_IN_EXP2F:
5254 case BUILT_IN_EXPF:
5255 case BUILT_IN_EXPM1F:
5256 case BUILT_IN_LGAMMAF:
5257 case BUILT_IN_LOG10F:
5258 case BUILT_IN_LOG1PF:
5259 case BUILT_IN_LOG2F:
5260 case BUILT_IN_LOGF:
5261 case BUILT_IN_SINF:
5262 case BUILT_IN_SINHF:
5263 case BUILT_IN_SQRTF:
5264 case BUILT_IN_TANF:
5265 case BUILT_IN_TANHF:
5266 bdecl = builtin_decl_implicit (fn);
5267 suffix = "4"; /* powf -> powf4 */
5268 if (el_mode != SFmode
5269 || n != 4
5270 || !bdecl)
5271 return NULL_TREE;
5272 break;
5273
5274 default:
5275 return NULL_TREE;
5276 }
5277 }
5278 else
5279 return NULL_TREE;
5280
5281 gcc_assert (suffix != NULL);
5282 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5283 if (!bname)
5284 return NULL_TREE;
5285
5286 strcpy (name, bname + sizeof ("__builtin_") - 1);
5287 strcat (name, suffix);
5288
5289 if (n_args == 1)
5290 fntype = build_function_type_list (type_out, type_in, NULL);
5291 else if (n_args == 2)
5292 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5293 else
5294 gcc_unreachable ();
5295
5296 /* Build a function declaration for the vectorized function. */
5297 new_fndecl = build_decl (BUILTINS_LOCATION,
5298 FUNCTION_DECL, get_identifier (name), fntype);
5299 TREE_PUBLIC (new_fndecl) = 1;
5300 DECL_EXTERNAL (new_fndecl) = 1;
5301 DECL_IS_NOVOPS (new_fndecl) = 1;
5302 TREE_READONLY (new_fndecl) = 1;
5303
5304 return new_fndecl;
5305 }
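
/* Editorial worked example (illustrative only): for BUILT_IN_POW with a
   V2DF -> V2DF signature, bname is "__builtin_pow"; stripping the
   "__builtin_" prefix and appending the "d2" suffix yields an external
   public decl named "powd2" of type V2DF (V2DF, V2DF).  */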
5306
5307 /* Returns a function decl for a vectorized version of the builtin function
5308 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5309 if it is not available. */
5310
5311 static tree
5312 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
5313 tree type_in)
5314 {
5315 machine_mode in_mode, out_mode;
5316 int in_n, out_n;
5317
5318 if (TARGET_DEBUG_BUILTIN)
5319 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5320 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5321 GET_MODE_NAME (TYPE_MODE (type_out)),
5322 GET_MODE_NAME (TYPE_MODE (type_in)));
5323
5324 if (TREE_CODE (type_out) != VECTOR_TYPE
5325 || TREE_CODE (type_in) != VECTOR_TYPE
5326 || !TARGET_VECTORIZE_BUILTINS)
5327 return NULL_TREE;
5328
5329 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5330 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5331 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5332 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5333
5334 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
5335 {
5336 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
5337 switch (fn)
5338 {
5339 case BUILT_IN_CLZIMAX:
5340 case BUILT_IN_CLZLL:
5341 case BUILT_IN_CLZL:
5342 case BUILT_IN_CLZ:
5343 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
5344 {
5345 if (out_mode == QImode && out_n == 16)
5346 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
5347 else if (out_mode == HImode && out_n == 8)
5348 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
5349 else if (out_mode == SImode && out_n == 4)
5350 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
5351 else if (out_mode == DImode && out_n == 2)
5352 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
5353 }
5354 break;
5355 case BUILT_IN_COPYSIGN:
5356 if (VECTOR_UNIT_VSX_P (V2DFmode)
5357 && out_mode == DFmode && out_n == 2
5358 && in_mode == DFmode && in_n == 2)
5359 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5360 break;
5361 case BUILT_IN_COPYSIGNF:
5362 if (out_mode != SFmode || out_n != 4
5363 || in_mode != SFmode || in_n != 4)
5364 break;
5365 if (VECTOR_UNIT_VSX_P (V4SFmode))
5366 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5367 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
5368 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5369 break;
5370 case BUILT_IN_POPCOUNTIMAX:
5371 case BUILT_IN_POPCOUNTLL:
5372 case BUILT_IN_POPCOUNTL:
5373 case BUILT_IN_POPCOUNT:
5374 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
5375 {
5376 if (out_mode == QImode && out_n == 16)
5377 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
5378 else if (out_mode == HImode && out_n == 8)
5379 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
5380 else if (out_mode == SImode && out_n == 4)
5381 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
5382 else if (out_mode == DImode && out_n == 2)
5383 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
5384 }
5385 break;
5386 case BUILT_IN_SQRT:
5387 if (VECTOR_UNIT_VSX_P (V2DFmode)
5388 && out_mode == DFmode && out_n == 2
5389 && in_mode == DFmode && in_n == 2)
5390 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
5391 break;
5392 case BUILT_IN_SQRTF:
5393 if (VECTOR_UNIT_VSX_P (V4SFmode)
5394 && out_mode == SFmode && out_n == 4
5395 && in_mode == SFmode && in_n == 4)
5396 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
5397 break;
5398 case BUILT_IN_CEIL:
5399 if (VECTOR_UNIT_VSX_P (V2DFmode)
5400 && out_mode == DFmode && out_n == 2
5401 && in_mode == DFmode && in_n == 2)
5402 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5403 break;
5404 case BUILT_IN_CEILF:
5405 if (out_mode != SFmode || out_n != 4
5406 || in_mode != SFmode || in_n != 4)
5407 break;
5408 if (VECTOR_UNIT_VSX_P (V4SFmode))
5409 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5410 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
5411 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5412 break;
5413 case BUILT_IN_FLOOR:
5414 if (VECTOR_UNIT_VSX_P (V2DFmode)
5415 && out_mode == DFmode && out_n == 2
5416 && in_mode == DFmode && in_n == 2)
5417 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5418 break;
5419 case BUILT_IN_FLOORF:
5420 if (out_mode != SFmode || out_n != 4
5421 || in_mode != SFmode || in_n != 4)
5422 break;
5423 if (VECTOR_UNIT_VSX_P (V4SFmode))
5424 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5425 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
5426 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5427 break;
5428 case BUILT_IN_FMA:
5429 if (VECTOR_UNIT_VSX_P (V2DFmode)
5430 && out_mode == DFmode && out_n == 2
5431 && in_mode == DFmode && in_n == 2)
5432 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5433 break;
5434 case BUILT_IN_FMAF:
5435 if (VECTOR_UNIT_VSX_P (V4SFmode)
5436 && out_mode == SFmode && out_n == 4
5437 && in_mode == SFmode && in_n == 4)
5438 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5439 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5440 && out_mode == SFmode && out_n == 4
5441 && in_mode == SFmode && in_n == 4)
5442 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5443 break;
5444 case BUILT_IN_TRUNC:
5445 if (VECTOR_UNIT_VSX_P (V2DFmode)
5446 && out_mode == DFmode && out_n == 2
5447 && in_mode == DFmode && in_n == 2)
5448 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5449 break;
5450 case BUILT_IN_TRUNCF:
5451 if (out_mode != SFmode || out_n != 4
5452 || in_mode != SFmode || in_n != 4)
5453 break;
5454 if (VECTOR_UNIT_VSX_P (V4SFmode))
5455 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5456 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
5457 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5458 break;
5459 case BUILT_IN_NEARBYINT:
5460 if (VECTOR_UNIT_VSX_P (V2DFmode)
5461 && flag_unsafe_math_optimizations
5462 && out_mode == DFmode && out_n == 2
5463 && in_mode == DFmode && in_n == 2)
5464 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5465 break;
5466 case BUILT_IN_NEARBYINTF:
5467 if (VECTOR_UNIT_VSX_P (V4SFmode)
5468 && flag_unsafe_math_optimizations
5469 && out_mode == SFmode && out_n == 4
5470 && in_mode == SFmode && in_n == 4)
5471 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5472 break;
5473 case BUILT_IN_RINT:
5474 if (VECTOR_UNIT_VSX_P (V2DFmode)
5475 && !flag_trapping_math
5476 && out_mode == DFmode && out_n == 2
5477 && in_mode == DFmode && in_n == 2)
5478 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5479 break;
5480 case BUILT_IN_RINTF:
5481 if (VECTOR_UNIT_VSX_P (V4SFmode)
5482 && !flag_trapping_math
5483 && out_mode == SFmode && out_n == 4
5484 && in_mode == SFmode && in_n == 4)
5485 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5486 break;
5487 default:
5488 break;
5489 }
5490 }
5491
5492 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
5493 {
5494 enum rs6000_builtins fn
5495 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
5496 switch (fn)
5497 {
5498 case RS6000_BUILTIN_RSQRTF:
5499 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5500 && out_mode == SFmode && out_n == 4
5501 && in_mode == SFmode && in_n == 4)
5502 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5503 break;
5504 case RS6000_BUILTIN_RSQRT:
5505 if (VECTOR_UNIT_VSX_P (V2DFmode)
5506 && out_mode == DFmode && out_n == 2
5507 && in_mode == DFmode && in_n == 2)
5508 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5509 break;
5510 case RS6000_BUILTIN_RECIPF:
5511 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5512 && out_mode == SFmode && out_n == 4
5513 && in_mode == SFmode && in_n == 4)
5514 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5515 break;
5516 case RS6000_BUILTIN_RECIP:
5517 if (VECTOR_UNIT_VSX_P (V2DFmode)
5518 && out_mode == DFmode && out_n == 2
5519 && in_mode == DFmode && in_n == 2)
5520 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5521 break;
5522 default:
5523 break;
5524 }
5525 }
5526
5527 /* Generate calls to libmass if appropriate. */
5528 if (rs6000_veclib_handler)
5529 return rs6000_veclib_handler (fndecl, type_out, type_in);
5530
5531 return NULL_TREE;
5532 }
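
/* Editorial usage sketch (illustrative only): when the vectorizer asks
   for a vector form of sqrt() over doubles with -mvsx, this hook is
   called with BUILT_IN_SQRT and V2DF for both type_out and type_in, and
   returns the decl for VSX_BUILTIN_XVSQRTDP, i.e. the xvsqrtdp
   instruction.  */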
5533 \f
5534 /* Default CPU string for rs6000*_file_start functions. */
5535 static const char *rs6000_default_cpu;
5536
5537 /* Do anything needed at the start of the asm file. */
5538
5539 static void
5540 rs6000_file_start (void)
5541 {
5542 char buffer[80];
5543 const char *start = buffer;
5544 FILE *file = asm_out_file;
5545
5546 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5547
5548 default_file_start ();
5549
5550 if (flag_verbose_asm)
5551 {
5552 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5553
5554 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5555 {
5556 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5557 start = "";
5558 }
5559
5560 if (global_options_set.x_rs6000_cpu_index)
5561 {
5562 fprintf (file, "%s -mcpu=%s", start,
5563 processor_target_table[rs6000_cpu_index].name);
5564 start = "";
5565 }
5566
5567 if (global_options_set.x_rs6000_tune_index)
5568 {
5569 fprintf (file, "%s -mtune=%s", start,
5570 processor_target_table[rs6000_tune_index].name);
5571 start = "";
5572 }
5573
5574 if (PPC405_ERRATUM77)
5575 {
5576 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5577 start = "";
5578 }
5579
5580 #ifdef USING_ELFOS_H
5581 switch (rs6000_sdata)
5582 {
5583 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5584 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5585 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5586 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5587 }
5588
5589 if (rs6000_sdata && g_switch_value)
5590 {
5591 fprintf (file, "%s -G %d", start,
5592 g_switch_value);
5593 start = "";
5594 }
5595 #endif
5596
5597 if (*start == '\0')
5598 putc ('\n', file);
5599 }
5600
5601 #ifdef USING_ELFOS_H
5602 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5603 || !global_options_set.x_rs6000_cpu_index)
5604 {
5605 fputs ("\t.machine ", asm_out_file);
5606 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5607 fputs ("power9\n", asm_out_file);
5608 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5609 fputs ("power8\n", asm_out_file);
5610 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5611 fputs ("power7\n", asm_out_file);
5612 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5613 fputs ("power6\n", asm_out_file);
5614 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5615 fputs ("power5\n", asm_out_file);
5616 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5617 fputs ("power4\n", asm_out_file);
5618 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5619 fputs ("ppc64\n", asm_out_file);
5620 else
5621 fputs ("ppc\n", asm_out_file);
5622 }
5623 #endif
5624
5625 if (DEFAULT_ABI == ABI_ELFv2)
5626 fprintf (file, "\t.abiversion 2\n");
5627
5628 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5629 || (TARGET_ELF && flag_pic == 2))
5630 {
5631 switch_to_section (toc_section);
5632 switch_to_section (text_section);
5633 }
5634 }
5635
5636 \f
5637 /* Return nonzero if this function is known to have a null epilogue. */
5638
5639 int
5640 direct_return (void)
5641 {
5642 if (reload_completed)
5643 {
5644 rs6000_stack_t *info = rs6000_stack_info ();
5645
5646 if (info->first_gp_reg_save == 32
5647 && info->first_fp_reg_save == 64
5648 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5649 && ! info->lr_save_p
5650 && ! info->cr_save_p
5651 && info->vrsave_size == 0
5652 && ! info->push_p)
5653 return 1;
5654 }
5655
5656 return 0;
5657 }
5658
5659 /* Return the number of instructions it takes to form a constant in an
5660 integer register. */
5661
5662 int
5663 num_insns_constant_wide (HOST_WIDE_INT value)
5664 {
5665 /* signed constant loadable with addi */
5666 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5667 return 1;
5668
5669 /* constant loadable with addis */
5670 else if ((value & 0xffff) == 0
5671 && (value >> 31 == -1 || value >> 31 == 0))
5672 return 1;
5673
5674 else if (TARGET_POWERPC64)
5675 {
5676 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5677 HOST_WIDE_INT high = value >> 31;
5678
5679 if (high == 0 || high == -1)
5680 return 2;
5681
5682 high >>= 1;
5683
5684 if (low == 0)
5685 return num_insns_constant_wide (high) + 1;
5686 else if (high == 0)
5687 return num_insns_constant_wide (low) + 1;
5688 else
5689 return (num_insns_constant_wide (high)
5690 + num_insns_constant_wide (low) + 1);
5691 }
5692
5693 else
5694 return 2;
5695 }
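
/* Editorial worked examples (illustrative only):
     0x7fff       -> 1 insn (fits the signed 16-bit addi range)
     0x12340000   -> 1 insn (low 16 bits zero, loadable with addis)
     0x12345678   -> 2 insns (addis for the high part, ori for the low)
   On 64-bit targets a full 64-bit constant splits into 32-bit halves,
   costing num_insns_constant_wide (high) + num_insns_constant_wide (low)
   + 1 for the connecting shift/or sequence.  */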
5696
5697 int
5698 num_insns_constant (rtx op, machine_mode mode)
5699 {
5700 HOST_WIDE_INT low, high;
5701
5702 switch (GET_CODE (op))
5703 {
5704 case CONST_INT:
5705 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5706 && rs6000_is_valid_and_mask (op, mode))
5707 return 2;
5708 else
5709 return num_insns_constant_wide (INTVAL (op));
5710
5711 case CONST_WIDE_INT:
5712 {
5713 int i;
5714 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5715 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5716 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5717 return ins;
5718 }
5719
5720 case CONST_DOUBLE:
5721 if (mode == SFmode || mode == SDmode)
5722 {
5723 long l;
5724
5725 if (DECIMAL_FLOAT_MODE_P (mode))
5726 REAL_VALUE_TO_TARGET_DECIMAL32
5727 (*CONST_DOUBLE_REAL_VALUE (op), l);
5728 else
5729 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5730 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5731 }
5732
5733 long l[2];
5734 if (DECIMAL_FLOAT_MODE_P (mode))
5735 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
5736 else
5737 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5738 high = l[WORDS_BIG_ENDIAN == 0];
5739 low = l[WORDS_BIG_ENDIAN != 0];
5740
5741 if (TARGET_32BIT)
5742 return (num_insns_constant_wide (low)
5743 + num_insns_constant_wide (high));
5744 else
5745 {
5746 if ((high == 0 && low >= 0)
5747 || (high == -1 && low < 0))
5748 return num_insns_constant_wide (low);
5749
5750 else if (rs6000_is_valid_and_mask (op, mode))
5751 return 2;
5752
5753 else if (low == 0)
5754 return num_insns_constant_wide (high) + 1;
5755
5756 else
5757 return (num_insns_constant_wide (high)
5758 + num_insns_constant_wide (low) + 1);
5759 }
5760
5761 default:
5762 gcc_unreachable ();
5763 }
5764 }
5765
5766 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5767 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5768 corresponding element of the vector, but for V4SFmode and V2SFmode,
5769 the corresponding "float" is interpreted as an SImode integer. */
5770
5771 HOST_WIDE_INT
5772 const_vector_elt_as_int (rtx op, unsigned int elt)
5773 {
5774 rtx tmp;
5775
5776 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5777 gcc_assert (GET_MODE (op) != V2DImode
5778 && GET_MODE (op) != V2DFmode);
5779
5780 tmp = CONST_VECTOR_ELT (op, elt);
5781 if (GET_MODE (op) == V4SFmode
5782 || GET_MODE (op) == V2SFmode)
5783 tmp = gen_lowpart (SImode, tmp);
5784 return INTVAL (tmp);
5785 }
5786
5787 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5788 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5789 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5790 all items are set to the same value and contain COPIES replicas of the
5791 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5792 operand and the others are set to the value of the operand's msb. */
5793
5794 static bool
5795 vspltis_constant (rtx op, unsigned step, unsigned copies)
5796 {
5797 machine_mode mode = GET_MODE (op);
5798 machine_mode inner = GET_MODE_INNER (mode);
5799
5800 unsigned i;
5801 unsigned nunits;
5802 unsigned bitsize;
5803 unsigned mask;
5804
5805 HOST_WIDE_INT val;
5806 HOST_WIDE_INT splat_val;
5807 HOST_WIDE_INT msb_val;
5808
5809 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5810 return false;
5811
5812 nunits = GET_MODE_NUNITS (mode);
5813 bitsize = GET_MODE_BITSIZE (inner);
5814 mask = GET_MODE_MASK (inner);
5815
5816 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5817 splat_val = val;
5818 msb_val = val >= 0 ? 0 : -1;
5819
5820 /* Construct the value to be splatted, if possible. If not, return false. */
5821 for (i = 2; i <= copies; i *= 2)
5822 {
5823 HOST_WIDE_INT small_val;
5824 bitsize /= 2;
5825 small_val = splat_val >> bitsize;
5826 mask >>= bitsize;
5827 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5828 return false;
5829 splat_val = small_val;
5830 }
5831
5832 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5833 if (EASY_VECTOR_15 (splat_val))
5834 ;
5835
5836 /* Also check if we can splat, and then add the result to itself. Do so if
5837 the value is positive, or if the splat instruction is using OP's mode;
5838 for splat_val < 0, the splat and the add should use the same mode. */
5839 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5840 && (splat_val >= 0 || (step == 1 && copies == 1)))
5841 ;
5842
5843 /* Also check if we are loading up the most significant bit, which can be
5844 done by loading up -1 and shifting the value left by -1. */
5845 else if (EASY_VECTOR_MSB (splat_val, inner))
5846 ;
5847
5848 else
5849 return false;
5850
5851 /* Check if VAL is present in every STEP-th element, and that the
5852 other elements are filled with its most significant bit. */
5853 for (i = 1; i < nunits; ++i)
5854 {
5855 HOST_WIDE_INT desired_val;
5856 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5857 if ((i & (step - 1)) == 0)
5858 desired_val = val;
5859 else
5860 desired_val = msb_val;
5861
5862 if (desired_val != const_vector_elt_as_int (op, elt))
5863 return false;
5864 }
5865
5866 return true;
5867 }
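
/* Editorial worked example (illustrative only): the V4SI constant
   { 0x05050505, 0x05050505, 0x05050505, 0x05050505 } is not a direct
   vspltisw candidate (0x05050505 is outside -16..15), but with
   copies == 4 the folding loop above reduces 0x05050505 -> 0x0505 ->
   0x05, so the constant is recognized as vspltisb 5.  */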
5868
5869 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5870 instruction, filling in the bottom elements with 0 or -1.
5871
5872 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5873 for the number of zeroes to shift in, or negative for the number of 0xff
5874 bytes to shift in.
5875
5876 OP is a CONST_VECTOR. */
5877
5878 int
5879 vspltis_shifted (rtx op)
5880 {
5881 machine_mode mode = GET_MODE (op);
5882 machine_mode inner = GET_MODE_INNER (mode);
5883
5884 unsigned i, j;
5885 unsigned nunits;
5886 unsigned mask;
5887
5888 HOST_WIDE_INT val;
5889
5890 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5891 return 0;
5892
5893 /* We need to create pseudo registers to do the shift, so don't recognize
5894 shift vector constants after reload. */
5895 if (!can_create_pseudo_p ())
5896 return 0;
5897
5898 nunits = GET_MODE_NUNITS (mode);
5899 mask = GET_MODE_MASK (inner);
5900
5901 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5902
5903 /* Check if the value can really be the operand of a vspltis[bhw]. */
5904 if (EASY_VECTOR_15 (val))
5905 ;
5906
5907 /* Also check if we are loading up the most significant bit which can be done
5908 by loading up -1 and shifting the value left by -1. */
5909 else if (EASY_VECTOR_MSB (val, inner))
5910 ;
5911
5912 else
5913 return 0;
5914
5915 /* Check that VAL is present in the leading elements, until we find
5916 elements that are 0 or all 1 bits. */
5917 for (i = 1; i < nunits; ++i)
5918 {
5919 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5920 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5921
5922 /* If the value isn't the splat value, check for the remaining elements
5923 being 0/-1. */
5924 if (val != elt_val)
5925 {
5926 if (elt_val == 0)
5927 {
5928 for (j = i+1; j < nunits; ++j)
5929 {
5930 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5931 if (const_vector_elt_as_int (op, elt2) != 0)
5932 return 0;
5933 }
5934
5935 return (nunits - i) * GET_MODE_SIZE (inner);
5936 }
5937
5938 else if ((elt_val & mask) == mask)
5939 {
5940 for (j = i+1; j < nunits; ++j)
5941 {
5942 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5943 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5944 return 0;
5945 }
5946
5947 return -((nunits - i) * GET_MODE_SIZE (inner));
5948 }
5949
5950 else
5951 return 0;
5952 }
5953 }
5954
5955 /* If all elements are equal, we don't need to do VSLDOI. */
5956 return 0;
5957 }
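
/* Editorial worked example (illustrative only): the big-endian V4SI
   constant { 5, 5, 0, 0 } splats 5 and finds the remaining elements
   zero at i == 2, so the routine returns (4 - 2) * 4 == 8: a vspltisw
   followed by a VSLDOI that shifts in 8 bytes of zeroes.  */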
5958
5959
5960 /* Return true if OP is of the given MODE and can be synthesized
5961 with a vspltisb, vspltish or vspltisw. */
5962
5963 bool
5964 easy_altivec_constant (rtx op, machine_mode mode)
5965 {
5966 unsigned step, copies;
5967
5968 if (mode == VOIDmode)
5969 mode = GET_MODE (op);
5970 else if (mode != GET_MODE (op))
5971 return false;
5972
5973 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5974 constants. */
5975 if (mode == V2DFmode)
5976 return zero_constant (op, mode);
5977
5978 else if (mode == V2DImode)
5979 {
5980 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5981 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5982 return false;
5983
5984 if (zero_constant (op, mode))
5985 return true;
5986
5987 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5988 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5989 return true;
5990
5991 return false;
5992 }
5993
5994 /* V1TImode is a special container for TImode. Ignore for now. */
5995 else if (mode == V1TImode)
5996 return false;
5997
5998 /* Start with a vspltisw. */
5999 step = GET_MODE_NUNITS (mode) / 4;
6000 copies = 1;
6001
6002 if (vspltis_constant (op, step, copies))
6003 return true;
6004
6005 /* Then try with a vspltish. */
6006 if (step == 1)
6007 copies <<= 1;
6008 else
6009 step >>= 1;
6010
6011 if (vspltis_constant (op, step, copies))
6012 return true;
6013
6014 /* And finally a vspltisb. */
6015 if (step == 1)
6016 copies <<= 1;
6017 else
6018 step >>= 1;
6019
6020 if (vspltis_constant (op, step, copies))
6021 return true;
6022
6023 if (vspltis_shifted (op) != 0)
6024 return true;
6025
6026 return false;
6027 }
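
/* Editorial usage sketch (illustrative only):

     if (easy_altivec_constant (operands[1], V8HImode))
       ... no constant-pool load is needed ...

   e.g. a V8HI vector of all 7s succeeds via the vspltish step, while a
   V8HI vector of all 1234s fails every splat form as well as
   vspltis_shifted, and false is returned.  */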
6028
6029 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6030 result is OP. Abort if it is not possible. */
6031
6032 rtx
6033 gen_easy_altivec_constant (rtx op)
6034 {
6035 machine_mode mode = GET_MODE (op);
6036 int nunits = GET_MODE_NUNITS (mode);
6037 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6038 unsigned step = nunits / 4;
6039 unsigned copies = 1;
6040
6041 /* Start with a vspltisw. */
6042 if (vspltis_constant (op, step, copies))
6043 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6044
6045 /* Then try with a vspltish. */
6046 if (step == 1)
6047 copies <<= 1;
6048 else
6049 step >>= 1;
6050
6051 if (vspltis_constant (op, step, copies))
6052 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6053
6054 /* And finally a vspltisb. */
6055 if (step == 1)
6056 copies <<= 1;
6057 else
6058 step >>= 1;
6059
6060 if (vspltis_constant (op, step, copies))
6061 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6062
6063 gcc_unreachable ();
6064 }
6065
6066 const char *
6067 output_vec_const_move (rtx *operands)
6068 {
6069 int cst, cst2, shift;
6070 machine_mode mode;
6071 rtx dest, vec;
6072
6073 dest = operands[0];
6074 vec = operands[1];
6075 mode = GET_MODE (dest);
6076
6077 if (TARGET_VSX)
6078 {
6079 if (zero_constant (vec, mode))
6080 return "xxlxor %x0,%x0,%x0";
6081
6082 if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode))
6083 return "xxlorc %x0,%x0,%x0";
6084
6085 if ((mode == V2DImode || mode == V1TImode)
6086 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
6087 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
6088 return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1";
6089 }
6090
6091 if (TARGET_ALTIVEC)
6092 {
6093 rtx splat_vec;
6094 if (zero_constant (vec, mode))
6095 return "vxor %0,%0,%0";
6096
6097 /* Do we need to construct a value using VSLDOI? */
6098 shift = vspltis_shifted (vec);
6099 if (shift != 0)
6100 return "#";
6101
6102 splat_vec = gen_easy_altivec_constant (vec);
6103 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6104 operands[1] = XEXP (splat_vec, 0);
6105 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6106 return "#";
6107
6108 switch (GET_MODE (splat_vec))
6109 {
6110 case V4SImode:
6111 return "vspltisw %0,%1";
6112
6113 case V8HImode:
6114 return "vspltish %0,%1";
6115
6116 case V16QImode:
6117 return "vspltisb %0,%1";
6118
6119 default:
6120 gcc_unreachable ();
6121 }
6122 }
6123
6124 gcc_assert (TARGET_SPE);
6125
6126 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6127 pattern of V1DI, V4HI, and V2SF.
6128
6129 FIXME: We should probably return # and add post reload
6130 splitters for these, but this way is so easy ;-). */
6131 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6132 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6133 operands[1] = CONST_VECTOR_ELT (vec, 0);
6134 operands[2] = CONST_VECTOR_ELT (vec, 1);
6135 if (cst == cst2)
6136 return "li %0,%1\n\tevmergelo %0,%0,%0";
6137 else if (WORDS_BIG_ENDIAN)
6138 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6139 else
6140 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6141 }
6142
6143 /* Initialize TARGET of vector PAIRED to VALS. */
6144
6145 void
6146 paired_expand_vector_init (rtx target, rtx vals)
6147 {
6148 machine_mode mode = GET_MODE (target);
6149 int n_elts = GET_MODE_NUNITS (mode);
6150 int n_var = 0;
6151 rtx x, new_rtx, tmp, constant_op, op1, op2;
6152 int i;
6153
6154 for (i = 0; i < n_elts; ++i)
6155 {
6156 x = XVECEXP (vals, 0, i);
6157 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6158 ++n_var;
6159 }
6160 if (n_var == 0)
6161 {
6162 /* Load from constant pool. */
6163 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6164 return;
6165 }
6166
6167 if (n_var == 2)
6168 {
6169 /* The vector is initialized only with non-constants. */
6170 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6171 XVECEXP (vals, 0, 1));
6172
6173 emit_move_insn (target, new_rtx);
6174 return;
6175 }
6176
6177 /* One field is non-constant and the other one is a constant. Load the
6178 constant from the constant pool and use ps_merge instruction to
6179 construct the whole vector. */
6180 op1 = XVECEXP (vals, 0, 0);
6181 op2 = XVECEXP (vals, 0, 1);
6182
6183 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6184
6185 tmp = gen_reg_rtx (GET_MODE (constant_op));
6186 emit_move_insn (tmp, constant_op);
6187
6188 if (CONSTANT_P (op1))
6189 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6190 else
6191 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6192
6193 emit_move_insn (target, new_rtx);
6194 }
6195
6196 void
6197 paired_expand_vector_move (rtx operands[])
6198 {
6199 rtx op0 = operands[0], op1 = operands[1];
6200
6201 emit_move_insn (op0, op1);
6202 }
6203
6204 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6205 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6206 operands of the comparison RCODE. This is a recursive
6207 function. */
6208
6209 static void
6210 paired_emit_vector_compare (enum rtx_code rcode,
6211 rtx dest, rtx op0, rtx op1,
6212 rtx cc_op0, rtx cc_op1)
6213 {
6214 rtx tmp = gen_reg_rtx (V2SFmode);
6215 rtx tmp1, max, min;
6216
6217 gcc_assert (TARGET_PAIRED_FLOAT);
6218 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6219
6220 switch (rcode)
6221 {
6222 case LT:
6223 case LTU:
6224 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6225 return;
6226 case GE:
6227 case GEU:
6228 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6229 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6230 return;
6231 case LE:
6232 case LEU:
6233 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6234 return;
6235 case GT:
6236 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6237 return;
6238 case EQ:
6239 tmp1 = gen_reg_rtx (V2SFmode);
6240 max = gen_reg_rtx (V2SFmode);
6241 min = gen_reg_rtx (V2SFmode);
6243
6244 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6245 emit_insn (gen_selv2sf4
6246 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6247 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6248 emit_insn (gen_selv2sf4
6249 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6250 emit_insn (gen_subv2sf3 (tmp1, min, max));
6251 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6252 return;
6253 case NE:
6254 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6255 return;
6256 case UNLE:
6257 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6258 return;
6259 case UNLT:
6260 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6261 return;
6262 case UNGE:
6263 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6264 return;
6265 case UNGT:
6266 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6267 return;
6268 default:
6269 gcc_unreachable ();
6270 }
6271
6272 return;
6273 }
6274
6275 /* Emit vector conditional expression.
6276 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6277 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6278
6279 int
6280 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6281 rtx cond, rtx cc_op0, rtx cc_op1)
6282 {
6283 enum rtx_code rcode = GET_CODE (cond);
6284
6285 if (!TARGET_PAIRED_FLOAT)
6286 return 0;
6287
6288 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6289
6290 return 1;
6291 }
6292
6293 /* Initialize vector TARGET to VALS. */
6294
6295 void
6296 rs6000_expand_vector_init (rtx target, rtx vals)
6297 {
6298 machine_mode mode = GET_MODE (target);
6299 machine_mode inner_mode = GET_MODE_INNER (mode);
6300 int n_elts = GET_MODE_NUNITS (mode);
6301 int n_var = 0, one_var = -1;
6302 bool all_same = true, all_const_zero = true;
6303 rtx x, mem;
6304 int i;
6305
6306 for (i = 0; i < n_elts; ++i)
6307 {
6308 x = XVECEXP (vals, 0, i);
6309 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6310 ++n_var, one_var = i;
6311 else if (x != CONST0_RTX (inner_mode))
6312 all_const_zero = false;
6313
6314 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6315 all_same = false;
6316 }
6317
6318 if (n_var == 0)
6319 {
6320 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6321 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6322 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6323 {
6324 /* Zero register. */
6325 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
6326 return;
6327 }
6328 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6329 {
6330 /* Splat immediate. */
6331 emit_insn (gen_rtx_SET (target, const_vec));
6332 return;
6333 }
6334 else
6335 {
6336 /* Load from constant pool. */
6337 emit_move_insn (target, const_vec);
6338 return;
6339 }
6340 }
6341
6342 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6343 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6344 {
6345 rtx op0 = XVECEXP (vals, 0, 0);
6346 rtx op1 = XVECEXP (vals, 0, 1);
6347 if (all_same)
6348 {
6349 if (!MEM_P (op0) && !REG_P (op0))
6350 op0 = force_reg (inner_mode, op0);
6351 if (mode == V2DFmode)
6352 emit_insn (gen_vsx_splat_v2df (target, op0));
6353 else
6354 emit_insn (gen_vsx_splat_v2di (target, op0));
6355 }
6356 else
6357 {
6358 op0 = force_reg (inner_mode, op0);
6359 op1 = force_reg (inner_mode, op1);
6360 if (mode == V2DFmode)
6361 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6362 else
6363 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6364 }
6365 return;
6366 }
6367
6368 /* With single precision floating point on VSX, we know that internally
6369 single precision is actually represented as a double. Either make 2 V2DF
6370 vectors and convert those vectors to single precision, or do one
6371 conversion and splat the result to the other elements. */
6372 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
6373 {
6374 if (all_same)
6375 {
6376 rtx freg = gen_reg_rtx (V4SFmode);
6377 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
6378 rtx cvt = ((TARGET_XSCVDPSPN)
6379 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6380 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6381
6382 emit_insn (cvt);
6383 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
6384 }
6385 else
6386 {
6387 rtx dbl_even = gen_reg_rtx (V2DFmode);
6388 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6389 rtx flt_even = gen_reg_rtx (V4SFmode);
6390 rtx flt_odd = gen_reg_rtx (V4SFmode);
6391 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6392 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6393 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6394 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6395
6396 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6397 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6398 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6399 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6400 rs6000_expand_extract_even (target, flt_even, flt_odd);
6401 }
6402 return;
6403 }
6404
6405 /* Store value to stack temp. Load vector element. Splat. However, splat
6406 of 64-bit items is not supported on Altivec. */
6407 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6408 {
6409 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6410 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6411 XVECEXP (vals, 0, 0));
6412 x = gen_rtx_UNSPEC (VOIDmode,
6413 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6414 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6415 gen_rtvec (2,
6416 gen_rtx_SET (target, mem),
6417 x)));
6418 x = gen_rtx_VEC_SELECT (inner_mode, target,
6419 gen_rtx_PARALLEL (VOIDmode,
6420 gen_rtvec (1, const0_rtx)));
6421 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6422 return;
6423 }
6424
6425 /* One field is non-constant. Load constant then overwrite
6426 varying field. */
6427 if (n_var == 1)
6428 {
6429 rtx copy = copy_rtx (vals);
6430
6431 /* Load constant part of vector, substitute neighboring value for
6432 varying element. */
6433 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6434 rs6000_expand_vector_init (target, copy);
6435
6436 /* Insert variable. */
6437 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6438 return;
6439 }
6440
6441 /* Construct the vector in memory one field at a time
6442 and load the whole vector. */
6443 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6444 for (i = 0; i < n_elts; i++)
6445 emit_move_insn (adjust_address_nv (mem, inner_mode,
6446 i * GET_MODE_SIZE (inner_mode)),
6447 XVECEXP (vals, 0, i));
6448 emit_move_insn (target, mem);
6449 }
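
/* Editorial note (illustrative only): initializing a V2DF vector from
   { x, 1.0 } with VSX takes the doubleword path above: all_same is
   false, so both elements are forced into registers (loading the
   constant 1.0) and combined with gen_vsx_concat_v2df, with no stack
   temporary needed.  */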
6450
6451 /* Set field ELT of TARGET to VAL. */
6452
6453 void
6454 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6455 {
6456 machine_mode mode = GET_MODE (target);
6457 machine_mode inner_mode = GET_MODE_INNER (mode);
6458 rtx reg = gen_reg_rtx (mode);
6459 rtx mask, mem, x;
6460 int width = GET_MODE_SIZE (inner_mode);
6461 int i;
6462
6463 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6464 {
6465 rtx (*set_func) (rtx, rtx, rtx, rtx)
6466 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
6467 emit_insn (set_func (target, target, val, GEN_INT (elt)));
6468 return;
6469 }
6470
6471 /* Simplify setting single element vectors like V1TImode. */
6472 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6473 {
6474 emit_move_insn (target, gen_lowpart (mode, val));
6475 return;
6476 }
6477
6478 /* Load single variable value. */
6479 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6480 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6481 x = gen_rtx_UNSPEC (VOIDmode,
6482 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6483 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6484 gen_rtvec (2,
6485 gen_rtx_SET (reg, mem),
6486 x)));
6487
6488 /* Linear sequence. */
6489 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6490 for (i = 0; i < 16; ++i)
6491 XVECEXP (mask, 0, i) = GEN_INT (i);
6492
6493 /* Set permute mask to insert element into target. */
6494 for (i = 0; i < width; ++i)
6495 XVECEXP (mask, 0, elt*width + i)
6496 = GEN_INT (i + 0x10);
6497 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6498
6499 if (BYTES_BIG_ENDIAN)
6500 x = gen_rtx_UNSPEC (mode,
6501 gen_rtvec (3, target, reg,
6502 force_reg (V16QImode, x)),
6503 UNSPEC_VPERM);
6504 else
6505 {
6506 /* Invert selector. We prefer to generate VNAND on P8 so
6507 that future fusion opportunities can kick in, but must
6508 generate VNOR elsewhere. */
6509 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6510 rtx iorx = (TARGET_P8_VECTOR
6511 ? gen_rtx_IOR (V16QImode, notx, notx)
6512 : gen_rtx_AND (V16QImode, notx, notx));
6513 rtx tmp = gen_reg_rtx (V16QImode);
6514 emit_insn (gen_rtx_SET (tmp, iorx));
6515
6516 /* Permute with operands reversed and adjusted selector. */
6517 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6518 UNSPEC_VPERM);
6519 }
6520
6521 emit_insn (gen_rtx_SET (target, x));
6522 }
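
/* Editorial worked example (illustrative only): on a big-endian target,
   setting element 1 of a V4SI (width == 4) builds the byte selector
   { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 }, so bytes 4..7 of
   the result come from the newly loaded value (selector bytes >= 0x10
   name the second vperm input).  */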
6523
6524 /* Extract field ELT from VEC into TARGET. */
6525
6526 void
6527 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
6528 {
6529 machine_mode mode = GET_MODE (vec);
6530 machine_mode inner_mode = GET_MODE_INNER (mode);
6531 rtx mem;
6532
6533 if (VECTOR_MEM_VSX_P (mode))
6534 {
6535 switch (mode)
6536 {
6537 default:
6538 break;
6539 case V1TImode:
6540 gcc_assert (elt == 0 && inner_mode == TImode);
6541 emit_move_insn (target, gen_lowpart (TImode, vec));
6542 break;
6543 case V2DFmode:
6544 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
6545 return;
6546 case V2DImode:
6547 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
6548 return;
6549 case V4SFmode:
6550 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
6551 return;
6552 }
6553 }
6554
6555 /* Allocate mode-sized buffer. */
6556 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6557
6558 emit_move_insn (mem, vec);
6559
6560 /* Add offset to field within buffer matching vector element. */
6561 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
6562
6563 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6564 }
6565
6566 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6567
6568 bool
6569 invalid_e500_subreg (rtx op, machine_mode mode)
6570 {
6571 if (TARGET_E500_DOUBLE)
6572 {
6573 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6574 subreg:TI and reg:TF. Decimal float modes are like integer
6575 modes (only low part of each register used) for this
6576 purpose. */
6577 if (GET_CODE (op) == SUBREG
6578 && (mode == SImode || mode == DImode || mode == TImode
6579 || mode == DDmode || mode == TDmode || mode == PTImode)
6580 && REG_P (SUBREG_REG (op))
6581 && (GET_MODE (SUBREG_REG (op)) == DFmode
6582 || GET_MODE (SUBREG_REG (op)) == TFmode
6583 || GET_MODE (SUBREG_REG (op)) == IFmode
6584 || GET_MODE (SUBREG_REG (op)) == KFmode))
6585 return true;
6586
6587 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6588 reg:TI. */
6589 if (GET_CODE (op) == SUBREG
6590 && (mode == DFmode || mode == TFmode || mode == IFmode
6591 || mode == KFmode)
6592 && REG_P (SUBREG_REG (op))
6593 && (GET_MODE (SUBREG_REG (op)) == DImode
6594 || GET_MODE (SUBREG_REG (op)) == TImode
6595 || GET_MODE (SUBREG_REG (op)) == PTImode
6596 || GET_MODE (SUBREG_REG (op)) == DDmode
6597 || GET_MODE (SUBREG_REG (op)) == TDmode))
6598 return true;
6599 }
6600
6601 if (TARGET_SPE
6602 && GET_CODE (op) == SUBREG
6603 && mode == SImode
6604 && REG_P (SUBREG_REG (op))
6605 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6606 return true;
6607
6608 return false;
6609 }
6610
6611 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
6612 selects whether the alignment is abi mandated, optional, or
6613 both abi and optional alignment. */
6614
6615 unsigned int
6616 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6617 {
6618 if (how != align_opt)
6619 {
6620 if (TREE_CODE (type) == VECTOR_TYPE)
6621 {
6622 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6623 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6624 {
6625 if (align < 64)
6626 align = 64;
6627 }
6628 else if (align < 128)
6629 align = 128;
6630 }
6631 else if (TARGET_E500_DOUBLE
6632 && TREE_CODE (type) == REAL_TYPE
6633 && TYPE_MODE (type) == DFmode)
6634 {
6635 if (align < 64)
6636 align = 64;
6637 }
6638 }
6639
6640 if (how != align_abi)
6641 {
6642 if (TREE_CODE (type) == ARRAY_TYPE
6643 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6644 {
6645 if (align < BITS_PER_WORD)
6646 align = BITS_PER_WORD;
6647 }
6648 }
6649
6650 return align;
6651 }
6652
6653 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6654
6655 bool
6656 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6657 {
6658 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6659 {
6660 if (computed != 128)
6661 {
6662 static bool warned;
6663 if (!warned && warn_psabi)
6664 {
6665 warned = true;
6666 inform (input_location,
6667 "the layout of aggregates containing vectors with"
6668 " %d-byte alignment has changed in GCC 5",
6669 computed / BITS_PER_UNIT);
6670 }
6671 }
6672 /* In current GCC there is no special case. */
6673 return false;
6674 }
6675
6676 return false;
6677 }
6678
6679 /* AIX increases natural record alignment to doubleword if the first
6680 field is an FP double while the FP fields remain word aligned. */
6681
6682 unsigned int
6683 rs6000_special_round_type_align (tree type, unsigned int computed,
6684 unsigned int specified)
6685 {
6686 unsigned int align = MAX (computed, specified);
6687 tree field = TYPE_FIELDS (type);
6688
6689 /* Skip all non-field decls. */
6690 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6691 field = DECL_CHAIN (field);
6692
6693 if (field != NULL && field != type)
6694 {
6695 type = TREE_TYPE (field);
6696 while (TREE_CODE (type) == ARRAY_TYPE)
6697 type = TREE_TYPE (type);
6698
6699 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6700 align = MAX (align, 64);
6701 }
6702
6703 return align;
6704 }
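
/* Editorial worked example (illustrative only): on AIX,
   struct { double d; int i; } has its record alignment raised to 64
   bits because the first field has DFmode, whereas
   struct { int i; double d; } keeps its computed word alignment.  */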
6705
6706 /* Darwin increases record alignment to the natural alignment of
6707 the first field. */
6708
6709 unsigned int
6710 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6711 unsigned int specified)
6712 {
6713 unsigned int align = MAX (computed, specified);
6714
6715 if (TYPE_PACKED (type))
6716 return align;
6717
6718 /* Find the first field, looking down into aggregates. */
6719 do {
6720 tree field = TYPE_FIELDS (type);
6721 /* Skip all non-field decls. */
6722 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6723 field = DECL_CHAIN (field);
6724 if (! field)
6725 break;
6726 /* A packed field does not contribute any extra alignment. */
6727 if (DECL_PACKED (field))
6728 return align;
6729 type = TREE_TYPE (field);
6730 while (TREE_CODE (type) == ARRAY_TYPE)
6731 type = TREE_TYPE (type);
6732 } while (AGGREGATE_TYPE_P (type));
6733
6734 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6735 align = MAX (align, TYPE_ALIGN (type));
6736
6737 return align;
6738 }
6739
6740 /* Return 1 for an operand in small memory on V.4/eabi. */
6741
6742 int
6743 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6744 machine_mode mode ATTRIBUTE_UNUSED)
6745 {
6746 #if TARGET_ELF
6747 rtx sym_ref;
6748
6749 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6750 return 0;
6751
6752 if (DEFAULT_ABI != ABI_V4)
6753 return 0;
6754
6755 /* Vector and float memory instructions have a limited offset on the
6756 SPE, so using a vector or float variable directly as an operand is
6757 not useful. */
6758 if (TARGET_SPE
6759 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6760 return 0;
6761
6762 if (GET_CODE (op) == SYMBOL_REF)
6763 sym_ref = op;
6764
6765 else if (GET_CODE (op) != CONST
6766 || GET_CODE (XEXP (op, 0)) != PLUS
6767 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6768 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6769 return 0;
6770
6771 else
6772 {
6773 rtx sum = XEXP (op, 0);
6774 HOST_WIDE_INT summand;
6775
6776 /* We have to be careful here, because it is the referenced address
6777 that must be 32k from _SDA_BASE_, not just the symbol. */
6778 summand = INTVAL (XEXP (sum, 1));
6779 if (summand < 0 || summand > g_switch_value)
6780 return 0;
6781
6782 sym_ref = XEXP (sum, 0);
6783 }
6784
6785 return SYMBOL_REF_SMALL_P (sym_ref);
6786 #else
6787 return 0;
6788 #endif
6789 }
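
/* Editorial worked example (illustrative only): under the V.4 ABI with
   -msdata=eabi and -G 8, (const (plus (symbol_ref "x") (const_int 4)))
   is accepted when "x" lives in small data, while a summand of 12
   exceeds g_switch_value and is rejected before SYMBOL_REF_SMALL_P is
   consulted.  */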
6790
6791 /* Return true if either operand is a general purpose register. */
6792
6793 bool
6794 gpr_or_gpr_p (rtx op0, rtx op1)
6795 {
6796 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6797 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6798 }
6799
6800 /* Return true if this is a move direct operation between GPR registers and
6801 floating point/VSX registers. */
6802
6803 bool
6804 direct_move_p (rtx op0, rtx op1)
6805 {
6806 int regno0, regno1;
6807
6808 if (!REG_P (op0) || !REG_P (op1))
6809 return false;
6810
6811 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6812 return false;
6813
6814 regno0 = REGNO (op0);
6815 regno1 = REGNO (op1);
6816 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6817 return false;
6818
6819 if (INT_REGNO_P (regno0))
6820 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6821
6822 else if (INT_REGNO_P (regno1))
6823 {
6824 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6825 return true;
6826
6827 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6828 return true;
6829 }
6830
6831 return false;
6832 }
6833
6834 /* Return true if this is a load or store quad operation. This function does
6835 not handle the atomic quad memory instructions. */
6836
6837 bool
6838 quad_load_store_p (rtx op0, rtx op1)
6839 {
6840 bool ret;
6841
6842 if (!TARGET_QUAD_MEMORY)
6843 ret = false;
6844
6845 else if (REG_P (op0) && MEM_P (op1))
6846 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6847 && quad_memory_operand (op1, GET_MODE (op1))
6848 && !reg_overlap_mentioned_p (op0, op1));
6849
6850 else if (MEM_P (op0) && REG_P (op1))
6851 ret = (quad_memory_operand (op0, GET_MODE (op0))
6852 && quad_int_reg_operand (op1, GET_MODE (op1)));
6853
6854 else
6855 ret = false;
6856
6857 if (TARGET_DEBUG_ADDR)
6858 {
6859 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6860 ret ? "true" : "false");
6861 debug_rtx (gen_rtx_SET (op0, op1));
6862 }
6863
6864 return ret;
6865 }
6866
6867 /* Given an address, return a constant offset term if one exists. */
6868
6869 static rtx
6870 address_offset (rtx op)
6871 {
6872 if (GET_CODE (op) == PRE_INC
6873 || GET_CODE (op) == PRE_DEC)
6874 op = XEXP (op, 0);
6875 else if (GET_CODE (op) == PRE_MODIFY
6876 || GET_CODE (op) == LO_SUM)
6877 op = XEXP (op, 1);
6878
6879 if (GET_CODE (op) == CONST)
6880 op = XEXP (op, 0);
6881
6882 if (GET_CODE (op) == PLUS)
6883 op = XEXP (op, 1);
6884
6885 if (CONST_INT_P (op))
6886 return op;
6887
6888 return NULL_RTX;
6889 }
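/* Editor's note: a few illustrative inputs for address_offset
   (hypothetical RTL, not taken from the sources):

     (plus (reg) (const_int 16))      returns (const_int 16)
     (lo_sum (reg) (const (plus (symbol_ref) (const_int 8))))
                                      returns (const_int 8)
     (reg)                            returns NULL_RTX  */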
6890
6891 /* Return true if the MEM operand is a memory operand suitable for use
6892 with a (full width, possibly multiple) gpr load/store. On
6893 powerpc64 this means the offset must be divisible by 4.
6894 Implements 'Y' constraint.
6895
6896 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6897 a constraint function we know the operand has satisfied a suitable
6898 memory predicate. Also accept some odd rtl generated by reload
6899 (see rs6000_legitimize_reload_address for various forms). It is
6900 important that reload rtl be accepted by appropriate constraints
6901 but not by the operand predicate.
6902
6903 Offsetting a lo_sum should not be allowed, except where we know by
6904 alignment that a 32k boundary is not crossed, but see the ???
6905 comment in rs6000_legitimize_reload_address. Note that by
6906 "offsetting" here we mean a further offset to access parts of the
6907 MEM. It's fine to have a lo_sum where the inner address is offset
6908 from a sym, since the same sym+offset will appear in the high part
6909 of the address calculation. */
6910
6911 bool
6912 mem_operand_gpr (rtx op, machine_mode mode)
6913 {
6914 unsigned HOST_WIDE_INT offset;
6915 int extra;
6916 rtx addr = XEXP (op, 0);
6917
6918 op = address_offset (addr);
6919 if (op == NULL_RTX)
6920 return true;
6921
6922 offset = INTVAL (op);
6923 if (TARGET_POWERPC64 && (offset & 3) != 0)
6924 return false;
6925
6926 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6927 if (extra < 0)
6928 extra = 0;
6929
6930 if (GET_CODE (addr) == LO_SUM)
6931 /* For lo_sum addresses, we must allow any offset except one that
6932 causes a wrap, so test only the low 16 bits. */
6933 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6934
6935 return offset + 0x8000 < 0x10000u - extra;
6936 }
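/* Editor's note: two arithmetic idioms above deserve unpacking.
   ((x & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits of X,
   and OFFSET + 0x8000 < 0x10000 - EXTRA tests
   -32768 <= OFFSET <= 32767 - EXTRA with one unsigned comparison.
   A minimal standalone sketch, not part of this file (names are
   illustrative):  */
#if 0
#include <stdio.h>

/* Sign-extend the low 16 bits of X, as mem_operand_gpr does.  */
static long
sign_extend_16 (unsigned long x)
{
  long v = (long) (x & 0xffff);
  return (v ^ 0x8000) - 0x8000;
}

int
main (void)
{
  printf ("%ld\n", sign_extend_16 (0x9000));  /* -28672: bit 15 set.  */
  printf ("%ld\n", sign_extend_16 (0x1234));  /* 4660: bit 15 clear.  */

  /* Range check for TImode (16 bytes) on powerpc64: extra = 8, and the
     offset must also be a multiple of 4.  32756 is the largest valid
     offset; it keeps the final doubleword access at offset + 8 within
     a signed 16-bit displacement.  */
  unsigned long offset = 32756;
  printf ("%d\n", offset + 0x8000 < 0x10000u - 8);      /* 1 (valid).  */
  printf ("%d\n", offset + 4 + 0x8000 < 0x10000u - 8);  /* 0 (too big).  */
  return 0;
}
#endif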
6937 \f
6938 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6939
6940 static bool
6941 reg_offset_addressing_ok_p (machine_mode mode)
6942 {
6943 switch (mode)
6944 {
6945 case V16QImode:
6946 case V8HImode:
6947 case V4SFmode:
6948 case V4SImode:
6949 case V2DFmode:
6950 case V2DImode:
6951 case V1TImode:
6952 case TImode:
6953 case TFmode:
6954 case KFmode:
6955 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6956 TImode is not a vector mode, if we want to use the VSX registers to
6957 move it around, we need to restrict ourselves to reg+reg addressing.
6958 Similarly for IEEE 128-bit floating point that is passed in a single
6959 vector register. */
6960 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6961 return false;
6962 break;
6963
6964 case V4HImode:
6965 case V2SImode:
6966 case V1DImode:
6967 case V2SFmode:
6968 /* Paired vector modes. Only reg+reg addressing is valid. */
6969 if (TARGET_PAIRED_FLOAT)
6970 return false;
6971 break;
6972
6973 case SDmode:
6974 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6975 addressing for the LFIWZX and STFIWX instructions. */
6976 if (TARGET_NO_SDMODE_STACK)
6977 return false;
6978 break;
6979
6980 default:
6981 break;
6982 }
6983
6984 return true;
6985 }
6986
6987 static bool
6988 virtual_stack_registers_memory_p (rtx op)
6989 {
6990 int regnum;
6991
6992 if (GET_CODE (op) == REG)
6993 regnum = REGNO (op);
6994
6995 else if (GET_CODE (op) == PLUS
6996 && GET_CODE (XEXP (op, 0)) == REG
6997 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6998 regnum = REGNO (XEXP (op, 0));
6999
7000 else
7001 return false;
7002
7003 return (regnum >= FIRST_VIRTUAL_REGISTER
7004 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7005 }
7006
7007 /* Return true if a MODE-sized memory access to OP plus OFFSET
7008 is known to not straddle a 32k boundary. This function is used
7009 to determine whether -mcmodel=medium code can use TOC pointer
7010 relative addressing for OP. This means the alignment of the TOC
7011 pointer must also be taken into account, and unfortunately that is
7012 only 8 bytes. */
7013
7014 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7015 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7016 #endif
7017
7018 static bool
7019 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7020 machine_mode mode)
7021 {
7022 tree decl;
7023 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7024
7025 if (GET_CODE (op) != SYMBOL_REF)
7026 return false;
7027
7028 dsize = GET_MODE_SIZE (mode);
7029 decl = SYMBOL_REF_DECL (op);
7030 if (!decl)
7031 {
7032 if (dsize == 0)
7033 return false;
7034
7035 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7036 replacing memory addresses with an anchor plus offset. We
7037 could find the decl by rummaging around in the block->objects
7038 VEC for the given offset but that seems like too much work. */
7039 dalign = BITS_PER_UNIT;
7040 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7041 && SYMBOL_REF_ANCHOR_P (op)
7042 && SYMBOL_REF_BLOCK (op) != NULL)
7043 {
7044 struct object_block *block = SYMBOL_REF_BLOCK (op);
7045
7046 dalign = block->alignment;
7047 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7048 }
7049 else if (CONSTANT_POOL_ADDRESS_P (op))
7050 {
7051 /* It would be nice to have get_pool_align()... */
7052 machine_mode cmode = get_pool_mode (op);
7053
7054 dalign = GET_MODE_ALIGNMENT (cmode);
7055 }
7056 }
7057 else if (DECL_P (decl))
7058 {
7059 dalign = DECL_ALIGN (decl);
7060
7061 if (dsize == 0)
7062 {
7063 /* Allow BLKmode when the entire object is known to not
7064 cross a 32k boundary. */
7065 if (!DECL_SIZE_UNIT (decl))
7066 return false;
7067
7068 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7069 return false;
7070
7071 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7072 if (dsize > 32768)
7073 return false;
7074
7075 dalign /= BITS_PER_UNIT;
7076 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7077 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7078 return dalign >= dsize;
7079 }
7080 }
7081 else
7082 gcc_unreachable ();
7083
7084 /* Find how many bits of the alignment we know for this access. */
7085 dalign /= BITS_PER_UNIT;
7086 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7087 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7088 mask = dalign - 1;
7089 lsb = offset & -offset;
7090 mask &= lsb - 1;
7091 dalign = mask + 1;
7092
7093 return dalign >= dsize;
7094 }
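/* Editor's note: the lsb/mask computation above yields the alignment
   still guaranteed after adding OFFSET, i.e. the smaller of DALIGN and
   the lowest set bit of OFFSET.  For dalign = 8 and offset = 4:
   lsb = 4 & -4 = 4, mask = 7 & 3 = 3, so the known alignment drops to
   4 and only accesses with dsize <= 4 pass.  For offset = 0, lsb - 1
   is all-ones, the mask survives intact, and the full alignment of 8
   is retained.  */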
7095
7096 static bool
7097 constant_pool_expr_p (rtx op)
7098 {
7099 rtx base, offset;
7100
7101 split_const (op, &base, &offset);
7102 return (GET_CODE (base) == SYMBOL_REF
7103 && CONSTANT_POOL_ADDRESS_P (base)
7104 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7105 }
7106
7107 static const_rtx tocrel_base, tocrel_offset;
7108
7109 /* Return true if OP is a toc pointer relative address (the output
7110 of create_TOC_reference). If STRICT, do not match high part or
7111 non-split -mcmodel=large/medium toc pointer relative addresses. */
7112
7113 bool
7114 toc_relative_expr_p (const_rtx op, bool strict)
7115 {
7116 if (!TARGET_TOC)
7117 return false;
7118
7119 if (TARGET_CMODEL != CMODEL_SMALL)
7120 {
7121 /* Only match the low part. */
7122 if (GET_CODE (op) == LO_SUM
7123 && REG_P (XEXP (op, 0))
7124 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
7125 op = XEXP (op, 1);
7126 else if (strict)
7127 return false;
7128 }
7129
7130 tocrel_base = op;
7131 tocrel_offset = const0_rtx;
7132 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7133 {
7134 tocrel_base = XEXP (op, 0);
7135 tocrel_offset = XEXP (op, 1);
7136 }
7137
7138 return (GET_CODE (tocrel_base) == UNSPEC
7139 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
7140 }
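/* Editor's note: after the stripping above, a matching address bottoms
   out at (unspec [...] UNSPEC_TOCREL), possibly reached through the
   low part of a LO_SUM (medium/large code models) and/or a PLUS whose
   constant term is recorded in tocrel_offset.  */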
7141
7142 /* Return true if X is a constant pool address, and also for cmodel=medium
7143 if X is a toc-relative address known to be offsettable within MODE. */
7144
7145 bool
7146 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7147 bool strict)
7148 {
7149 return (toc_relative_expr_p (x, strict)
7150 && (TARGET_CMODEL != CMODEL_MEDIUM
7151 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7152 || mode == QImode
7153 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7154 INTVAL (tocrel_offset), mode)));
7155 }
7156
7157 static bool
7158 legitimate_small_data_p (machine_mode mode, rtx x)
7159 {
7160 return (DEFAULT_ABI == ABI_V4
7161 && !flag_pic && !TARGET_TOC
7162 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
7163 && small_data_operand (x, mode));
7164 }
7165
7166 /* SPE offset addressing is limited to 5-bits worth of double words. */
7167 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
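/* Editor's note: the mask ~0xf8 accepts exactly the 32 offsets
   0, 8, 16, ..., 248 -- five bits' worth of doublewords -- and rejects
   anything unaligned (e.g. 4) or out of range (e.g. 256).  */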
7168
7169 bool
7170 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7171 bool strict, bool worst_case)
7172 {
7173 unsigned HOST_WIDE_INT offset;
7174 unsigned int extra;
7175
7176 if (GET_CODE (x) != PLUS)
7177 return false;
7178 if (!REG_P (XEXP (x, 0)))
7179 return false;
7180 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7181 return false;
7182 if (!reg_offset_addressing_ok_p (mode))
7183 return virtual_stack_registers_memory_p (x);
7184 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7185 return true;
7186 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7187 return false;
7188
7189 offset = INTVAL (XEXP (x, 1));
7190 extra = 0;
7191 switch (mode)
7192 {
7193 case V4HImode:
7194 case V2SImode:
7195 case V1DImode:
7196 case V2SFmode:
7197 /* SPE vector modes. */
7198 return SPE_CONST_OFFSET_OK (offset);
7199
7200 case DFmode:
7201 case DDmode:
7202 case DImode:
7203 /* On e500v2, we may have:
7204
7205 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
7206
7207 which gets addressed with evldd instructions. */
7208 if (TARGET_E500_DOUBLE)
7209 return SPE_CONST_OFFSET_OK (offset);
7210
7211 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7212 addressing. */
7213 if (VECTOR_MEM_VSX_P (mode))
7214 return false;
7215
7216 if (!worst_case)
7217 break;
7218 if (!TARGET_POWERPC64)
7219 extra = 4;
7220 else if (offset & 3)
7221 return false;
7222 break;
7223
7224 case TFmode:
7225 case IFmode:
7226 case KFmode:
7227 if (TARGET_E500_DOUBLE)
7228 return (SPE_CONST_OFFSET_OK (offset)
7229 && SPE_CONST_OFFSET_OK (offset + 8));
7230 /* fall through */
7231
7232 case TDmode:
7233 case TImode:
7234 case PTImode:
7235 extra = 8;
7236 if (!worst_case)
7237 break;
7238 if (!TARGET_POWERPC64)
7239 extra = 12;
7240 else if (offset & 3)
7241 return false;
7242 break;
7243
7244 default:
7245 break;
7246 }
7247
7248 offset += 0x8000;
7249 return offset < 0x10000 - extra;
7250 }
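/* Editor's note: the closing test is equivalent to
   -32768 <= offset <= 32767 - extra, where EXTRA is how many bytes
   past the first word the access may touch.  E.g. worst-case TFmode
   on 32-bit sets extra = 12, allowing offsets up to 32755 so that the
   last word access at offset + 12 still fits in a signed 16-bit
   displacement.  */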
7251
7252 bool
7253 legitimate_indexed_address_p (rtx x, int strict)
7254 {
7255 rtx op0, op1;
7256
7257 if (GET_CODE (x) != PLUS)
7258 return false;
7259
7260 op0 = XEXP (x, 0);
7261 op1 = XEXP (x, 1);
7262
7263 /* Recognize the rtl generated by reload which we know will later be
7264 replaced with proper base and index regs. */
7265 if (!strict
7266 && reload_in_progress
7267 && (REG_P (op0) || GET_CODE (op0) == PLUS)
7268 && REG_P (op1))
7269 return true;
7270
7271 return (REG_P (op0) && REG_P (op1)
7272 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7273 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7274 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7275 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7276 }
7277
7278 bool
7279 avoiding_indexed_address_p (machine_mode mode)
7280 {
7281 /* Avoid indexed addressing for modes that have non-indexed
7282 load/store instruction forms. */
7283 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7284 }
7285
7286 bool
7287 legitimate_indirect_address_p (rtx x, int strict)
7288 {
7289 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
7290 }
7291
7292 bool
7293 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7294 {
7295 if (!TARGET_MACHO || !flag_pic
7296 || mode != SImode || GET_CODE (x) != MEM)
7297 return false;
7298 x = XEXP (x, 0);
7299
7300 if (GET_CODE (x) != LO_SUM)
7301 return false;
7302 if (GET_CODE (XEXP (x, 0)) != REG)
7303 return false;
7304 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7305 return false;
7306 x = XEXP (x, 1);
7307
7308 return CONSTANT_P (x);
7309 }
7310
7311 static bool
7312 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7313 {
7314 if (GET_CODE (x) != LO_SUM)
7315 return false;
7316 if (GET_CODE (XEXP (x, 0)) != REG)
7317 return false;
7318 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7319 return false;
7320 /* Restrict addressing for DI because of our SUBREG hackery. */
7321 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7322 return false;
7323 x = XEXP (x, 1);
7324
7325 if (TARGET_ELF || TARGET_MACHO)
7326 {
7327 bool large_toc_ok;
7328
7329 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7330 return false;
7331 /* LRA does not use LEGITIMIZE_RELOAD_ADDRESS, since that hook
7332 usually calls push_reload from reload pass code.
7333 LEGITIMIZE_RELOAD_ADDRESS recognizes some LO_SUM addresses as
7334 valid although this function says the opposite. In most cases
7335 LRA can generate correct code for address reloads through its
7336 own transformations; only a few LO_SUM cases remain that it
7337 cannot handle. So, analogously to rs6000_legitimize_reload_address,
7338 we add code here saying that such LO_SUM addresses are still valid. */
7339 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7340 && small_toc_ref (x, VOIDmode));
7341 if (TARGET_TOC && ! large_toc_ok)
7342 return false;
7343 if (GET_MODE_NUNITS (mode) != 1)
7344 return false;
7345 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7346 && !(/* ??? Assume floating point reg based on mode? */
7347 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
7348 && (mode == DFmode || mode == DDmode)))
7349 return false;
7350
7351 return CONSTANT_P (x) || large_toc_ok;
7352 }
7353
7354 return false;
7355 }
7356
7357
7358 /* Try machine-dependent ways of modifying an illegitimate address
7359 to be legitimate. If we find one, return the new, valid address.
7360 This is used from only one place: `memory_address' in explow.c.
7361
7362 OLDX is the address as it was before break_out_memory_refs was
7363 called. In some cases it is useful to look at this to decide what
7364 needs to be done.
7365
7366 It is always safe for this function to do nothing. It exists to
7367 recognize opportunities to optimize the output.
7368
7369 On RS/6000, first check for the sum of a register with a constant
7370 integer that is out of range. If so, generate code to add the
7371 constant with the low-order 16 bits masked to the register and force
7372 this result into another register (this can be done with `cau').
7373 Then generate an address of REG+(CONST&0xffff), allowing for the
7374 possibility of bit 16 being a one.
7375
7376 Then check for the sum of a register and something not constant, try to
7377 load the other things into a register and return the sum. */
7378
7379 static rtx
7380 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
7381 machine_mode mode)
7382 {
7383 unsigned int extra;
7384
7385 if (!reg_offset_addressing_ok_p (mode))
7386 {
7387 if (virtual_stack_registers_memory_p (x))
7388 return x;
7389
7390 /* In theory we should not be seeing addresses of the form reg+0,
7391 but just in case it is generated, optimize it away. */
7392 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
7393 return force_reg (Pmode, XEXP (x, 0));
7394
7395 /* For TImode with load/store quad, restrict addresses to just a single
7396 pointer, so it works with both GPRs and VSX registers. */
7397 /* Make sure both operands are registers. */
7398 else if (GET_CODE (x) == PLUS
7399 && (mode != TImode || !TARGET_QUAD_MEMORY))
7400 return gen_rtx_PLUS (Pmode,
7401 force_reg (Pmode, XEXP (x, 0)),
7402 force_reg (Pmode, XEXP (x, 1)));
7403 else
7404 return force_reg (Pmode, x);
7405 }
7406 if (GET_CODE (x) == SYMBOL_REF)
7407 {
7408 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
7409 if (model != 0)
7410 return rs6000_legitimize_tls_address (x, model);
7411 }
7412
7413 extra = 0;
7414 switch (mode)
7415 {
7416 case TFmode:
7417 case TDmode:
7418 case TImode:
7419 case PTImode:
7420 case IFmode:
7421 case KFmode:
7422 /* As in legitimate_offset_address_p we do not assume
7423 worst-case. The mode here is just a hint as to the registers
7424 used. A TImode is usually in gprs, but may actually be in
7425 fprs. Leave worst-case scenario for reload to handle via
7426 insn constraints. PTImode is only GPRs. */
7427 extra = 8;
7428 break;
7429 default:
7430 break;
7431 }
7432
7433 if (GET_CODE (x) == PLUS
7434 && GET_CODE (XEXP (x, 0)) == REG
7435 && GET_CODE (XEXP (x, 1)) == CONST_INT
7436 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
7437 >= 0x10000 - extra)
7438 && !(SPE_VECTOR_MODE (mode)
7439 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
7440 {
7441 HOST_WIDE_INT high_int, low_int;
7442 rtx sum;
7443 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
7444 if (low_int >= 0x8000 - extra)
7445 low_int = 0;
7446 high_int = INTVAL (XEXP (x, 1)) - low_int;
7447 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
7448 GEN_INT (high_int)), 0);
7449 return plus_constant (Pmode, sum, low_int);
7450 }
7451 else if (GET_CODE (x) == PLUS
7452 && GET_CODE (XEXP (x, 0)) == REG
7453 && GET_CODE (XEXP (x, 1)) != CONST_INT
7454 && GET_MODE_NUNITS (mode) == 1
7455 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7456 || (/* ??? Assume floating point reg based on mode? */
7457 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7458 && (mode == DFmode || mode == DDmode)))
7459 && !avoiding_indexed_address_p (mode))
7460 {
7461 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
7462 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
7463 }
7464 else if (SPE_VECTOR_MODE (mode)
7465 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
7466 {
7467 if (mode == DImode)
7468 return x;
7469 /* We accept [reg + reg] and [reg + OFFSET]. */
7470
7471 if (GET_CODE (x) == PLUS)
7472 {
7473 rtx op1 = XEXP (x, 0);
7474 rtx op2 = XEXP (x, 1);
7475 rtx y;
7476
7477 op1 = force_reg (Pmode, op1);
7478
7479 if (GET_CODE (op2) != REG
7480 && (GET_CODE (op2) != CONST_INT
7481 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
7482 || (GET_MODE_SIZE (mode) > 8
7483 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
7484 op2 = force_reg (Pmode, op2);
7485
7486 /* We can't always do [reg + reg] for these, because [reg +
7487 reg + offset] is not a legitimate addressing mode. */
7488 y = gen_rtx_PLUS (Pmode, op1, op2);
7489
7490 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
7491 return force_reg (Pmode, y);
7492 else
7493 return y;
7494 }
7495
7496 return force_reg (Pmode, x);
7497 }
7498 else if ((TARGET_ELF
7499 #if TARGET_MACHO
7500 || !MACHO_DYNAMIC_NO_PIC_P
7501 #endif
7502 )
7503 && TARGET_32BIT
7504 && TARGET_NO_TOC
7505 && ! flag_pic
7506 && GET_CODE (x) != CONST_INT
7507 && GET_CODE (x) != CONST_WIDE_INT
7508 && GET_CODE (x) != CONST_DOUBLE
7509 && CONSTANT_P (x)
7510 && GET_MODE_NUNITS (mode) == 1
7511 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7512 || (/* ??? Assume floating point reg based on mode? */
7513 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7514 && (mode == DFmode || mode == DDmode))))
7515 {
7516 rtx reg = gen_reg_rtx (Pmode);
7517 if (TARGET_ELF)
7518 emit_insn (gen_elf_high (reg, x));
7519 else
7520 emit_insn (gen_macho_high (reg, x));
7521 return gen_rtx_LO_SUM (Pmode, reg, x);
7522 }
7523 else if (TARGET_TOC
7524 && GET_CODE (x) == SYMBOL_REF
7525 && constant_pool_expr_p (x)
7526 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
7527 return create_TOC_reference (x, NULL_RTX);
7528 else
7529 return x;
7530 }
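/* Editor's note: a worked example of the high/low split above, with
   hypothetical values.  For x = (plus (reg) (const_int 0x12345)) in
   SImode, low_int = 0x2345 and high_int = 0x10000, so the code forces
   "addis tmp,reg,0x1" into a new register and returns
   (plus tmp (const_int 0x2345)), whose displacement now fits in a
   signed 16-bit field.  */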
7531
7532 /* Debug version of rs6000_legitimize_address. */
7533 static rtx
7534 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
7535 {
7536 rtx ret;
7537 rtx_insn *insns;
7538
7539 start_sequence ();
7540 ret = rs6000_legitimize_address (x, oldx, mode);
7541 insns = get_insns ();
7542 end_sequence ();
7543
7544 if (ret != x)
7545 {
7546 fprintf (stderr,
7547 "\nrs6000_legitimize_address: mode %s, old code %s, "
7548 "new code %s, modified\n",
7549 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
7550 GET_RTX_NAME (GET_CODE (ret)));
7551
7552 fprintf (stderr, "Original address:\n");
7553 debug_rtx (x);
7554
7555 fprintf (stderr, "oldx:\n");
7556 debug_rtx (oldx);
7557
7558 fprintf (stderr, "New address:\n");
7559 debug_rtx (ret);
7560
7561 if (insns)
7562 {
7563 fprintf (stderr, "Insns added:\n");
7564 debug_rtx_list (insns, 20);
7565 }
7566 }
7567 else
7568 {
7569 fprintf (stderr,
7570 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7571 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7572
7573 debug_rtx (x);
7574 }
7575
7576 if (insns)
7577 emit_insn (insns);
7578
7579 return ret;
7580 }
7581
7582 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7583 We need to emit DTP-relative relocations. */
7584
7585 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7586 static void
7587 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7588 {
7589 switch (size)
7590 {
7591 case 4:
7592 fputs ("\t.long\t", file);
7593 break;
7594 case 8:
7595 fputs (DOUBLE_INT_ASM_OP, file);
7596 break;
7597 default:
7598 gcc_unreachable ();
7599 }
7600 output_addr_const (file, x);
7601 if (TARGET_ELF)
7602 fputs ("@dtprel+0x8000", file);
7603 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
7604 {
7605 switch (SYMBOL_REF_TLS_MODEL (x))
7606 {
7607 case 0:
7608 break;
7609 case TLS_MODEL_LOCAL_EXEC:
7610 fputs ("@le", file);
7611 break;
7612 case TLS_MODEL_INITIAL_EXEC:
7613 fputs ("@ie", file);
7614 break;
7615 case TLS_MODEL_GLOBAL_DYNAMIC:
7616 case TLS_MODEL_LOCAL_DYNAMIC:
7617 fputs ("@m", file);
7618 break;
7619 default:
7620 gcc_unreachable ();
7621 }
7622 }
7623 }
7624
7625 /* Return true if X is a symbol that refers to real (rather than emulated)
7626 TLS. */
7627
7628 static bool
7629 rs6000_real_tls_symbol_ref_p (rtx x)
7630 {
7631 return (GET_CODE (x) == SYMBOL_REF
7632 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7633 }
7634
7635 /* In the name of slightly smaller debug output, and to cater to
7636 general assembler lossage, recognize various UNSPEC sequences
7637 and turn them back into a direct symbol reference. */
7638
7639 static rtx
7640 rs6000_delegitimize_address (rtx orig_x)
7641 {
7642 rtx x, y, offset;
7643
7644 orig_x = delegitimize_mem_from_attrs (orig_x);
7645 x = orig_x;
7646 if (MEM_P (x))
7647 x = XEXP (x, 0);
7648
7649 y = x;
7650 if (TARGET_CMODEL != CMODEL_SMALL
7651 && GET_CODE (y) == LO_SUM)
7652 y = XEXP (y, 1);
7653
7654 offset = NULL_RTX;
7655 if (GET_CODE (y) == PLUS
7656 && GET_MODE (y) == Pmode
7657 && CONST_INT_P (XEXP (y, 1)))
7658 {
7659 offset = XEXP (y, 1);
7660 y = XEXP (y, 0);
7661 }
7662
7663 if (GET_CODE (y) == UNSPEC
7664 && XINT (y, 1) == UNSPEC_TOCREL)
7665 {
7666 y = XVECEXP (y, 0, 0);
7667
7668 #ifdef HAVE_AS_TLS
7669 /* Do not associate thread-local symbols with the original
7670 constant pool symbol. */
7671 if (TARGET_XCOFF
7672 && GET_CODE (y) == SYMBOL_REF
7673 && CONSTANT_POOL_ADDRESS_P (y)
7674 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7675 return orig_x;
7676 #endif
7677
7678 if (offset != NULL_RTX)
7679 y = gen_rtx_PLUS (Pmode, y, offset);
7680 if (!MEM_P (orig_x))
7681 return y;
7682 else
7683 return replace_equiv_address_nv (orig_x, y);
7684 }
7685
7686 if (TARGET_MACHO
7687 && GET_CODE (orig_x) == LO_SUM
7688 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7689 {
7690 y = XEXP (XEXP (orig_x, 1), 0);
7691 if (GET_CODE (y) == UNSPEC
7692 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7693 return XVECEXP (y, 0, 0);
7694 }
7695
7696 return orig_x;
7697 }
7698
7699 /* Return true if X shouldn't be emitted into the debug info.
7700 The linker doesn't like .toc section references from
7701 .debug_* sections, so reject .toc section symbols. */
7702
7703 static bool
7704 rs6000_const_not_ok_for_debug_p (rtx x)
7705 {
7706 if (GET_CODE (x) == SYMBOL_REF
7707 && CONSTANT_POOL_ADDRESS_P (x))
7708 {
7709 rtx c = get_pool_constant (x);
7710 machine_mode cmode = get_pool_mode (x);
7711 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7712 return true;
7713 }
7714
7715 return false;
7716 }
7717
7718 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7719
7720 static GTY(()) rtx rs6000_tls_symbol;
7721 static rtx
7722 rs6000_tls_get_addr (void)
7723 {
7724 if (!rs6000_tls_symbol)
7725 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7726
7727 return rs6000_tls_symbol;
7728 }
7729
7730 /* Construct the SYMBOL_REF for TLS GOT references. */
7731
7732 static GTY(()) rtx rs6000_got_symbol;
7733 static rtx
7734 rs6000_got_sym (void)
7735 {
7736 if (!rs6000_got_symbol)
7737 {
7738 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7739 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7740 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7741 }
7742
7743 return rs6000_got_symbol;
7744 }
7745
7746 /* AIX Thread-Local Address support. */
7747
7748 static rtx
7749 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7750 {
7751 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7752 const char *name;
7753 char *tlsname;
7754
7755 name = XSTR (addr, 0);
7756 /* Append TLS CSECT qualifier, unless the symbol already is qualified
7757 or the symbol will be in TLS private data section. */
7758 if (name[strlen (name) - 1] != ']'
7759 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7760 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7761 {
7762 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7763 strcpy (tlsname, name);
7764 strcat (tlsname,
7765 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7766 tlsaddr = copy_rtx (addr);
7767 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7768 }
7769 else
7770 tlsaddr = addr;
7771
7772 /* Place addr into TOC constant pool. */
7773 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7774
7775 /* Output the TOC entry and create the MEM referencing the value. */
7776 if (constant_pool_expr_p (XEXP (sym, 0))
7777 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7778 {
7779 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7780 mem = gen_const_mem (Pmode, tocref);
7781 set_mem_alias_set (mem, get_TOC_alias_set ());
7782 }
7783 else
7784 return sym;
7785
7786 /* Use global-dynamic for local-dynamic. */
7787 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7788 || model == TLS_MODEL_LOCAL_DYNAMIC)
7789 {
7790 /* Create new TOC reference for @m symbol. */
7791 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7792 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7793 strcpy (tlsname, "*LCM");
7794 strcat (tlsname, name + 3);
7795 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7796 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7797 tocref = create_TOC_reference (modaddr, NULL_RTX);
7798 rtx modmem = gen_const_mem (Pmode, tocref);
7799 set_mem_alias_set (modmem, get_TOC_alias_set ());
7800
7801 rtx modreg = gen_reg_rtx (Pmode);
7802 emit_insn (gen_rtx_SET (modreg, modmem));
7803
7804 tmpreg = gen_reg_rtx (Pmode);
7805 emit_insn (gen_rtx_SET (tmpreg, mem));
7806
7807 dest = gen_reg_rtx (Pmode);
7808 if (TARGET_32BIT)
7809 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7810 else
7811 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7812 return dest;
7813 }
7814 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
7815 else if (TARGET_32BIT)
7816 {
7817 tlsreg = gen_reg_rtx (SImode);
7818 emit_insn (gen_tls_get_tpointer (tlsreg));
7819 }
7820 else
7821 tlsreg = gen_rtx_REG (DImode, 13);
7822
7823 /* Load the TOC value into temporary register. */
7824 tmpreg = gen_reg_rtx (Pmode);
7825 emit_insn (gen_rtx_SET (tmpreg, mem));
7826 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7827 gen_rtx_MINUS (Pmode, addr, tlsreg));
7828
7829 /* Add TOC symbol value to TLS pointer. */
7830 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7831
7832 return dest;
7833 }
7834
7835 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7836 this (thread-local) address. */
7837
7838 static rtx
7839 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7840 {
7841 rtx dest, insn;
7842
7843 if (TARGET_XCOFF)
7844 return rs6000_legitimize_tls_address_aix (addr, model);
7845
7846 dest = gen_reg_rtx (Pmode);
7847 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7848 {
7849 rtx tlsreg;
7850
7851 if (TARGET_64BIT)
7852 {
7853 tlsreg = gen_rtx_REG (Pmode, 13);
7854 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7855 }
7856 else
7857 {
7858 tlsreg = gen_rtx_REG (Pmode, 2);
7859 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7860 }
7861 emit_insn (insn);
7862 }
7863 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7864 {
7865 rtx tlsreg, tmp;
7866
7867 tmp = gen_reg_rtx (Pmode);
7868 if (TARGET_64BIT)
7869 {
7870 tlsreg = gen_rtx_REG (Pmode, 13);
7871 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7872 }
7873 else
7874 {
7875 tlsreg = gen_rtx_REG (Pmode, 2);
7876 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7877 }
7878 emit_insn (insn);
7879 if (TARGET_64BIT)
7880 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7881 else
7882 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7883 emit_insn (insn);
7884 }
7885 else
7886 {
7887 rtx r3, got, tga, tmp1, tmp2, call_insn;
7888
7889 /* We currently use relocations like @got@tlsgd for tls, which
7890 means the linker will handle allocation of tls entries, placing
7891 them in the .got section. So use a pointer to the .got section,
7892 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7893 or to secondary GOT sections used by 32-bit -fPIC. */
7894 if (TARGET_64BIT)
7895 got = gen_rtx_REG (Pmode, 2);
7896 else
7897 {
7898 if (flag_pic == 1)
7899 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7900 else
7901 {
7902 rtx gsym = rs6000_got_sym ();
7903 got = gen_reg_rtx (Pmode);
7904 if (flag_pic == 0)
7905 rs6000_emit_move (got, gsym, Pmode);
7906 else
7907 {
7908 rtx mem, lab, last;
7909
7910 tmp1 = gen_reg_rtx (Pmode);
7911 tmp2 = gen_reg_rtx (Pmode);
7912 mem = gen_const_mem (Pmode, tmp1);
7913 lab = gen_label_rtx ();
7914 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7915 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7916 if (TARGET_LINK_STACK)
7917 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7918 emit_move_insn (tmp2, mem);
7919 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7920 set_unique_reg_note (last, REG_EQUAL, gsym);
7921 }
7922 }
7923 }
7924
7925 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7926 {
7927 tga = rs6000_tls_get_addr ();
7928 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7929 1, const0_rtx, Pmode);
7930
7931 r3 = gen_rtx_REG (Pmode, 3);
7932 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7933 {
7934 if (TARGET_64BIT)
7935 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7936 else
7937 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7938 }
7939 else if (DEFAULT_ABI == ABI_V4)
7940 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7941 else
7942 gcc_unreachable ();
7943 call_insn = last_call_insn ();
7944 PATTERN (call_insn) = insn;
7945 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7946 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7947 pic_offset_table_rtx);
7948 }
7949 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7950 {
7951 tga = rs6000_tls_get_addr ();
7952 tmp1 = gen_reg_rtx (Pmode);
7953 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7954 1, const0_rtx, Pmode);
7955
7956 r3 = gen_rtx_REG (Pmode, 3);
7957 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7958 {
7959 if (TARGET_64BIT)
7960 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7961 else
7962 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7963 }
7964 else if (DEFAULT_ABI == ABI_V4)
7965 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7966 else
7967 gcc_unreachable ();
7968 call_insn = last_call_insn ();
7969 PATTERN (call_insn) = insn;
7970 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7971 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7972 pic_offset_table_rtx);
7973
7974 if (rs6000_tls_size == 16)
7975 {
7976 if (TARGET_64BIT)
7977 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7978 else
7979 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7980 }
7981 else if (rs6000_tls_size == 32)
7982 {
7983 tmp2 = gen_reg_rtx (Pmode);
7984 if (TARGET_64BIT)
7985 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7986 else
7987 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7988 emit_insn (insn);
7989 if (TARGET_64BIT)
7990 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7991 else
7992 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7993 }
7994 else
7995 {
7996 tmp2 = gen_reg_rtx (Pmode);
7997 if (TARGET_64BIT)
7998 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7999 else
8000 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8001 emit_insn (insn);
8002 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8003 }
8004 emit_insn (insn);
8005 }
8006 else
8007 {
8008 /* IE, or 64-bit offset LE. */
8009 tmp2 = gen_reg_rtx (Pmode);
8010 if (TARGET_64BIT)
8011 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8012 else
8013 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8014 emit_insn (insn);
8015 if (TARGET_64BIT)
8016 insn = gen_tls_tls_64 (dest, tmp2, addr);
8017 else
8018 insn = gen_tls_tls_32 (dest, tmp2, addr);
8019 emit_insn (insn);
8020 }
8021 }
8022
8023 return dest;
8024 }
8025
8026 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8027
8028 static bool
8029 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8030 {
8031 if (GET_CODE (x) == HIGH
8032 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8033 return true;
8034
8035 /* A TLS symbol in the TOC cannot contain a sum. */
8036 if (GET_CODE (x) == CONST
8037 && GET_CODE (XEXP (x, 0)) == PLUS
8038 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8039 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8040 return true;
8041
8042 /* Do not place an ELF TLS symbol in the constant pool. */
8043 return TARGET_ELF && tls_referenced_p (x);
8044 }
8045
8046 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8047 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8048 can be addressed relative to the toc pointer. */
8049
8050 static bool
8051 use_toc_relative_ref (rtx sym, machine_mode mode)
8052 {
8053 return ((constant_pool_expr_p (sym)
8054 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8055 get_pool_mode (sym)))
8056 || (TARGET_CMODEL == CMODEL_MEDIUM
8057 && SYMBOL_REF_LOCAL_P (sym)
8058 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8059 }
8060
8061 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
8062 replace the input X, or the original X if no replacement is called for.
8063 The output parameter *WIN is 1 if the calling macro should goto WIN,
8064 0 if it should not.
8065
8066 For RS/6000, we wish to handle large displacements off a base
8067 register by splitting the addend across an addis and the mem insn.
8068 This cuts the number of extra insns needed from 3 to 1.
8069
8070 On Darwin, we use this to generate code for floating point constants.
8071 A movsf_low is generated so we wind up with 2 instructions rather than 3.
8072 The Darwin code is inside #if TARGET_MACHO because only then are the
8073 machopic_* functions defined. */
8074 static rtx
8075 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
8076 int opnum, int type,
8077 int ind_levels ATTRIBUTE_UNUSED, int *win)
8078 {
8079 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8080
8081 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
8082 DFmode/DImode MEM. */
8083 if (reg_offset_p
8084 && opnum == 1
8085 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
8086 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
8087 reg_offset_p = false;
8088
8089 /* We must recognize output that we have already generated ourselves. */
8090 if (GET_CODE (x) == PLUS
8091 && GET_CODE (XEXP (x, 0)) == PLUS
8092 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8093 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8094 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8095 {
8096 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8097 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8098 opnum, (enum reload_type) type);
8099 *win = 1;
8100 return x;
8101 }
8102
8103 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
8104 if (GET_CODE (x) == LO_SUM
8105 && GET_CODE (XEXP (x, 0)) == HIGH)
8106 {
8107 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8108 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8109 opnum, (enum reload_type) type);
8110 *win = 1;
8111 return x;
8112 }
8113
8114 #if TARGET_MACHO
8115 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
8116 && GET_CODE (x) == LO_SUM
8117 && GET_CODE (XEXP (x, 0)) == PLUS
8118 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
8119 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
8120 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
8121 && machopic_operand_p (XEXP (x, 1)))
8122 {
8123 /* Result of previous invocation of this function on Darwin
8124 floating point constant. */
8125 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8126 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8127 opnum, (enum reload_type) type);
8128 *win = 1;
8129 return x;
8130 }
8131 #endif
8132
8133 if (TARGET_CMODEL != CMODEL_SMALL
8134 && reg_offset_p
8135 && small_toc_ref (x, VOIDmode))
8136 {
8137 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
8138 x = gen_rtx_LO_SUM (Pmode, hi, x);
8139 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8140 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8141 opnum, (enum reload_type) type);
8142 *win = 1;
8143 return x;
8144 }
8145
8146 if (GET_CODE (x) == PLUS
8147 && GET_CODE (XEXP (x, 0)) == REG
8148 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
8149 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
8150 && GET_CODE (XEXP (x, 1)) == CONST_INT
8151 && reg_offset_p
8152 && !SPE_VECTOR_MODE (mode)
8153 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8154 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
8155 {
8156 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
8157 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
8158 HOST_WIDE_INT high
8159 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8160
8161 /* Check for 32-bit overflow. */
8162 if (high + low != val)
8163 {
8164 *win = 0;
8165 return x;
8166 }
8167
8168 /* Reload the high part into a base reg; leave the low part
8169 in the mem directly. */
8170
8171 x = gen_rtx_PLUS (GET_MODE (x),
8172 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
8173 GEN_INT (high)),
8174 GEN_INT (low));
8175
8176 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8177 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8178 opnum, (enum reload_type) type);
8179 *win = 1;
8180 return x;
8181 }
8182
8183 if (GET_CODE (x) == SYMBOL_REF
8184 && reg_offset_p
8185 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
8186 && !SPE_VECTOR_MODE (mode)
8187 #if TARGET_MACHO
8188 && DEFAULT_ABI == ABI_DARWIN
8189 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
8190 && machopic_symbol_defined_p (x)
8191 #else
8192 && DEFAULT_ABI == ABI_V4
8193 && !flag_pic
8194 #endif
8195 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
8196 The same goes for DImode without 64-bit gprs and DFmode and DDmode
8197 without fprs.
8198 ??? Assume floating point reg based on mode? This assumption is
8199 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
8200 where reload ends up doing a DFmode load of a constant from
8201 mem using two gprs. Unfortunately, at this point reload
8202 hasn't yet selected regs so poking around in reload data
8203 won't help and even if we could figure out the regs reliably,
8204 we'd still want to allow this transformation when the mem is
8205 naturally aligned. Since we say the address is good here, we
8206 can't disable offsets from LO_SUMs in mem_operand_gpr.
8207 FIXME: Allow offset from lo_sum for other modes too, when
8208 mem is sufficiently aligned.
8209
8210 Also disallow this if the type can go in VMX/Altivec registers, since
8211 those registers do not have d-form (reg+offset) address modes. */
8212 && !reg_addr[mode].scalar_in_vmx_p
8213 && mode != TFmode
8214 && mode != TDmode
8215 && mode != IFmode
8216 && mode != KFmode
8217 && (mode != TImode || !TARGET_VSX_TIMODE)
8218 && mode != PTImode
8219 && (mode != DImode || TARGET_POWERPC64)
8220 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
8221 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
8222 {
8223 #if TARGET_MACHO
8224 if (flag_pic)
8225 {
8226 rtx offset = machopic_gen_offset (x);
8227 x = gen_rtx_LO_SUM (GET_MODE (x),
8228 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
8229 gen_rtx_HIGH (Pmode, offset)), offset);
8230 }
8231 else
8232 #endif
8233 x = gen_rtx_LO_SUM (GET_MODE (x),
8234 gen_rtx_HIGH (Pmode, x), x);
8235
8236 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8237 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8238 opnum, (enum reload_type) type);
8239 *win = 1;
8240 return x;
8241 }
8242
8243 /* Reload an offset address wrapped by an AND that represents the
8244 masking of the lower bits. Strip the outer AND and let reload
8245 convert the offset address into an indirect address. For VSX,
8246 force reload to create the address with an AND in a separate
8247 register, because we can't guarantee an altivec register will
8248 be used. */
8249 if (VECTOR_MEM_ALTIVEC_P (mode)
8250 && GET_CODE (x) == AND
8251 && GET_CODE (XEXP (x, 0)) == PLUS
8252 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8253 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8254 && GET_CODE (XEXP (x, 1)) == CONST_INT
8255 && INTVAL (XEXP (x, 1)) == -16)
8256 {
8257 x = XEXP (x, 0);
8258 *win = 1;
8259 return x;
8260 }
8261
8262 if (TARGET_TOC
8263 && reg_offset_p
8264 && GET_CODE (x) == SYMBOL_REF
8265 && use_toc_relative_ref (x, mode))
8266 {
8267 x = create_TOC_reference (x, NULL_RTX);
8268 if (TARGET_CMODEL != CMODEL_SMALL)
8269 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8270 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8271 opnum, (enum reload_type) type);
8272 *win = 1;
8273 return x;
8274 }
8275 *win = 0;
8276 return x;
8277 }
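/* Editor's note: the "check for 32-bit overflow" above catches addends
   whose sign-extended high/low split does not sum back to VAL.  E.g.
   val = 0x7fffffff gives low = -1 and high = -0x80000000, so
   high + low = -0x80000001 != val; *win stays 0 and the address is
   left for reload to handle some other way.  */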
8278
8279 /* Debug version of rs6000_legitimize_reload_address. */
8280 static rtx
8281 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
8282 int opnum, int type,
8283 int ind_levels, int *win)
8284 {
8285 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
8286 ind_levels, win);
8287 fprintf (stderr,
8288 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
8289 "type = %d, ind_levels = %d, win = %d, original addr:\n",
8290 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
8291 debug_rtx (x);
8292
8293 if (x == ret)
8294 fprintf (stderr, "Same address returned\n");
8295 else if (!ret)
8296 fprintf (stderr, "NULL returned\n");
8297 else
8298 {
8299 fprintf (stderr, "New address:\n");
8300 debug_rtx (ret);
8301 }
8302
8303 return ret;
8304 }
8305
8306 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8307 that is a valid memory address for an instruction.
8308 The MODE argument is the machine mode for the MEM expression
8309 that wants to use this address.
8310
8311 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8312 refers to a constant pool entry of an address (or the sum of it
8313 plus a constant), a short (16-bit signed) constant plus a register,
8314 the sum of two registers, or a register indirect, possibly with an
8315 auto-increment. For DFmode, DDmode and DImode with a constant plus
8316 register, we must ensure that both words are addressable, or on
8317 PowerPC64 that the offset is word aligned.
8318
8319 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8320 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8321 because adjacent memory cells are accessed by adding word-sized offsets
8322 during assembly output. */
8323 static bool
8324 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8325 {
8326 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8327
8328 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8329 if (VECTOR_MEM_ALTIVEC_P (mode)
8330 && GET_CODE (x) == AND
8331 && GET_CODE (XEXP (x, 1)) == CONST_INT
8332 && INTVAL (XEXP (x, 1)) == -16)
8333 x = XEXP (x, 0);
8334
8335 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8336 return 0;
8337 if (legitimate_indirect_address_p (x, reg_ok_strict))
8338 return 1;
8339 if (TARGET_UPDATE
8340 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8341 && mode_supports_pre_incdec_p (mode)
8342 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8343 return 1;
8344 if (virtual_stack_registers_memory_p (x))
8345 return 1;
8346 if (reg_offset_p && legitimate_small_data_p (mode, x))
8347 return 1;
8348 if (reg_offset_p
8349 && legitimate_constant_pool_address_p (x, mode,
8350 reg_ok_strict || lra_in_progress))
8351 return 1;
8352 if (reg_offset_p && reg_addr[mode].fused_toc && toc_fusion_mem_wrapped (x, mode))
8353 return 1;
8354 /* For TImode, if we have load/store quad and TImode in VSX registers, only
8355 allow register indirect addresses. This will allow the values to go in
8356 either GPRs or VSX registers without reloading. The vector types would
8357 tend to go into VSX registers, so we allow REG+REG, while TImode seems
8358 somewhat split, in that some uses are GPR based, and some VSX based. */
8359 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
8360 return 0;
8361 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8362 if (! reg_ok_strict
8363 && reg_offset_p
8364 && GET_CODE (x) == PLUS
8365 && GET_CODE (XEXP (x, 0)) == REG
8366 && (XEXP (x, 0) == virtual_stack_vars_rtx
8367 || XEXP (x, 0) == arg_pointer_rtx)
8368 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8369 return 1;
8370 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8371 return 1;
8372 if (!FLOAT128_2REG_P (mode)
8373 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8374 || TARGET_POWERPC64
8375 || (mode != DFmode && mode != DDmode)
8376 || (TARGET_E500_DOUBLE && mode != DDmode))
8377 && (TARGET_POWERPC64 || mode != DImode)
8378 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8379 && mode != PTImode
8380 && !avoiding_indexed_address_p (mode)
8381 && legitimate_indexed_address_p (x, reg_ok_strict))
8382 return 1;
8383 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8384 && mode_supports_pre_modify_p (mode)
8385 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8386 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8387 reg_ok_strict, false)
8388 || (!avoiding_indexed_address_p (mode)
8389 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8390 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8391 return 1;
8392 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8393 return 1;
8394 return 0;
8395 }
8396
8397 /* Debug version of rs6000_legitimate_address_p. */
8398 static bool
8399 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8400 bool reg_ok_strict)
8401 {
8402 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8403 fprintf (stderr,
8404 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8405 "strict = %d, reload = %s, code = %s\n",
8406 ret ? "true" : "false",
8407 GET_MODE_NAME (mode),
8408 reg_ok_strict,
8409 (reload_completed
8410 ? "after"
8411 : (reload_in_progress ? "progress" : "before")),
8412 GET_RTX_NAME (GET_CODE (x)));
8413 debug_rtx (x);
8414
8415 return ret;
8416 }
8417
8418 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8419
8420 static bool
8421 rs6000_mode_dependent_address_p (const_rtx addr,
8422 addr_space_t as ATTRIBUTE_UNUSED)
8423 {
8424 return rs6000_mode_dependent_address_ptr (addr);
8425 }
8426
8427 /* Go to LABEL if ADDR (a legitimate address expression)
8428 has an effect that depends on the machine mode it is used for.
8429
8430 On the RS/6000 this is true of all integral offsets (since AltiVec
8431 and VSX modes don't allow them) and of any pre-increment or decrement.
8432
8433 ??? Except that due to conceptual problems in offsettable_address_p
8434 we can't really report the problems of integral offsets. So leave
8435 this assuming that the adjustable offset must be valid for the
8436 sub-words of a TFmode operand, which is what we had before. */
8437
8438 static bool
8439 rs6000_mode_dependent_address (const_rtx addr)
8440 {
8441 switch (GET_CODE (addr))
8442 {
8443 case PLUS:
8444 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8445 is considered a legitimate address before reload, so there
8446 are no offset restrictions in that case. Note that this
8447 condition is safe in strict mode because any address involving
8448 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8449 been rejected as illegitimate. */
8450 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8451 && XEXP (addr, 0) != arg_pointer_rtx
8452 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
8453 {
8454 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8455 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
8456 }
8457 break;
8458
8459 case LO_SUM:
8460 /* Anything in the constant pool is sufficiently aligned that
8461 all bytes have the same high part address. */
8462 return !legitimate_constant_pool_address_p (addr, QImode, false);
8463
8464 /* Auto-increment cases are now treated generically in recog.c. */
8465 case PRE_MODIFY:
8466 return TARGET_UPDATE;
8467
8468 /* AND is only allowed in Altivec loads. */
8469 case AND:
8470 return true;
8471
8472 default:
8473 break;
8474 }
8475
8476 return false;
8477 }
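/* Editor's note: on 64-bit the PLUS case above flags offsets of 32760
   and up as mode-dependent: a TFmode access at offset 32760 would need
   its second doubleword at 32768, which no longer fits in a signed
   16-bit displacement.  */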
8478
8479 /* Debug version of rs6000_mode_dependent_address. */
8480 static bool
8481 rs6000_debug_mode_dependent_address (const_rtx addr)
8482 {
8483 bool ret = rs6000_mode_dependent_address (addr);
8484
8485 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8486 ret ? "true" : "false");
8487 debug_rtx (addr);
8488
8489 return ret;
8490 }
8491
8492 /* Implement FIND_BASE_TERM. */
8493
8494 rtx
8495 rs6000_find_base_term (rtx op)
8496 {
8497 rtx base;
8498
8499 base = op;
8500 if (GET_CODE (base) == CONST)
8501 base = XEXP (base, 0);
8502 if (GET_CODE (base) == PLUS)
8503 base = XEXP (base, 0);
8504 if (GET_CODE (base) == UNSPEC)
8505 switch (XINT (base, 1))
8506 {
8507 case UNSPEC_TOCREL:
8508 case UNSPEC_MACHOPIC_OFFSET:
8509 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8510 for aliasing purposes. */
8511 return XVECEXP (base, 0, 0);
8512 }
8513
8514 return op;
8515 }
8516
8517 /* More elaborate version of recog's offsettable_memref_p predicate
8518 that works around the ??? note of rs6000_mode_dependent_address.
8519 In particular it accepts
8520
8521 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8522
8523 in 32-bit mode, which the recog predicate rejects. */
8524
8525 static bool
8526 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
8527 {
8528 bool worst_case;
8529
8530 if (!MEM_P (op))
8531 return false;
8532
8533 /* First mimic offsettable_memref_p. */
8534 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
8535 return true;
8536
8537 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8538 the latter predicate knows nothing about the mode of the memory
8539 reference and, therefore, assumes that it is the largest supported
8540 mode (TFmode). As a consequence, legitimate offsettable memory
8541 references are rejected. rs6000_legitimate_offset_address_p contains
8542 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8543 at least with a little bit of help here given that we know the
8544 actual registers used. */
8545 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8546 || GET_MODE_SIZE (reg_mode) == 4);
8547 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8548 true, worst_case);
8549 }
8550
8551 /* Change register usage conditional on target flags. */
8552 static void
8553 rs6000_conditional_register_usage (void)
8554 {
8555 int i;
8556
8557 if (TARGET_DEBUG_TARGET)
8558 fprintf (stderr, "rs6000_conditional_register_usage called\n");
8559
8560 /* Set MQ register fixed (already call_used) so that it will not be
8561 allocated. */
8562 fixed_regs[64] = 1;
8563
8564 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
8565 if (TARGET_64BIT)
8566 fixed_regs[13] = call_used_regs[13]
8567 = call_really_used_regs[13] = 1;
8568
8569 /* Conditionally disable FPRs. */
8570 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
8571 for (i = 32; i < 64; i++)
8572 fixed_regs[i] = call_used_regs[i]
8573 = call_really_used_regs[i] = 1;
8574
8575 /* The TOC register is not killed across calls in a way that is
8576 visible to the compiler. */
8577 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8578 call_really_used_regs[2] = 0;
8579
8580 if (DEFAULT_ABI == ABI_V4
8581 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8582 && flag_pic == 2)
8583 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8584
8585 if (DEFAULT_ABI == ABI_V4
8586 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8587 && flag_pic == 1)
8588 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8589 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8590 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8591
8592 if (DEFAULT_ABI == ABI_DARWIN
8593 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
8594 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8595 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8596 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8597
8598 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8599 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8600 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8601
8602 if (TARGET_SPE)
8603 {
8604 global_regs[SPEFSCR_REGNO] = 1;
8605 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8606 registers in prologues and epilogues. We no longer use r14
8607 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8608 pool for link-compatibility with older versions of GCC. Once
8609 "old" code has died out, we can return r14 to the allocation
8610 pool. */
8611 fixed_regs[14]
8612 = call_used_regs[14]
8613 = call_really_used_regs[14] = 1;
8614 }
8615
8616 if (!TARGET_ALTIVEC && !TARGET_VSX)
8617 {
8618 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8619 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8620 call_really_used_regs[VRSAVE_REGNO] = 1;
8621 }
8622
8623 if (TARGET_ALTIVEC || TARGET_VSX)
8624 global_regs[VSCR_REGNO] = 1;
8625
8626 if (TARGET_ALTIVEC_ABI)
8627 {
8628 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8629 call_used_regs[i] = call_really_used_regs[i] = 1;
8630
8631 /* AIX reserves VR20:31 in non-extended ABI mode. */
8632 if (TARGET_XCOFF)
8633 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8634 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8635 }
8636 }
8637
8638 \f
8639 /* Output insns to set DEST equal to the constant SOURCE as a series of
8640 lis, ori and shl instructions and return TRUE. */
8641
8642 bool
8643 rs6000_emit_set_const (rtx dest, rtx source)
8644 {
8645 machine_mode mode = GET_MODE (dest);
8646 rtx temp, set;
8647 rtx_insn *insn;
8648 HOST_WIDE_INT c;
8649
8650 gcc_checking_assert (CONST_INT_P (source));
8651 c = INTVAL (source);
8652 switch (mode)
8653 {
8654 case QImode:
8655 case HImode:
8656 emit_insn (gen_rtx_SET (dest, source));
8657 return true;
8658
8659 case SImode:
8660 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8661
8662 emit_insn (gen_rtx_SET (copy_rtx (temp),
8663 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8664 emit_insn (gen_rtx_SET (dest,
8665 gen_rtx_IOR (SImode, copy_rtx (temp),
8666 GEN_INT (c & 0xffff))));
8667 break;
8668
8669 case DImode:
8670 if (!TARGET_POWERPC64)
8671 {
8672 rtx hi, lo;
8673
8674 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8675 DImode);
8676 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8677 DImode);
8678 emit_move_insn (hi, GEN_INT (c >> 32));
8679 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8680 emit_move_insn (lo, GEN_INT (c));
8681 }
8682 else
8683 rs6000_emit_set_long_const (dest, c);
8684 break;
8685
8686 default:
8687 gcc_unreachable ();
8688 }
8689
8690 insn = get_last_insn ();
8691 set = single_set (insn);
8692 if (! CONSTANT_P (SET_SRC (set)))
8693 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8694
8695 return true;
8696 }
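/* Editor's note: for SImode the two sets above are the classic lis/ori
   pair; e.g. c = 0x12345678 becomes

     lis tmp,0x1234      # tmp = 0x12340000
     ori dest,tmp,0x5678

   with register names illustrative.  */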
8697
8698 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8699 Output insns to set DEST equal to the constant C as a series of
8700 lis, ori and shl instructions. */
8701
8702 static void
8703 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8704 {
8705 rtx temp;
8706 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8707
8708 ud1 = c & 0xffff;
8709 c = c >> 16;
8710 ud2 = c & 0xffff;
8711 c = c >> 16;
8712 ud3 = c & 0xffff;
8713 c = c >> 16;
8714 ud4 = c & 0xffff;
8715
8716 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8717 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8718 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8719
8720 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8721 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8722 {
8723 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8724
8725 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8726 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8727 if (ud1 != 0)
8728 emit_move_insn (dest,
8729 gen_rtx_IOR (DImode, copy_rtx (temp),
8730 GEN_INT (ud1)));
8731 }
8732 else if (ud3 == 0 && ud4 == 0)
8733 {
8734 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8735
8736 gcc_assert (ud2 & 0x8000);
8737 emit_move_insn (copy_rtx (temp),
8738 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8739 if (ud1 != 0)
8740 emit_move_insn (copy_rtx (temp),
8741 gen_rtx_IOR (DImode, copy_rtx (temp),
8742 GEN_INT (ud1)));
8743 emit_move_insn (dest,
8744 gen_rtx_ZERO_EXTEND (DImode,
8745 gen_lowpart (SImode,
8746 copy_rtx (temp))));
8747 }
8748 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8749 || (ud4 == 0 && ! (ud3 & 0x8000)))
8750 {
8751 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8752
8753 emit_move_insn (copy_rtx (temp),
8754 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8755 if (ud2 != 0)
8756 emit_move_insn (copy_rtx (temp),
8757 gen_rtx_IOR (DImode, copy_rtx (temp),
8758 GEN_INT (ud2)));
8759 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8760 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8761 GEN_INT (16)));
8762 if (ud1 != 0)
8763 emit_move_insn (dest,
8764 gen_rtx_IOR (DImode, copy_rtx (temp),
8765 GEN_INT (ud1)));
8766 }
8767 else
8768 {
8769 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8770
8771 emit_move_insn (copy_rtx (temp),
8772 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8773 if (ud3 != 0)
8774 emit_move_insn (copy_rtx (temp),
8775 gen_rtx_IOR (DImode, copy_rtx (temp),
8776 GEN_INT (ud3)));
8777
8778 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8779 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8780 GEN_INT (32)));
8781 if (ud2 != 0)
8782 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8783 gen_rtx_IOR (DImode, copy_rtx (temp),
8784 GEN_INT (ud2 << 16)));
8785 if (ud1 != 0)
8786 emit_move_insn (dest,
8787 gen_rtx_IOR (DImode, copy_rtx (temp),
8788 GEN_INT (ud1)));
8789 }
8790 }
8791
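/* Worked example, added for exposition (not from the GCC source): for
   c = 0x1234567887654321 the halfwords are ud4 = 0x1234, ud3 = 0x5678,
   ud2 = 0x8765 and ud1 = 0x4321, so the general case above emits the
   five-instruction sequence

       lis   rT, 0x1234
       ori   rT, rT, 0x5678
       sldi  rT, rT, 32
       oris  rT, rT, 0x8765
       ori   rT, rT, 0x4321

   where rT stands for the temporary (or for DEST itself when no pseudo
   can be created).  The register name is a placeholder.  */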
8792 /* Helper for the following. Get rid of [r+r] memory refs
8793 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
8794
8795 static void
8796 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8797 {
8798 if (reload_in_progress)
8799 return;
8800
8801 if (GET_CODE (operands[0]) == MEM
8802 && GET_CODE (XEXP (operands[0], 0)) != REG
8803 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8804 GET_MODE (operands[0]), false))
8805 operands[0]
8806 = replace_equiv_address (operands[0],
8807 copy_addr_to_reg (XEXP (operands[0], 0)));
8808
8809 if (GET_CODE (operands[1]) == MEM
8810 && GET_CODE (XEXP (operands[1], 0)) != REG
8811 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8812 GET_MODE (operands[1]), false))
8813 operands[1]
8814 = replace_equiv_address (operands[1],
8815 copy_addr_to_reg (XEXP (operands[1], 0)));
8816 }
8817
8818 /* Generate a vector of constants to permute MODE for a little-endian
8819 storage operation by swapping the two halves of a vector. */
8820 static rtvec
8821 rs6000_const_vec (machine_mode mode)
8822 {
8823 int i, subparts;
8824 rtvec v;
8825
8826 switch (mode)
8827 {
8828 case V1TImode:
8829 subparts = 1;
8830 break;
8831 case V2DFmode:
8832 case V2DImode:
8833 subparts = 2;
8834 break;
8835 case V4SFmode:
8836 case V4SImode:
8837 subparts = 4;
8838 break;
8839 case V8HImode:
8840 subparts = 8;
8841 break;
8842 case V16QImode:
8843 subparts = 16;
8844 break;
8845 default:
8846 gcc_unreachable ();
8847 }
8848
8849 v = rtvec_alloc (subparts);
8850
8851 for (i = 0; i < subparts / 2; ++i)
8852 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8853 for (i = subparts / 2; i < subparts; ++i)
8854 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8855
8856 return v;
8857 }
8858
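/* Example, added for exposition: for V4SImode the selector built above
   is { 2, 3, 0, 1 }, i.e. the two doubleword halves of the vector are
   swapped; for V16QImode it is { 8, 9, ..., 15, 0, 1, ..., 7 }.  */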
8859 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8860 for a VSX load or store operation. */
8861 rtx
8862 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8863 {
8864 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point. */
8865 if (FLOAT128_VECTOR_P (mode))
8866 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
8867 else
8868 {
8869 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8870 return gen_rtx_VEC_SELECT (mode, source, par);
8871 }
8872 }
8873
8874 /* Emit a little-endian load from vector memory location SOURCE to VSX
8875 register DEST in mode MODE. The load is done with two permuting
8876 insns that represent an lxvd2x and an xxpermdi. */
8877 void
8878 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8879 {
8880 rtx tmp, permute_mem, permute_reg;
8881
8882 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8883 V1TImode). */
8884 if (mode == TImode || mode == V1TImode)
8885 {
8886 mode = V2DImode;
8887 dest = gen_lowpart (V2DImode, dest);
8888 source = adjust_address (source, V2DImode, 0);
8889 }
8890
8891 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8892 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8893 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8894 emit_insn (gen_rtx_SET (tmp, permute_mem));
8895 emit_insn (gen_rtx_SET (dest, permute_reg));
8896 }
8897
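/* Sketch of the RTL emitted above for a V2DImode load, added for
   exposition (TMP is the scratch register created when pseudos are
   available; the names are placeholders):

       (set TMP  (vec_select:V2DI MEM  (parallel [1 0])))   ; lxvd2x
       (set DEST (vec_select:V2DI TMP  (parallel [1 0])))   ; xxpermdi

   The two element swaps compose so that DEST holds the value in the
   register element order GCC expects on little-endian targets.  */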
8898 /* Emit a little-endian store to vector memory location DEST from VSX
8899 register SOURCE in mode MODE. The store is done with two permuting
8900 insns that represent an xxpermdi and an stxvd2x. */
8901 void
8902 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8903 {
8904 rtx tmp, permute_src, permute_tmp;
8905
8906 /* This should never be called during or after reload, because it does
8907 not re-permute the source register. It is intended only for use
8908 during expand. */
8909 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
8910
8911 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8912 V1TImode). */
8913 if (mode == TImode || mode == V1TImode)
8914 {
8915 mode = V2DImode;
8916 dest = adjust_address (dest, V2DImode, 0);
8917 source = gen_lowpart (V2DImode, source);
8918 }
8919
8920 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8921 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8922 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8923 emit_insn (gen_rtx_SET (tmp, permute_src));
8924 emit_insn (gen_rtx_SET (dest, permute_tmp));
8925 }
8926
8927 /* Emit a sequence representing a little-endian VSX load or store,
8928 moving data from SOURCE to DEST in mode MODE. This is done
8929 separately from rs6000_emit_move to ensure it is called only
8930 during expand. LE VSX loads and stores introduced later are
8931 handled with a split. The expand-time RTL generation allows
8932 us to optimize away redundant pairs of register-permutes. */
8933 void
8934 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8935 {
8936 gcc_assert (!BYTES_BIG_ENDIAN
8937 && VECTOR_MEM_VSX_P (mode)
8938 && !gpr_or_gpr_p (dest, source)
8939 && (MEM_P (source) ^ MEM_P (dest)));
8940
8941 if (MEM_P (source))
8942 {
8943 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8944 rs6000_emit_le_vsx_load (dest, source, mode);
8945 }
8946 else
8947 {
8948 if (!REG_P (source))
8949 source = force_reg (mode, source);
8950 rs6000_emit_le_vsx_store (dest, source, mode);
8951 }
8952 }
8953
8954 /* Emit a move from SOURCE to DEST in mode MODE. */
8955 void
8956 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8957 {
8958 rtx operands[2];
8959 operands[0] = dest;
8960 operands[1] = source;
8961
8962 if (TARGET_DEBUG_ADDR)
8963 {
8964 fprintf (stderr,
8965 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8966 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8967 GET_MODE_NAME (mode),
8968 reload_in_progress,
8969 reload_completed,
8970 can_create_pseudo_p ());
8971 debug_rtx (dest);
8972 fprintf (stderr, "source:\n");
8973 debug_rtx (source);
8974 }
8975
8976 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
8977 if (CONST_WIDE_INT_P (operands[1])
8978 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8979 {
8980 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8981 gcc_unreachable ();
8982 }
8983
8984 /* Check if GCC is setting up a block move that will end up using FP
8985 registers as temporaries. We must make sure this is acceptable. */
8986 if (GET_CODE (operands[0]) == MEM
8987 && GET_CODE (operands[1]) == MEM
8988 && mode == DImode
8989 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8990 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8991 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8992 ? 32 : MEM_ALIGN (operands[0])))
8993 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8994 ? 32
8995 : MEM_ALIGN (operands[1]))))
8996 && ! MEM_VOLATILE_P (operands[0])
8997 && ! MEM_VOLATILE_P (operands[1]))
8998 {
8999 emit_move_insn (adjust_address (operands[0], SImode, 0),
9000 adjust_address (operands[1], SImode, 0));
9001 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9002 adjust_address (copy_rtx (operands[1]), SImode, 4));
9003 return;
9004 }
9005
9006 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
9007 && !gpc_reg_operand (operands[1], mode))
9008 operands[1] = force_reg (mode, operands[1]);
9009
9010 /* Recognize the case where operand[1] is a reference to thread-local
9011 data and load its address to a register. */
9012 if (tls_referenced_p (operands[1]))
9013 {
9014 enum tls_model model;
9015 rtx tmp = operands[1];
9016 rtx addend = NULL;
9017
9018 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9019 {
9020 addend = XEXP (XEXP (tmp, 0), 1);
9021 tmp = XEXP (XEXP (tmp, 0), 0);
9022 }
9023
9024 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
9025 model = SYMBOL_REF_TLS_MODEL (tmp);
9026 gcc_assert (model != 0);
9027
9028 tmp = rs6000_legitimize_tls_address (tmp, model);
9029 if (addend)
9030 {
9031 tmp = gen_rtx_PLUS (mode, tmp, addend);
9032 tmp = force_operand (tmp, operands[0]);
9033 }
9034 operands[1] = tmp;
9035 }
9036
9037 /* Handle the case where reload calls us with an invalid address. */
9038 if (reload_in_progress && mode == Pmode
9039 && (! general_operand (operands[1], mode)
9040 || ! nonimmediate_operand (operands[0], mode)))
9041 goto emit_set;
9042
9043 /* 128-bit constant floating-point values on Darwin should really be loaded
9044 as two parts. However, this premature splitting is a problem when DFmode
9045 values can go into Altivec registers. */
9046 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
9047 && GET_CODE (operands[1]) == CONST_DOUBLE)
9048 {
9049 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9050 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9051 DFmode);
9052 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9053 GET_MODE_SIZE (DFmode)),
9054 simplify_gen_subreg (DFmode, operands[1], mode,
9055 GET_MODE_SIZE (DFmode)),
9056 DFmode);
9057 return;
9058 }
9059
9060 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
9061 cfun->machine->sdmode_stack_slot =
9062 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
9063
9064
9065 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9066 p1:SD) if p1 is not of floating-point class and p0 is spilled, since
9067 we have no analogous movsd_store for this case. */
9068 if (lra_in_progress && mode == DDmode
9069 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9070 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9071 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
9072 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9073 {
9074 enum reg_class cl;
9075 int regno = REGNO (SUBREG_REG (operands[1]));
9076
9077 if (regno >= FIRST_PSEUDO_REGISTER)
9078 {
9079 cl = reg_preferred_class (regno);
9080 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9081 }
9082 if (regno >= 0 && ! FP_REGNO_P (regno))
9083 {
9084 mode = SDmode;
9085 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9086 operands[1] = SUBREG_REG (operands[1]);
9087 }
9088 }
9089 if (lra_in_progress
9090 && mode == SDmode
9091 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9092 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9093 && (REG_P (operands[1])
9094 || (GET_CODE (operands[1]) == SUBREG
9095 && REG_P (SUBREG_REG (operands[1])))))
9096 {
9097 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
9098 ? SUBREG_REG (operands[1]) : operands[1]);
9099 enum reg_class cl;
9100
9101 if (regno >= FIRST_PSEUDO_REGISTER)
9102 {
9103 cl = reg_preferred_class (regno);
9104 gcc_assert (cl != NO_REGS);
9105 regno = ira_class_hard_regs[cl][0];
9106 }
9107 if (FP_REGNO_P (regno))
9108 {
9109 if (GET_MODE (operands[0]) != DDmode)
9110 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9111 emit_insn (gen_movsd_store (operands[0], operands[1]));
9112 }
9113 else if (INT_REGNO_P (regno))
9114 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9115 else
9116 gcc_unreachable ();
9117 return;
9118 }
9119 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9120 p1:DD)) if p0 is not of floating-point class and p1 is spilled, since
9121 we have no analogous movsd_load for this case. */
9122 if (lra_in_progress && mode == DDmode
9123 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
9124 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9125 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9126 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9127 {
9128 enum reg_class cl;
9129 int regno = REGNO (SUBREG_REG (operands[0]));
9130
9131 if (regno >= FIRST_PSEUDO_REGISTER)
9132 {
9133 cl = reg_preferred_class (regno);
9134 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9135 }
9136 if (regno >= 0 && ! FP_REGNO_P (regno))
9137 {
9138 mode = SDmode;
9139 operands[0] = SUBREG_REG (operands[0]);
9140 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9141 }
9142 }
9143 if (lra_in_progress
9144 && mode == SDmode
9145 && (REG_P (operands[0])
9146 || (GET_CODE (operands[0]) == SUBREG
9147 && REG_P (SUBREG_REG (operands[0]))))
9148 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9149 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9150 {
9151 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
9152 ? SUBREG_REG (operands[0]) : operands[0]);
9153 enum reg_class cl;
9154
9155 if (regno >= FIRST_PSEUDO_REGISTER)
9156 {
9157 cl = reg_preferred_class (regno);
9158 gcc_assert (cl != NO_REGS);
9159 regno = ira_class_hard_regs[cl][0];
9160 }
9161 if (FP_REGNO_P (regno))
9162 {
9163 if (GET_MODE (operands[1]) != DDmode)
9164 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9165 emit_insn (gen_movsd_load (operands[0], operands[1]));
9166 }
9167 else if (INT_REGNO_P (regno))
9168 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9169 else
9170 gcc_unreachable ();
9171 return;
9172 }
9173
9174 if (reload_in_progress
9175 && mode == SDmode
9176 && cfun->machine->sdmode_stack_slot != NULL_RTX
9177 && MEM_P (operands[0])
9178 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
9179 && REG_P (operands[1]))
9180 {
9181 if (FP_REGNO_P (REGNO (operands[1])))
9182 {
9183 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
9184 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9185 emit_insn (gen_movsd_store (mem, operands[1]));
9186 }
9187 else if (INT_REGNO_P (REGNO (operands[1])))
9188 {
9189 rtx mem = operands[0];
9190 if (BYTES_BIG_ENDIAN)
9191 mem = adjust_address_nv (mem, mode, 4);
9192 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9193 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
9194 }
9195 else
9196 gcc_unreachable ();
9197 return;
9198 }
9199 if (reload_in_progress
9200 && mode == SDmode
9201 && REG_P (operands[0])
9202 && MEM_P (operands[1])
9203 && cfun->machine->sdmode_stack_slot != NULL_RTX
9204 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
9205 {
9206 if (FP_REGNO_P (REGNO (operands[0])))
9207 {
9208 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
9209 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9210 emit_insn (gen_movsd_load (operands[0], mem));
9211 }
9212 else if (INT_REGNO_P (REGNO (operands[0])))
9213 {
9214 rtx mem = operands[1];
9215 if (BYTES_BIG_ENDIAN)
9216 mem = adjust_address_nv (mem, mode, 4);
9217 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9218 emit_insn (gen_movsd_hardfloat (operands[0], mem));
9219 }
9220 else
9221 gcc_unreachable ();
9222 return;
9223 }
9224
9225 /* FIXME: In the long term, this switch statement should go away
9226 and be replaced by a sequence of tests based on things like
9227 mode == Pmode. */
9228 switch (mode)
9229 {
9230 case HImode:
9231 case QImode:
9232 if (CONSTANT_P (operands[1])
9233 && GET_CODE (operands[1]) != CONST_INT)
9234 operands[1] = force_const_mem (mode, operands[1]);
9235 break;
9236
9237 case TFmode:
9238 case TDmode:
9239 case IFmode:
9240 case KFmode:
9241 if (FLOAT128_2REG_P (mode))
9242 rs6000_eliminate_indexed_memrefs (operands);
9243 /* fall through */
9244
9245 case DFmode:
9246 case DDmode:
9247 case SFmode:
9248 case SDmode:
9249 if (CONSTANT_P (operands[1])
9250 && ! easy_fp_constant (operands[1], mode))
9251 operands[1] = force_const_mem (mode, operands[1]);
9252 break;
9253
9254 case V16QImode:
9255 case V8HImode:
9256 case V4SFmode:
9257 case V4SImode:
9258 case V4HImode:
9259 case V2SFmode:
9260 case V2SImode:
9261 case V1DImode:
9262 case V2DFmode:
9263 case V2DImode:
9264 case V1TImode:
9265 if (CONSTANT_P (operands[1])
9266 && !easy_vector_constant (operands[1], mode))
9267 operands[1] = force_const_mem (mode, operands[1]);
9268 break;
9269
9270 case SImode:
9271 case DImode:
9272 /* Use default pattern for address of ELF small data. */
9273 if (TARGET_ELF
9274 && mode == Pmode
9275 && DEFAULT_ABI == ABI_V4
9276 && (GET_CODE (operands[1]) == SYMBOL_REF
9277 || GET_CODE (operands[1]) == CONST)
9278 && small_data_operand (operands[1], mode))
9279 {
9280 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9281 return;
9282 }
9283
9284 if (DEFAULT_ABI == ABI_V4
9285 && mode == Pmode && mode == SImode
9286 && flag_pic == 1 && got_operand (operands[1], mode))
9287 {
9288 emit_insn (gen_movsi_got (operands[0], operands[1]));
9289 return;
9290 }
9291
9292 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9293 && TARGET_NO_TOC
9294 && ! flag_pic
9295 && mode == Pmode
9296 && CONSTANT_P (operands[1])
9297 && GET_CODE (operands[1]) != HIGH
9298 && GET_CODE (operands[1]) != CONST_INT)
9299 {
9300 rtx target = (!can_create_pseudo_p ()
9301 ? operands[0]
9302 : gen_reg_rtx (mode));
9303
9304 /* If this is a function address on -mcall-aixdesc,
9305 convert it to the address of the descriptor. */
9306 if (DEFAULT_ABI == ABI_AIX
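/* Example, added for exposition: under the Darwin64 ABI,
   struct { long a; } has size 8 and matches the second arm above,
   while a BLKmode struct { char c[24]; } matches the first; a plain
   scalar such as int is not special and yields 0.  */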
9307 && GET_CODE (operands[1]) == SYMBOL_REF
9308 && XSTR (operands[1], 0)[0] == '.')
9309 {
9310 const char *name = XSTR (operands[1], 0);
9311 rtx new_ref;
9312 while (*name == '.')
9313 name++;
9314 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9315 CONSTANT_POOL_ADDRESS_P (new_ref)
9316 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9317 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9318 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9319 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9320 operands[1] = new_ref;
9321 }
9322
9323 if (DEFAULT_ABI == ABI_DARWIN)
9324 {
9325 #if TARGET_MACHO
9326 if (MACHO_DYNAMIC_NO_PIC_P)
9327 {
9328 /* Take care of any required data indirection. */
9329 operands[1] = rs6000_machopic_legitimize_pic_address (
9330 operands[1], mode, operands[0]);
9331 if (operands[0] != operands[1])
9332 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9333 return;
9334 }
9335 #endif
9336 emit_insn (gen_macho_high (target, operands[1]));
9337 emit_insn (gen_macho_low (operands[0], target, operands[1]));
9338 return;
9339 }
9340
9341 emit_insn (gen_elf_high (target, operands[1]));
9342 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9343 return;
9344 }
9345
9346 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9347 and we have put it in the TOC, we just need to make a TOC-relative
9348 reference to it. */
9349 if (TARGET_TOC
9350 && GET_CODE (operands[1]) == SYMBOL_REF
9351 && use_toc_relative_ref (operands[1], mode))
9352 operands[1] = create_TOC_reference (operands[1], operands[0]);
9353 else if (mode == Pmode
9354 && CONSTANT_P (operands[1])
9355 && GET_CODE (operands[1]) != HIGH
9356 && ((GET_CODE (operands[1]) != CONST_INT
9357 && ! easy_fp_constant (operands[1], mode))
9358 || (GET_CODE (operands[1]) == CONST_INT
9359 && (num_insns_constant (operands[1], mode)
9360 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9361 || (GET_CODE (operands[0]) == REG
9362 && FP_REGNO_P (REGNO (operands[0]))))
9363 && !toc_relative_expr_p (operands[1], false)
9364 && (TARGET_CMODEL == CMODEL_SMALL
9365 || can_create_pseudo_p ()
9366 || (REG_P (operands[0])
9367 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9368 {
9369
9370 #if TARGET_MACHO
9371 /* Darwin uses a special PIC legitimizer. */
9372 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9373 {
9374 operands[1] =
9375 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9376 operands[0]);
9377 if (operands[0] != operands[1])
9378 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9379 return;
9380 }
9381 #endif
9382
9383 /* If we are to limit the number of things we put in the TOC and
9384 this is a symbol plus a constant we can add in one insn,
9385 just put the symbol in the TOC and add the constant. Don't do
9386 this if reload is in progress. */
9387 if (GET_CODE (operands[1]) == CONST
9388 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
9389 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9390 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9391 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9392 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
9393 && ! side_effects_p (operands[0]))
9394 {
9395 rtx sym =
9396 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9397 rtx other = XEXP (XEXP (operands[1], 0), 1);
9398
9399 sym = force_reg (mode, sym);
9400 emit_insn (gen_add3_insn (operands[0], sym, other));
9401 return;
9402 }
9403
9404 operands[1] = force_const_mem (mode, operands[1]);
9405
9406 if (TARGET_TOC
9407 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9408 && constant_pool_expr_p (XEXP (operands[1], 0))
9409 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
9410 get_pool_constant (XEXP (operands[1], 0)),
9411 get_pool_mode (XEXP (operands[1], 0))))
9412 {
9413 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9414 operands[0]);
9415 operands[1] = gen_const_mem (mode, tocref);
9416 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9417 }
9418 }
9419 break;
9420
9421 case TImode:
9422 if (!VECTOR_MEM_VSX_P (TImode))
9423 rs6000_eliminate_indexed_memrefs (operands);
9424 break;
9425
9426 case PTImode:
9427 rs6000_eliminate_indexed_memrefs (operands);
9428 break;
9429
9430 default:
9431 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9432 }
9433
9434 /* Above, we may have called force_const_mem which may have returned
9435 an invalid address. If we can, fix this up; otherwise, reload will
9436 have to deal with it. */
9437 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
9438 operands[1] = validize_mem (operands[1]);
9439
9440 emit_set:
9441 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9442 }
9443
9444 /* Return true if a structure, union or array containing FIELD should be
9445 accessed using `BLKMODE'.
9446
9447 For the SPE, simd types are V2SI, and gcc can be tempted to put the
9448 entire thing in a DI and use subregs to access the internals.
9449 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
9450 back-end. Because a single GPR can hold a V2SI, but not a DI, the
9451 best thing to do is set structs to BLKmode and avoid Severe Tire
9452 Damage.
9453
9454 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
9455 fit into one register, whereas DI still needs two. */
9456
9457 static bool
9458 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
9459 {
9460 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
9461 || (TARGET_E500_DOUBLE && mode == DFmode));
9462 }
9463 \f
9464 /* Nonzero if we can use a floating-point register to pass this arg. */
9465 #define USE_FP_FOR_ARG_P(CUM,MODE) \
9466 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
9467 && (CUM)->fregno <= FP_ARG_MAX_REG \
9468 && TARGET_HARD_FLOAT && TARGET_FPRS)
9469
9470 /* Nonzero if we can use an AltiVec register to pass this arg. */
9471 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
9472 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
9473 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
9474 && TARGET_ALTIVEC_ABI \
9475 && (NAMED))
9476
9477 /* Walk down the type tree of TYPE counting consecutive base elements.
9478 If *MODEP is VOIDmode, then set it to the first valid floating point
9479 or vector type. If a non-floating point or vector type is found, or
9480 if a floating point or vector type that doesn't match a non-VOIDmode
9481 *MODEP is found, then return -1, otherwise return the count in the
9482 sub-tree. */
9483
9484 static int
9485 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
9486 {
9487 machine_mode mode;
9488 HOST_WIDE_INT size;
9489
9490 switch (TREE_CODE (type))
9491 {
9492 case REAL_TYPE:
9493 mode = TYPE_MODE (type);
9494 if (!SCALAR_FLOAT_MODE_P (mode))
9495 return -1;
9496
9497 if (*modep == VOIDmode)
9498 *modep = mode;
9499
9500 if (*modep == mode)
9501 return 1;
9502
9503 break;
9504
9505 case COMPLEX_TYPE:
9506 mode = TYPE_MODE (TREE_TYPE (type));
9507 if (!SCALAR_FLOAT_MODE_P (mode))
9508 return -1;
9509
9510 if (*modep == VOIDmode)
9511 *modep = mode;
9512
9513 if (*modep == mode)
9514 return 2;
9515
9516 break;
9517
9518 case VECTOR_TYPE:
9519 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9520 return -1;
9521
9522 /* Use V4SImode as representative of all 128-bit vector types. */
9523 size = int_size_in_bytes (type);
9524 switch (size)
9525 {
9526 case 16:
9527 mode = V4SImode;
9528 break;
9529 default:
9530 return -1;
9531 }
9532
9533 if (*modep == VOIDmode)
9534 *modep = mode;
9535
9536 /* Vector modes are considered to be opaque: two vectors are
9537 equivalent for the purposes of being homogeneous aggregates
9538 if they are the same size. */
9539 if (*modep == mode)
9540 return 1;
9541
9542 break;
9543
9544 case ARRAY_TYPE:
9545 {
9546 int count;
9547 tree index = TYPE_DOMAIN (type);
9548
9549 /* Can't handle incomplete types nor sizes that are not
9550 fixed. */
9551 if (!COMPLETE_TYPE_P (type)
9552 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9553 return -1;
9554
9555 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9556 if (count == -1
9557 || !index
9558 || !TYPE_MAX_VALUE (index)
9559 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
9560 || !TYPE_MIN_VALUE (index)
9561 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
9562 || count < 0)
9563 return -1;
9564
9565 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
9566 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
9567
9568 /* There must be no padding. */
9569 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9570 return -1;
9571
9572 return count;
9573 }
9574
9575 case RECORD_TYPE:
9576 {
9577 int count = 0;
9578 int sub_count;
9579 tree field;
9580
9581 /* Can't handle incomplete types nor sizes that are not
9582 fixed. */
9583 if (!COMPLETE_TYPE_P (type)
9584 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9585 return -1;
9586
9587 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9588 {
9589 if (TREE_CODE (field) != FIELD_DECL)
9590 continue;
9591
9592 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9593 if (sub_count < 0)
9594 return -1;
9595 count += sub_count;
9596 }
9597
9598 /* There must be no padding. */
9599 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9600 return -1;
9601
9602 return count;
9603 }
9604
9605 case UNION_TYPE:
9606 case QUAL_UNION_TYPE:
9607 {
9608 /* These aren't very interesting except in a degenerate case. */
9609 int count = 0;
9610 int sub_count;
9611 tree field;
9612
9613 /* Can't handle incomplete types nor sizes that are not
9614 fixed. */
9615 if (!COMPLETE_TYPE_P (type)
9616 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9617 return -1;
9618
9619 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9620 {
9621 if (TREE_CODE (field) != FIELD_DECL)
9622 continue;
9623
9624 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9625 if (sub_count < 0)
9626 return -1;
9627 count = count > sub_count ? count : sub_count;
9628 }
9629
9630 /* There must be no padding. */
9631 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9632 return -1;
9633
9634 return count;
9635 }
9636
9637 default:
9638 break;
9639 }
9640
9641 return -1;
9642 }
9643
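/* Example, added for exposition (the type names are hypothetical):

       struct pt  { double x; double y; };   // returns 2, *modep = DFmode
       struct mix { double x; int i;    };   // returns -1 (int != DFmode)

   The first is a homogeneous aggregate candidate; the second is
   rejected because the integer field does not match the mode.  */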
9644 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9645 float or vector aggregate that shall be passed in FP/vector registers
9646 according to the ELFv2 ABI, return the homogeneous element mode in
9647 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9648
9649 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
9650
9651 static bool
9652 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9653 machine_mode *elt_mode,
9654 int *n_elts)
9655 {
9656 /* Note that we do not accept complex types at the top level as
9657 homogeneous aggregates; these types are handled via the
9658 targetm.calls.split_complex_arg mechanism. Complex types
9659 can be elements of homogeneous aggregates, however. */
9660 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9661 {
9662 machine_mode field_mode = VOIDmode;
9663 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9664
9665 if (field_count > 0)
9666 {
9667 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
9668 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9669
9670 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9671 up to AGGR_ARG_NUM_REG registers. */
9672 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9673 {
9674 if (elt_mode)
9675 *elt_mode = field_mode;
9676 if (n_elts)
9677 *n_elts = field_count;
9678 return true;
9679 }
9680 }
9681 }
9682
9683 if (elt_mode)
9684 *elt_mode = mode;
9685 if (n_elts)
9686 *n_elts = 1;
9687 return false;
9688 }
9689
9690 /* Return a nonzero value to say to return the function value in
9691 memory, just as large structures are always returned. TYPE will be
9692 the data type of the value, and FNTYPE will be the type of the
9693 function doing the returning, or @code{NULL} for libcalls.
9694
9695 The AIX ABI for the RS/6000 specifies that all structures are
9696 returned in memory. The Darwin ABI does the same.
9697
9698 For the Darwin 64 Bit ABI, a function result can be returned in
9699 registers or in memory, depending on the size of the return data
9700 type. If it is returned in registers, the value occupies the same
9701 registers as it would if it were the first and only function
9702 argument. Otherwise, the function places its result in memory at
9703 the location pointed to by GPR3.
9704
9705 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9706 but a draft put them in memory, and GCC used to implement the draft
9707 instead of the final standard. Therefore, aix_struct_return
9708 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9709 compatibility can change DRAFT_V4_STRUCT_RET to override the
9710 default, and -m switches get the final word. See
9711 rs6000_option_override_internal for more details.
9712
9713 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9714 long double support is enabled. These values are returned in memory.
9715
9716 int_size_in_bytes returns -1 for variable size objects, which go in
9717 memory always. The cast to unsigned makes -1 > 8. */
9718
9719 static bool
9720 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9721 {
9722 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9723 if (TARGET_MACHO
9724 && rs6000_darwin64_abi
9725 && TREE_CODE (type) == RECORD_TYPE
9726 && int_size_in_bytes (type) > 0)
9727 {
9728 CUMULATIVE_ARGS valcum;
9729 rtx valret;
9730
9731 valcum.words = 0;
9732 valcum.fregno = FP_ARG_MIN_REG;
9733 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9734 /* Do a trial code generation as if this were going to be passed
9735 as an argument; if any part goes in memory, we return NULL. */
9736 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9737 if (valret)
9738 return false;
9739 /* Otherwise fall through to more conventional ABI rules. */
9740 }
9741
9742 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
9743 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9744 NULL, NULL))
9745 return false;
9746
9747 /* The ELFv2 ABI returns aggregates up to 16 bytes in registers. */
9748 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9749 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9750 return false;
9751
9752 if (AGGREGATE_TYPE_P (type)
9753 && (aix_struct_return
9754 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9755 return true;
9756
9757 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9758 modes only exist for GCC vector types if -maltivec. */
9759 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9760 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9761 return false;
9762
9763 /* Return synthetic vectors in memory. */
9764 if (TREE_CODE (type) == VECTOR_TYPE
9765 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9766 {
9767 static bool warned_for_return_big_vectors = false;
9768 if (!warned_for_return_big_vectors)
9769 {
9770 warning (0, "GCC vector returned by reference: "
9771 "non-standard ABI extension with no compatibility guarantee");
9772 warned_for_return_big_vectors = true;
9773 }
9774 return true;
9775 }
9776
9777 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
9778 && FLOAT128_IEEE_P (TYPE_MODE (type)))
9779 return true;
9780
9781 return false;
9782 }
9783
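/* Examples, added for exposition: under the ELFv2 ABI a 16-byte
   struct { long a, b; } is returned in registers (false above), while
   a 24-byte struct { long a, b, c; } goes to memory; under the AIX
   ABI, aix_struct_return sends aggregates to memory regardless of
   size.  */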
9784 /* Specify whether values returned in registers should be at the most
9785 significant end of a register. We want aggregates returned by
9786 value to match the way aggregates are passed to functions. */
9787
9788 static bool
9789 rs6000_return_in_msb (const_tree valtype)
9790 {
9791 return (DEFAULT_ABI == ABI_ELFv2
9792 && BYTES_BIG_ENDIAN
9793 && AGGREGATE_TYPE_P (valtype)
9794 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9795 }
9796
9797 #ifdef HAVE_AS_GNU_ATTRIBUTE
9798 /* Return TRUE if a call to function FNDECL may be one that
9799 potentially affects the function calling ABI of the object file. */
9800
9801 static bool
9802 call_ABI_of_interest (tree fndecl)
9803 {
9804 if (symtab->state == EXPANSION)
9805 {
9806 struct cgraph_node *c_node;
9807
9808 /* Libcalls are always interesting. */
9809 if (fndecl == NULL_TREE)
9810 return true;
9811
9812 /* Any call to an external function is interesting. */
9813 if (DECL_EXTERNAL (fndecl))
9814 return true;
9815
9816 /* Interesting functions that we are emitting in this object file. */
9817 c_node = cgraph_node::get (fndecl);
9818 c_node = c_node->ultimate_alias_target ();
9819 return !c_node->only_called_directly_p ();
9820 }
9821 return false;
9822 }
9823 #endif
9824
9825 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9826 for a call to a function whose data type is FNTYPE.
9827 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
9828
9829 For incoming args we set the number of arguments in the prototype large
9830 so we never return a PARALLEL. */
9831
9832 void
9833 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9834 rtx libname ATTRIBUTE_UNUSED, int incoming,
9835 int libcall, int n_named_args,
9836 tree fndecl ATTRIBUTE_UNUSED,
9837 machine_mode return_mode ATTRIBUTE_UNUSED)
9838 {
9839 static CUMULATIVE_ARGS zero_cumulative;
9840
9841 *cum = zero_cumulative;
9842 cum->words = 0;
9843 cum->fregno = FP_ARG_MIN_REG;
9844 cum->vregno = ALTIVEC_ARG_MIN_REG;
9845 cum->prototype = (fntype && prototype_p (fntype));
9846 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9847 ? CALL_LIBCALL : CALL_NORMAL);
9848 cum->sysv_gregno = GP_ARG_MIN_REG;
9849 cum->stdarg = stdarg_p (fntype);
9850 cum->libcall = libcall;
9851
9852 cum->nargs_prototype = 0;
9853 if (incoming || cum->prototype)
9854 cum->nargs_prototype = n_named_args;
9855
9856 /* Check for a longcall attribute. */
9857 if ((!fntype && rs6000_default_long_calls)
9858 || (fntype
9859 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9860 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9861 cum->call_cookie |= CALL_LONG;
9862
9863 if (TARGET_DEBUG_ARG)
9864 {
9865 fprintf (stderr, "\ninit_cumulative_args:");
9866 if (fntype)
9867 {
9868 tree ret_type = TREE_TYPE (fntype);
9869 fprintf (stderr, " ret code = %s,",
9870 get_tree_code_name (TREE_CODE (ret_type)));
9871 }
9872
9873 if (cum->call_cookie & CALL_LONG)
9874 fprintf (stderr, " longcall,");
9875
9876 fprintf (stderr, " proto = %d, nargs = %d\n",
9877 cum->prototype, cum->nargs_prototype);
9878 }
9879
9880 #ifdef HAVE_AS_GNU_ATTRIBUTE
9881 if (DEFAULT_ABI == ABI_V4)
9882 {
9883 cum->escapes = call_ABI_of_interest (fndecl);
9884 if (cum->escapes)
9885 {
9886 tree return_type;
9887
9888 if (fntype)
9889 {
9890 return_type = TREE_TYPE (fntype);
9891 return_mode = TYPE_MODE (return_type);
9892 }
9893 else
9894 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9895
9896 if (return_type != NULL)
9897 {
9898 if (TREE_CODE (return_type) == RECORD_TYPE
9899 && TYPE_TRANSPARENT_AGGR (return_type))
9900 {
9901 return_type = TREE_TYPE (first_field (return_type));
9902 return_mode = TYPE_MODE (return_type);
9903 }
9904 if (AGGREGATE_TYPE_P (return_type)
9905 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9906 <= 8))
9907 rs6000_returns_struct = true;
9908 }
9909 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (return_mode))
9910 rs6000_passes_float = true;
9911 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9912 || SPE_VECTOR_MODE (return_mode))
9913 rs6000_passes_vector = true;
9914 }
9915 }
9916 #endif
9917
9918 if (fntype
9919 && !TARGET_ALTIVEC
9920 && TARGET_ALTIVEC_ABI
9921 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9922 {
9923 error ("cannot return value in vector register because"
9924 " altivec instructions are disabled, use -maltivec"
9925 " to enable them");
9926 }
9927 }
9928 \f
9929 /* The mode the ABI uses for a word. This is not the same as word_mode
9930 for -m32 -mpowerpc64. This is used to implement various target hooks. */
9931
9932 static machine_mode
9933 rs6000_abi_word_mode (void)
9934 {
9935 return TARGET_32BIT ? SImode : DImode;
9936 }
9937
9938 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
9939 static char *
9940 rs6000_offload_options (void)
9941 {
9942 if (TARGET_64BIT)
9943 return xstrdup ("-foffload-abi=lp64");
9944 else
9945 return xstrdup ("-foffload-abi=ilp32");
9946 }
9947
9948 /* On rs6000, function arguments are promoted, as are function return
9949 values. */
9950
9951 static machine_mode
9952 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9953 machine_mode mode,
9954 int *punsignedp ATTRIBUTE_UNUSED,
9955 const_tree, int)
9956 {
9957 PROMOTE_MODE (mode, *punsignedp, type);
9958
9959 return mode;
9960 }
9961
9962 /* Return true if TYPE must be passed on the stack and not in registers. */
9963
9964 static bool
9965 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9966 {
9967 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9968 return must_pass_in_stack_var_size (mode, type);
9969 else
9970 return must_pass_in_stack_var_size_or_pad (mode, type);
9971 }
9972
9973 /* If defined, a C expression which determines whether, and in which
9974 direction, to pad out an argument with extra space. The value
9975 should be of type `enum direction': either `upward' to pad above
9976 the argument, `downward' to pad below, or `none' to inhibit
9977 padding.
9978
9979 For the AIX ABI, structs are always stored left-shifted in their
9980 argument slot. */
9981
9982 enum direction
9983 function_arg_padding (machine_mode mode, const_tree type)
9984 {
9985 #ifndef AGGREGATE_PADDING_FIXED
9986 #define AGGREGATE_PADDING_FIXED 0
9987 #endif
9988 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9989 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9990 #endif
9991
9992 if (!AGGREGATE_PADDING_FIXED)
9993 {
9994 /* GCC used to pass structures of the same size as integer types as
9995 if they were in fact integers, ignoring FUNCTION_ARG_PADDING;
9996 i.e. structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9997 passed padded downward, except that -mstrict-align further
9998 muddied the water in that multi-component structures of 2 and 4
9999 bytes in size were passed padded upward.
10000
10001 The following arranges for best compatibility with previous
10002 versions of gcc, but removes the -mstrict-align dependency. */
10003 if (BYTES_BIG_ENDIAN)
10004 {
10005 HOST_WIDE_INT size = 0;
10006
10007 if (mode == BLKmode)
10008 {
10009 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10010 size = int_size_in_bytes (type);
10011 }
10012 else
10013 size = GET_MODE_SIZE (mode);
10014
10015 if (size == 1 || size == 2 || size == 4)
10016 return downward;
10017 }
10018 return upward;
10019 }
10020
10021 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10022 {
10023 if (type != 0 && AGGREGATE_TYPE_P (type))
10024 return upward;
10025 }
10026
10027 /* Fall back to the default. */
10028 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10029 }
10030
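/* Example, added for exposition: on a big-endian target a 2-byte
   struct { short s; } has size 2, so the code above returns downward
   (the value occupies the least significant end of its slot), whereas
   an 8-byte struct is padded upward.  */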
10031 /* If defined, a C expression that gives the alignment boundary, in bits,
10032 of an argument with the specified mode and type. If it is not defined,
10033 PARM_BOUNDARY is used for all arguments.
10034
10035 V.4 wants long longs and doubles to be double word aligned. Just
10036 testing the mode size is a boneheaded way to do this as it means
10037 that other types such as complex int are also double word aligned.
10038 However, we're stuck with this because changing the ABI might break
10039 existing library interfaces.
10040
10041 Doubleword align SPE vectors.
10042 Quadword align Altivec/VSX vectors.
10043 Quadword align large synthetic vector types. */
10044
10045 static unsigned int
10046 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10047 {
10048 machine_mode elt_mode;
10049 int n_elts;
10050
10051 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10052
10053 if (DEFAULT_ABI == ABI_V4
10054 && (GET_MODE_SIZE (mode) == 8
10055 || (TARGET_HARD_FLOAT
10056 && TARGET_FPRS
10057 && FLOAT128_2REG_P (mode))))
10058 return 64;
10059 else if (FLOAT128_VECTOR_P (mode))
10060 return 128;
10061 else if (SPE_VECTOR_MODE (mode)
10062 || (type && TREE_CODE (type) == VECTOR_TYPE
10063 && int_size_in_bytes (type) >= 8
10064 && int_size_in_bytes (type) < 16))
10065 return 64;
10066 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10067 || (type && TREE_CODE (type) == VECTOR_TYPE
10068 && int_size_in_bytes (type) >= 16))
10069 return 128;
10070
10071 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10072 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10073 -mcompat-align-parm is used. */
10074 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10075 || DEFAULT_ABI == ABI_ELFv2)
10076 && type && TYPE_ALIGN (type) > 64)
10077 {
10078 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10079 or homogeneous float/vector aggregates here. We already handled
10080 vector aggregates above, but still need to check for float here. */
10081 bool aggregate_p = (AGGREGATE_TYPE_P (type)
10082 && !SCALAR_FLOAT_MODE_P (elt_mode));
10083
10084 /* We used to check for BLKmode instead of the above aggregate type
10085 check. Warn when this results in any difference to the ABI. */
10086 if (aggregate_p != (mode == BLKmode))
10087 {
10088 static bool warned;
10089 if (!warned && warn_psabi)
10090 {
10091 warned = true;
10092 inform (input_location,
10093 "the ABI of passing aggregates with %d-byte alignment"
10094 " has changed in GCC 5",
10095 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10096 }
10097 }
10098
10099 if (aggregate_p)
10100 return 128;
10101 }
10102
10103 /* Similar for the Darwin64 ABI. Note that for historical reasons we
10104 implement the "aggregate type" check as a BLKmode check here; this
10105 means certain aggregate types are in fact not aligned. */
10106 if (TARGET_MACHO && rs6000_darwin64_abi
10107 && mode == BLKmode
10108 && type && TYPE_ALIGN (type) > 64)
10109 return 128;
10110
10111 return PARM_BOUNDARY;
10112 }
10113
10114 /* The offset in words to the start of the parameter save area. */
10115
10116 static unsigned int
10117 rs6000_parm_offset (void)
10118 {
10119 return (DEFAULT_ABI == ABI_V4 ? 2
10120 : DEFAULT_ABI == ABI_ELFv2 ? 4
10121 : 6);
10122 }
10123
10124 /* For a function parm of MODE and TYPE, return the starting word in
10125 the parameter area. NWORDS of the parameter area are already used. */
10126
10127 static unsigned int
10128 rs6000_parm_start (machine_mode mode, const_tree type,
10129 unsigned int nwords)
10130 {
10131 unsigned int align;
10132
10133 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10134 return nwords + (-(rs6000_parm_offset () + nwords) & align);
10135 }
10136
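/* Worked example, added for exposition (assuming the usual 64-bit
   PARM_BOUNDARY of 64 bits): under ELFv2, rs6000_parm_offset () is 4;
   for a quadword-aligned argument the boundary is 128 bits, so
   align == 128 / 64 - 1 == 1.  With nwords == 3 already used:

       3 + (-(4 + 3) & 1) == 3 + 1 == 4

   i.e. one padding word is skipped so the argument starts on an even
   word relative to the start of the save area.  */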
10137 /* Compute the size (in words) of a function argument. */
10138
10139 static unsigned long
10140 rs6000_arg_size (machine_mode mode, const_tree type)
10141 {
10142 unsigned long size;
10143
10144 if (mode != BLKmode)
10145 size = GET_MODE_SIZE (mode);
10146 else
10147 size = int_size_in_bytes (type);
10148
10149 if (TARGET_32BIT)
10150 return (size + 3) >> 2;
10151 else
10152 return (size + 7) >> 3;
10153 }
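/* Example, added for exposition: a BLKmode struct of 12 bytes occupies
   (12 + 3) >> 2 == 3 words under TARGET_32BIT, but
   (12 + 7) >> 3 == 2 words in 64-bit mode.  */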
10154 \f
10155 /* Use this to flush pending int fields. */
10156
10157 static void
10158 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10159 HOST_WIDE_INT bitpos, int final)
10160 {
10161 unsigned int startbit, endbit;
10162 int intregs, intoffset;
10163 machine_mode mode;
10164
10165 /* Handle the situations where a float is taking up the first half
10166 of the GPR, and the other half is empty (typically due to
10167 alignment restrictions). We can detect this by an 8-byte-aligned
10168 int field, or by seeing that this is the final flush for this
10169 argument. Count the word and continue on. */
10170 if (cum->floats_in_gpr == 1
10171 && (cum->intoffset % 64 == 0
10172 || (cum->intoffset == -1 && final)))
10173 {
10174 cum->words++;
10175 cum->floats_in_gpr = 0;
10176 }
10177
10178 if (cum->intoffset == -1)
10179 return;
10180
10181 intoffset = cum->intoffset;
10182 cum->intoffset = -1;
10183 cum->floats_in_gpr = 0;
10184
10185 if (intoffset % BITS_PER_WORD != 0)
10186 {
10187 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10188 MODE_INT, 0);
10189 if (mode == BLKmode)
10190 {
10191 /* We couldn't find an appropriate mode, which happens,
10192 e.g., in packed structs when there are 3 bytes to load.
10193 Move intoffset back to the beginning of the word in this
10194 case. */
10195 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10196 }
10197 }
10198
10199 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10200 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10201 intregs = (endbit - startbit) / BITS_PER_WORD;
10202 cum->words += intregs;
10203 /* words should be unsigned. */
10204 if ((unsigned) cum->words < (endbit / BITS_PER_WORD))
10205 {
10206 int pad = (endbit / BITS_PER_WORD) - cum->words;
10207 cum->words += pad;
10208 }
10209 }
10210
10211 /* The darwin64 ABI calls for us to recurse down through structs,
10212 looking for elements passed in registers. Unfortunately, we have
10213 to track int register count here also because of misalignments
10214 in powerpc alignment mode. */
10215
10216 static void
10217 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10218 const_tree type,
10219 HOST_WIDE_INT startbitpos)
10220 {
10221 tree f;
10222
10223 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10224 if (TREE_CODE (f) == FIELD_DECL)
10225 {
10226 HOST_WIDE_INT bitpos = startbitpos;
10227 tree ftype = TREE_TYPE (f);
10228 machine_mode mode;
10229 if (ftype == error_mark_node)
10230 continue;
10231 mode = TYPE_MODE (ftype);
10232
10233 if (DECL_SIZE (f) != 0
10234 && tree_fits_uhwi_p (bit_position (f)))
10235 bitpos += int_bit_position (f);
10236
10237 /* ??? FIXME: else assume zero offset. */
10238
10239 if (TREE_CODE (ftype) == RECORD_TYPE)
10240 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10241 else if (USE_FP_FOR_ARG_P (cum, mode))
10242 {
10243 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10244 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10245 cum->fregno += n_fpregs;
10246 /* Single-precision floats present a special problem for
10247 us, because they are smaller than an 8-byte GPR, and so
10248 the structure-packing rules combined with the standard
10249 varargs behavior mean that we want to pack float/float
10250 and float/int combinations into a single register's
10251 space. This is complicated by the arg advance flushing,
10252 which works on arbitrarily large groups of int-type
10253 fields. */
10254 if (mode == SFmode)
10255 {
10256 if (cum->floats_in_gpr == 1)
10257 {
10258 /* Two floats in a word; count the word and reset
10259 the float count. */
10260 cum->words++;
10261 cum->floats_in_gpr = 0;
10262 }
10263 else if (bitpos % 64 == 0)
10264 {
10265 /* A float at the beginning of an 8-byte word;
10266 count it and put off adjusting cum->words until
10267 we see if an arg advance flush is going to do it
10268 for us. */
10269 cum->floats_in_gpr++;
10270 }
10271 else
10272 {
10273 /* The float is at the end of a word, preceded
10274 by integer fields, so the arg advance flush
10275 just above has already set cum->words and
10276 everything is taken care of. */
10277 }
10278 }
10279 else
10280 cum->words += n_fpregs;
10281 }
10282 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10283 {
10284 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10285 cum->vregno++;
10286 cum->words += 2;
10287 }
10288 else if (cum->intoffset == -1)
10289 cum->intoffset = bitpos;
10290 }
10291 }
10292
10293 /* Check for an item that needs to be considered specially under the darwin 64
10294 bit ABI. These are record types where the mode is BLK or the structure is
10295 8 bytes in size. */
10296 static int
10297 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10298 {
10299 return rs6000_darwin64_abi
10300 && ((mode == BLKmode
10301 && TREE_CODE (type) == RECORD_TYPE
10302 && int_size_in_bytes (type) > 0)
10303 || (type && TREE_CODE (type) == RECORD_TYPE
10304 && int_size_in_bytes (type) == 8)) ? 1 : 0;
10305 }
10306
10307 /* Update the data in CUM to advance over an argument
10308 of mode MODE and data type TYPE.
10309 (TYPE is null for libcalls where that information may not be available.)
10310
10311 Note that for args passed by reference, function_arg will be called
10312 with MODE and TYPE set to that of the pointer to the arg, not the arg
10313 itself. */
10314
10315 static void
10316 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10317 const_tree type, bool named, int depth)
10318 {
10319 machine_mode elt_mode;
10320 int n_elts;
10321
10322 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10323
10324 /* Only tick off an argument if we're not recursing. */
10325 if (depth == 0)
10326 cum->nargs_prototype--;
10327
10328 #ifdef HAVE_AS_GNU_ATTRIBUTE
10329 if (DEFAULT_ABI == ABI_V4
10330 && cum->escapes)
10331 {
10332 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode))
10333 rs6000_passes_float = true;
10334 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
10335 rs6000_passes_vector = true;
10336 else if (SPE_VECTOR_MODE (mode)
10337 && !cum->stdarg
10338 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10339 rs6000_passes_vector = true;
10340 }
10341 #endif
10342
10343 if (TARGET_ALTIVEC_ABI
10344 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10345 || (type && TREE_CODE (type) == VECTOR_TYPE
10346 && int_size_in_bytes (type) == 16)))
10347 {
10348 bool stack = false;
10349
10350 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10351 {
10352 cum->vregno += n_elts;
10353
10354 if (!TARGET_ALTIVEC)
10355 error ("cannot pass argument in vector register because"
10356 " altivec instructions are disabled, use -maltivec"
10357 " to enable them");
10358
10359 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
10360 even if it is going to be passed in a vector register.
10361 Darwin does the same for variable-argument functions. */
10362 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10363 && TARGET_64BIT)
10364 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
10365 stack = true;
10366 }
10367 else
10368 stack = true;
10369
10370 if (stack)
10371 {
10372 int align;
10373
10374 /* Vector parameters must be 16-byte aligned. In 32-bit
10375 mode this means we need to take into account the offset
10376 to the parameter save area. In 64-bit mode, they just
10377 have to start on an even word, since the parameter save
10378 area is 16-byte aligned. */
10379 if (TARGET_32BIT)
10380 align = -(rs6000_parm_offset () + cum->words) & 3;
10381 else
10382 align = cum->words & 1;
10383 cum->words += align + rs6000_arg_size (mode, type);
10384
10385 if (TARGET_DEBUG_ARG)
10386 {
10387 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
10388 cum->words, align);
10389 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
10390 cum->nargs_prototype, cum->prototype,
10391 GET_MODE_NAME (mode));
10392 }
10393 }
10394 }
10395 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
10396 && !cum->stdarg
10397 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10398 cum->sysv_gregno++;
10399
10400 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10401 {
10402 int size = int_size_in_bytes (type);
10403 /* Variable sized types have size == -1 and are
10404 treated as if consisting entirely of ints.
10405 Pad to 16 byte boundary if needed. */
10406 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10407 && (cum->words % 2) != 0)
10408 cum->words++;
10409 /* For varargs, we can just go up by the size of the struct. */
10410 if (!named)
10411 cum->words += (size + 7) / 8;
10412 else
10413 {
10414 /* It is tempting to say int register count just goes up by
10415 sizeof(type)/8, but this is wrong in a case such as
10416 { int; double; int; } [powerpc alignment]. We have to
10417 grovel through the fields for these too. */
10418 cum->intoffset = 0;
10419 cum->floats_in_gpr = 0;
10420 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
10421 rs6000_darwin64_record_arg_advance_flush (cum,
10422 size * BITS_PER_UNIT, 1);
10423 }
10424 if (TARGET_DEBUG_ARG)
10425 {
10426 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
10427 cum->words, TYPE_ALIGN (type), size);
10428 fprintf (stderr,
10429 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
10430 cum->nargs_prototype, cum->prototype,
10431 GET_MODE_NAME (mode));
10432 }
10433 }
10434 else if (DEFAULT_ABI == ABI_V4)
10435 {
10436 if (TARGET_HARD_FLOAT && TARGET_FPRS
10437 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10438 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10439 || FLOAT128_2REG_P (mode)
10440 || DECIMAL_FLOAT_MODE_P (mode)))
10441 {
10442 /* _Decimal128 must use an even/odd register pair. This assumes
10443 that the register number is odd when fregno is odd. */
10444 if (mode == TDmode && (cum->fregno % 2) == 1)
10445 cum->fregno++;
10446
10447 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
10448 <= FP_ARG_V4_MAX_REG)
10449 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
10450 else
10451 {
10452 cum->fregno = FP_ARG_V4_MAX_REG + 1;
10453 if (mode == DFmode || FLOAT128_IBM_P (mode)
10454 || mode == DDmode || mode == TDmode)
10455 cum->words += cum->words & 1;
10456 cum->words += rs6000_arg_size (mode, type);
10457 }
10458 }
10459 else
10460 {
10461 int n_words = rs6000_arg_size (mode, type);
10462 int gregno = cum->sysv_gregno;
10463
10464 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10465 (r7,r8) or (r9,r10). As is any other 2 word item such
10466 as complex int due to a historical mistake. */
10467 if (n_words == 2)
10468 gregno += (1 - gregno) & 1;
10469
10470 /* Multi-reg args are not split between registers and stack. */
10471 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10472 {
10473 /* Long long and SPE vectors are aligned on the stack.
10474 So are other 2 word items such as complex int due to
10475 a historical mistake. */
10476 if (n_words == 2)
10477 cum->words += cum->words & 1;
10478 cum->words += n_words;
10479 }
10480
10481 /* Note: we continue to accumulate gregno even after we have
10482 started spilling to the stack; expand_builtin_saveregs relies
10483 on this to detect that spilling has begun. */
10484 cum->sysv_gregno = gregno + n_words;
10485 }
10486
10487 if (TARGET_DEBUG_ARG)
10488 {
10489 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10490 cum->words, cum->fregno);
10491 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
10492 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
10493 fprintf (stderr, "mode = %4s, named = %d\n",
10494 GET_MODE_NAME (mode), named);
10495 }
10496 }
10497 else
10498 {
10499 int n_words = rs6000_arg_size (mode, type);
10500 int start_words = cum->words;
10501 int align_words = rs6000_parm_start (mode, type, start_words);
10502
10503 cum->words = align_words + n_words;
10504
10505 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
10506 {
10507 /* _Decimal128 must be passed in an even/odd float register pair.
10508 This assumes that the register number is odd when fregno is
10509 odd. */
10510 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10511 cum->fregno++;
10512 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
10513 }
10514
10515 if (TARGET_DEBUG_ARG)
10516 {
10517 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10518 cum->words, cum->fregno);
10519 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
10520 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
10521 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
10522 named, align_words - start_words, depth);
10523 }
10524 }
10525 }
10526
10527 static void
10528 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
10529 const_tree type, bool named)
10530 {
10531 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
10532 0);
10533 }
10534
10535 static rtx
10536 spe_build_register_parallel (machine_mode mode, int gregno)
10537 {
10538 rtx r1, r3, r5, r7;
10539
10540 switch (mode)
10541 {
10542 case DFmode:
10543 r1 = gen_rtx_REG (DImode, gregno);
10544 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10545 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
10546
10547 case DCmode:
10548 case TFmode:
10549 r1 = gen_rtx_REG (DImode, gregno);
10550 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10551 r3 = gen_rtx_REG (DImode, gregno + 2);
10552 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10553 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
10554
10555 case TCmode:
10556 r1 = gen_rtx_REG (DImode, gregno);
10557 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10558 r3 = gen_rtx_REG (DImode, gregno + 2);
10559 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10560 r5 = gen_rtx_REG (DImode, gregno + 4);
10561 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
10562 r7 = gen_rtx_REG (DImode, gregno + 6);
10563 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
10564 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
10565
10566 default:
10567 gcc_unreachable ();
10568 }
10569 }
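/* Illustrative sketch (hand-written, not compiler output): for a DCmode
   argument with GREGNO == 5, the code above builds

     (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                   (expr_list (reg:DI 7) (const_int 8))])

   i.e. the real part in the r5/r6 pair at byte offset 0 and the
   imaginary part in the r7/r8 pair at byte offset 8.  */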
10570
10571 /* Determine where to put a SIMD argument on the SPE. */
10572 static rtx
10573 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
10574 const_tree type)
10575 {
10576 int gregno = cum->sysv_gregno;
10577
10578 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
10579 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
10580 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
10581 || mode == DCmode || mode == TCmode))
10582 {
10583 int n_words = rs6000_arg_size (mode, type);
10584
10585 /* Doubles go in an odd/even register pair (r5/r6, etc). */
10586 if (mode == DFmode)
10587 gregno += (1 - gregno) & 1;
10588
10589 /* Multi-reg args are not split between registers and stack. */
10590 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10591 return NULL_RTX;
10592
10593 return spe_build_register_parallel (mode, gregno);
10594 }
10595 if (cum->stdarg)
10596 {
10597 int n_words = rs6000_arg_size (mode, type);
10598
10599 /* SPE vectors are put in odd registers. */
10600 if (n_words == 2 && (gregno & 1) == 0)
10601 gregno += 1;
10602
10603 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10604 {
10605 rtx r1, r2;
10606 machine_mode m = SImode;
10607
10608 r1 = gen_rtx_REG (m, gregno);
10609 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10610 r2 = gen_rtx_REG (m, gregno + 1);
10611 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10612 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10613 }
10614 else
10615 return NULL_RTX;
10616 }
10617 else
10618 {
10619 if (gregno <= GP_ARG_MAX_REG)
10620 return gen_rtx_REG (mode, gregno);
10621 else
10622 return NULL_RTX;
10623 }
10624 }
10625
10626 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10627 structure between cum->intoffset and bitpos to integer registers. */
10628
10629 static void
10630 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10631 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10632 {
10633 machine_mode mode;
10634 unsigned int regno;
10635 unsigned int startbit, endbit;
10636 int this_regno, intregs, intoffset;
10637 rtx reg;
10638
10639 if (cum->intoffset == -1)
10640 return;
10641
10642 intoffset = cum->intoffset;
10643 cum->intoffset = -1;
10644
10645 /* If this is the trailing part of a word, try to load only that
10646 much into the register. Otherwise load the whole register. Note
10647 that in the latter case we may pick up unwanted bits. It's not a
10648 problem at the moment, but we may wish to revisit this. */
10649
10650 if (intoffset % BITS_PER_WORD != 0)
10651 {
10652 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10653 MODE_INT, 0);
10654 if (mode == BLKmode)
10655 {
10656 /* We couldn't find an appropriate mode, which happens,
10657 e.g., in packed structs when there are 3 bytes to load.
10658 Move intoffset back to the beginning of the word in this
10659 case. */
10660 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10661 mode = word_mode;
10662 }
10663 }
10664 else
10665 mode = word_mode;
10666
10667 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10668 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10669 intregs = (endbit - startbit) / BITS_PER_WORD;
10670 this_regno = cum->words + intoffset / BITS_PER_WORD;
10671
10672 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10673 cum->use_stack = 1;
10674
10675 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10676 if (intregs <= 0)
10677 return;
10678
10679 intoffset /= BITS_PER_UNIT;
10680 do
10681 {
10682 regno = GP_ARG_MIN_REG + this_regno;
10683 reg = gen_rtx_REG (mode, regno);
10684 rvec[(*k)++] =
10685 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10686
10687 this_regno += 1;
10688 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10689 mode = word_mode;
10690 intregs -= 1;
10691 }
10692 while (intregs > 0);
10693 }
10694
10695 /* Recursive workhorse for the following. */
10696
10697 static void
10698 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10699 HOST_WIDE_INT startbitpos, rtx rvec[],
10700 int *k)
10701 {
10702 tree f;
10703
10704 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10705 if (TREE_CODE (f) == FIELD_DECL)
10706 {
10707 HOST_WIDE_INT bitpos = startbitpos;
10708 tree ftype = TREE_TYPE (f);
10709 machine_mode mode;
10710 if (ftype == error_mark_node)
10711 continue;
10712 mode = TYPE_MODE (ftype);
10713
10714 if (DECL_SIZE (f) != 0
10715 && tree_fits_uhwi_p (bit_position (f)))
10716 bitpos += int_bit_position (f);
10717
10718 /* ??? FIXME: else assume zero offset. */
10719
10720 if (TREE_CODE (ftype) == RECORD_TYPE)
10721 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10722 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10723 {
10724 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10725 #if 0
10726 switch (mode)
10727 {
10728 case SCmode: mode = SFmode; break;
10729 case DCmode: mode = DFmode; break;
10730 case TCmode: mode = TFmode; break;
10731 default: break;
10732 }
10733 #endif
10734 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10735 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10736 {
10737 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10738 && (mode == TFmode || mode == TDmode));
10739 /* Long double or _Decimal128 split over regs and memory. */
10740 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10741 cum->use_stack = 1;
10742 }
10743 rvec[(*k)++]
10744 = gen_rtx_EXPR_LIST (VOIDmode,
10745 gen_rtx_REG (mode, cum->fregno++),
10746 GEN_INT (bitpos / BITS_PER_UNIT));
10747 if (FLOAT128_2REG_P (mode))
10748 cum->fregno++;
10749 }
10750 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10751 {
10752 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10753 rvec[(*k)++]
10754 = gen_rtx_EXPR_LIST (VOIDmode,
10755 gen_rtx_REG (mode, cum->vregno++),
10756 GEN_INT (bitpos / BITS_PER_UNIT));
10757 }
10758 else if (cum->intoffset == -1)
10759 cum->intoffset = bitpos;
10760 }
10761 }
10762
10763 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10764 the register(s) to be used for each field and subfield of a struct
10765 being passed by value, along with the offset of where the
10766 register's value may be found in the block. FP fields go in FP
10767 registers, vector fields go in vector registers, and everything
10768 else goes in int registers, packed as in memory.
10769
10770 This code is also used for function return values. RETVAL indicates
10771 whether this is the case.
10772
10773 Much of this is taken from the SPARC V9 port, which has a similar
10774 calling convention. */
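/* As an illustrative sketch (not a normative ABI statement): passing

     struct { double d; int i; }

   by value would yield a PARALLEL whose first element places D in the
   next free FPR at byte offset 0 and whose second element loads the
   word containing I into a GPR at byte offset 8, mirroring the
   in-memory layout of the struct.  */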
10775
10776 static rtx
10777 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10778 bool named, bool retval)
10779 {
10780 rtx rvec[FIRST_PSEUDO_REGISTER];
10781 int k = 1, kbase = 1;
10782 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10783 /* This is a copy; modifications are not visible to our caller. */
10784 CUMULATIVE_ARGS copy_cum = *orig_cum;
10785 CUMULATIVE_ARGS *cum = &copy_cum;
10786
10787 /* Pad to 16 byte boundary if needed. */
10788 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10789 && (cum->words % 2) != 0)
10790 cum->words++;
10791
10792 cum->intoffset = 0;
10793 cum->use_stack = 0;
10794 cum->named = named;
10795
10796 /* Put entries into rvec[] for individual FP and vector fields, and
10797 for the chunks of memory that go in int regs. Note we start at
10798 element 1; 0 is reserved for an indication of using memory, and
10799 may or may not be filled in below. */
10800 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10801 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10802
10803 /* If any part of the struct went on the stack put all of it there.
10804 This hack is because the generic code for
10805 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10806 parts of the struct are not at the beginning. */
10807 if (cum->use_stack)
10808 {
10809 if (retval)
10810 return NULL_RTX; /* doesn't go in registers at all */
10811 kbase = 0;
10812 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10813 }
10814 if (k > 1 || cum->use_stack)
10815 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10816 else
10817 return NULL_RTX;
10818 }
10819
10820 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10821
10822 static rtx
10823 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10824 int align_words)
10825 {
10826 int n_units;
10827 int i, k;
10828 rtx rvec[GP_ARG_NUM_REG + 1];
10829
10830 if (align_words >= GP_ARG_NUM_REG)
10831 return NULL_RTX;
10832
10833 n_units = rs6000_arg_size (mode, type);
10834
10835 /* Optimize the simple case where the arg fits in one gpr, except in
10836 the case of BLKmode due to assign_parms assuming that registers are
10837 BITS_PER_WORD wide. */
10838 if (n_units == 0
10839 || (n_units == 1 && mode != BLKmode))
10840 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10841
10842 k = 0;
10843 if (align_words + n_units > GP_ARG_NUM_REG)
10844 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10845 using a magic NULL_RTX component.
10846 This is not strictly correct. Only some of the arg belongs in
10847 memory, not all of it. However, the normal scheme using
10848 function_arg_partial_nregs can result in unusual subregs, eg.
10849 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10850 store the whole arg to memory is often more efficient than code
10851 to store pieces, and we know that space is available in the right
10852 place for the whole arg. */
10853 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10854
10855 i = 0;
10856 do
10857 {
10858 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10859 rtx off = GEN_INT (i++ * 4);
10860 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10861 }
10862 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10863
10864 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10865 }
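/* A worked example (a sketch; register numbers assume the usual
   GP_ARG_MIN_REG == 3): in 32-bit mode with -mpowerpc64, a DFmode
   argument at ALIGN_WORDS == 7 has n_units == 2 but only r10 left,
   so the code above returns

     (parallel:DF [(expr_list nil (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   where the NULL_RTX element says the argument also goes in memory.  */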
10866
10867 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10868 but must also be copied into the parameter save area starting at
10869 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10870 to the GPRs and/or memory. Return the number of elements used. */
10871
10872 static int
10873 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10874 int align_words, rtx *rvec)
10875 {
10876 int k = 0;
10877
10878 if (align_words < GP_ARG_NUM_REG)
10879 {
10880 int n_words = rs6000_arg_size (mode, type);
10881
10882 if (align_words + n_words > GP_ARG_NUM_REG
10883 || mode == BLKmode
10884 || (TARGET_32BIT && TARGET_POWERPC64))
10885 {
10886 /* If this is partially on the stack, then we only
10887 include the portion actually in registers here. */
10888 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10889 int i = 0;
10890
10891 if (align_words + n_words > GP_ARG_NUM_REG)
10892 {
10893 /* Not all of the arg fits in gprs. Say that it goes in memory
10894 too, using a magic NULL_RTX component. Also see comment in
10895 rs6000_mixed_function_arg for why the normal
10896 function_arg_partial_nregs scheme doesn't work in this case. */
10897 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10898 }
10899
10900 do
10901 {
10902 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10903 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10904 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10905 }
10906 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10907 }
10908 else
10909 {
10910 /* The whole arg fits in gprs. */
10911 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10912 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10913 }
10914 }
10915 else
10916 {
10917 /* It's entirely in memory. */
10918 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10919 }
10920
10921 return k;
10922 }
10923
10924 /* RVEC is a vector of K components of an argument of mode MODE.
10925 Construct the final function_arg return value from it. */
10926
10927 static rtx
10928 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10929 {
10930 gcc_assert (k >= 1);
10931
10932 /* Avoid returning a PARALLEL in the trivial cases. */
10933 if (k == 1)
10934 {
10935 if (XEXP (rvec[0], 0) == NULL_RTX)
10936 return NULL_RTX;
10937
10938 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10939 return XEXP (rvec[0], 0);
10940 }
10941
10942 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10943 }
10944
10945 /* Determine where to put an argument to a function.
10946 Value is zero to push the argument on the stack,
10947 or a hard register in which to store the argument.
10948
10949 MODE is the argument's machine mode.
10950 TYPE is the data type of the argument (as a tree).
10951 This is null for libcalls where that information may
10952 not be available.
10953 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10954 the preceding args and about the function being called. It is
10955 not modified in this routine.
10956 NAMED is nonzero if this argument is a named parameter
10957 (otherwise it is an extra parameter matching an ellipsis).
10958
10959 On RS/6000 the first eight words of non-FP args are normally in registers
10960 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10961 Under V.4, the first 8 FP args are in registers.
10962
10963 If this is floating-point and no prototype is specified, we use
10964 both an FP and integer register (or possibly FP reg and stack). Library
10965 functions (when CALL_LIBCALL is set) always have the proper types for args,
10966 so we can pass the FP value just in one register. emit_library_function
10967 doesn't support PARALLEL anyway.
10968
10969 Note that for args passed by reference, function_arg will be called
10970 with MODE and TYPE set to that of the pointer to the arg, not the arg
10971 itself. */
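/* Example (an illustrative sketch for 64-bit AIX): for an unprototyped
   callee, a double in the first argument slot comes back as

     (parallel:DF [(expr_list (reg:DF 3) (const_int 0))
                   (expr_list (reg:DF 33) (const_int 0))])

   i.e. the value is passed both in r3 and in f1; with a prototype in
   scope, only f1 (hard reg 33) would be used.  */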
10972
10973 static rtx
10974 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10975 const_tree type, bool named)
10976 {
10977 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10978 enum rs6000_abi abi = DEFAULT_ABI;
10979 machine_mode elt_mode;
10980 int n_elts;
10981
10982 /* Return a marker to indicate whether the bit in CR1 that V.4 uses
10983 to say fp args were passed in registers needs to be set or cleared.
10984 Assume that we don't need the marker for software floating point,
10985 or compiler generated library calls. */
10986 if (mode == VOIDmode)
10987 {
10988 if (abi == ABI_V4
10989 && (cum->call_cookie & CALL_LIBCALL) == 0
10990 && (cum->stdarg
10991 || (cum->nargs_prototype < 0
10992 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10993 {
10994 /* For the SPE, we need to crxor CR6 always. */
10995 if (TARGET_SPE_ABI)
10996 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10997 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10998 return GEN_INT (cum->call_cookie
10999 | ((cum->fregno == FP_ARG_MIN_REG)
11000 ? CALL_V4_SET_FP_ARGS
11001 : CALL_V4_CLEAR_FP_ARGS));
11002 }
11003
11004 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11005 }
11006
11007 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11008
11009 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11010 {
11011 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11012 if (rslt != NULL_RTX)
11013 return rslt;
11014 /* Else fall through to usual handling. */
11015 }
11016
11017 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11018 {
11019 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11020 rtx r, off;
11021 int i, k = 0;
11022
11023 /* Do we also need to pass this argument in the parameter save area?
11024 Library support functions for IEEE 128-bit are assumed to not need the
11025 value passed both in GPRs and in vector registers. */
11026 if (TARGET_64BIT && !cum->prototype
11027 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11028 {
11029 int align_words = ROUND_UP (cum->words, 2);
11030 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11031 }
11032
11033 /* Describe where this argument goes in the vector registers. */
11034 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11035 {
11036 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11037 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11038 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11039 }
11040
11041 return rs6000_finish_function_arg (mode, rvec, k);
11042 }
11043 else if (TARGET_ALTIVEC_ABI
11044 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11045 || (type && TREE_CODE (type) == VECTOR_TYPE
11046 && int_size_in_bytes (type) == 16)))
11047 {
11048 if (named || abi == ABI_V4)
11049 return NULL_RTX;
11050 else
11051 {
11052 /* Vector parameters to varargs functions under AIX or Darwin
11053 get passed in memory and possibly also in GPRs. */
11054 int align, align_words, n_words;
11055 machine_mode part_mode;
11056
11057 /* Vector parameters must be 16-byte aligned. In 32-bit
11058 mode this means we need to take into account the offset
11059 to the parameter save area. In 64-bit mode, they just
11060 have to start on an even word, since the parameter save
11061 area is 16-byte aligned. */
11062 if (TARGET_32BIT)
11063 align = -(rs6000_parm_offset () + cum->words) & 3;
11064 else
11065 align = cum->words & 1;
11066 align_words = cum->words + align;
11067
11068 /* Out of registers? Memory, then. */
11069 if (align_words >= GP_ARG_NUM_REG)
11070 return NULL_RTX;
11071
11072 if (TARGET_32BIT && TARGET_POWERPC64)
11073 return rs6000_mixed_function_arg (mode, type, align_words);
11074
11075 /* The vector value goes in GPRs. Only the part of the
11076 value in GPRs is reported here. */
11077 part_mode = mode;
11078 n_words = rs6000_arg_size (mode, type);
11079 if (align_words + n_words > GP_ARG_NUM_REG)
11080 /* Fortunately, there are only two possibilities: the value
11081 is either wholly in GPRs or half in GPRs and half not. */
11082 part_mode = DImode;
11083
11084 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11085 }
11086 }
11087 else if (TARGET_SPE_ABI && TARGET_SPE
11088 && (SPE_VECTOR_MODE (mode)
11089 || (TARGET_E500_DOUBLE && (mode == DFmode
11090 || mode == DCmode
11091 || mode == TFmode
11092 || mode == TCmode))))
11093 return rs6000_spe_function_arg (cum, mode, type);
11094
11095 else if (abi == ABI_V4)
11096 {
11097 if (TARGET_HARD_FLOAT && TARGET_FPRS
11098 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
11099 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
11100 || FLOAT128_2REG_P (mode)
11101 || DECIMAL_FLOAT_MODE_P (mode)))
11102 {
11103 /* _Decimal128 must use an even/odd register pair. This assumes
11104 that the register number is odd when fregno is odd. */
11105 if (mode == TDmode && (cum->fregno % 2) == 1)
11106 cum->fregno++;
11107
11108 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11109 <= FP_ARG_V4_MAX_REG)
11110 return gen_rtx_REG (mode, cum->fregno);
11111 else
11112 return NULL_RTX;
11113 }
11114 else
11115 {
11116 int n_words = rs6000_arg_size (mode, type);
11117 int gregno = cum->sysv_gregno;
11118
11119 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11120 (r7,r8) or (r9,r10). As is any other 2 word item such
11121 as complex int due to a historical mistake. */
11122 if (n_words == 2)
11123 gregno += (1 - gregno) & 1;
11124
11125 /* Multi-reg args are not split between registers and stack. */
11126 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11127 return NULL_RTX;
11128
11129 if (TARGET_32BIT && TARGET_POWERPC64)
11130 return rs6000_mixed_function_arg (mode, type,
11131 gregno - GP_ARG_MIN_REG);
11132 return gen_rtx_REG (mode, gregno);
11133 }
11134 }
11135 else
11136 {
11137 int align_words = rs6000_parm_start (mode, type, cum->words);
11138
11139 /* _Decimal128 must be passed in an even/odd float register pair.
11140 This assumes that the register number is odd when fregno is odd. */
11141 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11142 cum->fregno++;
11143
11144 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11145 {
11146 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11147 rtx r, off;
11148 int i, k = 0;
11149 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11150 int fpr_words;
11151
11152 /* Do we also need to pass this argument in the parameter
11153 save area? */
11154 if (type && (cum->nargs_prototype <= 0
11155 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11156 && TARGET_XL_COMPAT
11157 && align_words >= GP_ARG_NUM_REG)))
11158 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11159
11160 /* Describe where this argument goes in the fprs. */
11161 for (i = 0; i < n_elts
11162 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11163 {
11164 /* Check if the argument is split over registers and memory.
11165 This can only ever happen for long double or _Decimal128;
11166 complex types are handled via split_complex_arg. */
11167 machine_mode fmode = elt_mode;
11168 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11169 {
11170 gcc_assert (FLOAT128_2REG_P (fmode));
11171 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11172 }
11173
11174 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11175 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11176 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11177 }
11178
11179 /* If there were not enough FPRs to hold the argument, the rest
11180 usually goes into memory. However, if the current position
11181 is still within the register parameter area, a portion may
11182 actually have to go into GPRs.
11183
11184 Note that it may happen that the portion of the argument
11185 passed in the first "half" of the first GPR was already
11186 passed in the last FPR as well.
11187
11188 For unnamed arguments, we already set up GPRs to cover the
11189 whole argument in rs6000_psave_function_arg, so there is
11190 nothing further to do at this point. */
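          /* Worked example (a sketch, assuming only f12 and f13 were
             still free): for a prototyped ELFv2 argument of type
               struct { double a, b, c, d; }
             (a homogeneous FP aggregate), the loop above put A in f12
             and B in f13, leaving i == 2 and fpr_words == 2; the loop
             below then passes C and D in the next two GPRs, provided
             align_words + 2 is still below GP_ARG_NUM_REG.  This is
             precisely the situation the GCC 5 note below warns about.  */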
11191 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11192 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11193 && cum->nargs_prototype > 0)
11194 {
11195 static bool warned;
11196
11197 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11198 int n_words = rs6000_arg_size (mode, type);
11199
11200 align_words += fpr_words;
11201 n_words -= fpr_words;
11202
11203 do
11204 {
11205 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11206 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11207 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11208 }
11209 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11210
11211 if (!warned && warn_psabi)
11212 {
11213 warned = true;
11214 inform (input_location,
11215 "the ABI of passing homogeneous float aggregates"
11216 " has changed in GCC 5");
11217 }
11218 }
11219
11220 return rs6000_finish_function_arg (mode, rvec, k);
11221 }
11222 else if (align_words < GP_ARG_NUM_REG)
11223 {
11224 if (TARGET_32BIT && TARGET_POWERPC64)
11225 return rs6000_mixed_function_arg (mode, type, align_words);
11226
11227 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11228 }
11229 else
11230 return NULL_RTX;
11231 }
11232 }
11233 \f
11234 /* For an arg passed partly in registers and partly in memory, this is
11235 the number of bytes passed in registers. For args passed entirely in
11236 registers or entirely in memory, zero. When an arg is described by a
11237 PARALLEL, perhaps using more than one register type, this function
11238 returns the number of bytes used by the first element of the PARALLEL. */
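/* For instance (an illustrative sketch): in 64-bit mode, a 32-byte
   BLKmode struct of integers starting at align_words == 6 fits its
   first two words in r9/r10 and the rest goes in memory, so this
   function returns (8 - 6) * 8 == 16 bytes.  */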
11239
11240 static int
11241 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11242 tree type, bool named)
11243 {
11244 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11245 bool passed_in_gprs = true;
11246 int ret = 0;
11247 int align_words;
11248 machine_mode elt_mode;
11249 int n_elts;
11250
11251 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11252
11253 if (DEFAULT_ABI == ABI_V4)
11254 return 0;
11255
11256 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11257 {
11258 /* If we are passing this arg in the fixed parameter save area (gprs or
11259 memory) as well as VRs, we do not use the partial bytes mechanism;
11260 instead, rs6000_function_arg will return a PARALLEL including a memory
11261 element as necessary. Library support functions for IEEE 128-bit are
11262 assumed to not need the value passed both in GPRs and in vector
11263 registers. */
11264 if (TARGET_64BIT && !cum->prototype
11265 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11266 return 0;
11267
11268 /* Otherwise, we pass in VRs only. Check for partial copies. */
11269 passed_in_gprs = false;
11270 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11271 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11272 }
11273
11274 /* In this complicated case we just disable the partial_nregs code. */
11275 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11276 return 0;
11277
11278 align_words = rs6000_parm_start (mode, type, cum->words);
11279
11280 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11281 {
11282 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11283
11284 /* If we are passing this arg in the fixed parameter save area
11285 (gprs or memory) as well as FPRs, we do not use the partial
11286 bytes mechanism; instead, rs6000_function_arg will return a
11287 PARALLEL including a memory element as necessary. */
11288 if (type
11289 && (cum->nargs_prototype <= 0
11290 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11291 && TARGET_XL_COMPAT
11292 && align_words >= GP_ARG_NUM_REG)))
11293 return 0;
11294
11295 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11296 passed_in_gprs = false;
11297 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11298 {
11299 /* Compute number of bytes / words passed in FPRs. If there
11300 is still space available in the register parameter area
11301 *after* that amount, a part of the argument will be passed
11302 in GPRs. In that case, the total amount passed in any
11303 registers is equal to the amount that would have been passed
11304 in GPRs if everything were passed there, so we fall back to
11305 the GPR code below to compute the appropriate value. */
11306 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11307 * MIN (8, GET_MODE_SIZE (elt_mode)));
11308 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11309
11310 if (align_words + fpr_words < GP_ARG_NUM_REG)
11311 passed_in_gprs = true;
11312 else
11313 ret = fpr;
11314 }
11315 }
11316
11317 if (passed_in_gprs
11318 && align_words < GP_ARG_NUM_REG
11319 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11320 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11321
11322 if (ret != 0 && TARGET_DEBUG_ARG)
11323 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
11324
11325 return ret;
11326 }
11327 \f
11328 /* A C expression that indicates when an argument must be passed by
11329 reference. If nonzero for an argument, a copy of that argument is
11330 made in memory and a pointer to the argument is passed instead of
11331 the argument itself. The pointer is passed in whatever way is
11332 appropriate for passing a pointer to that type.
11333
11334 Under V.4, aggregates and long double are passed by reference.
11335
11336 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11337 reference unless the AltiVec vector extension ABI is in force.
11338
11339 As an extension to all ABIs, variable sized types are passed by
11340 reference. */
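/* For example (illustrative only): under V.4 a struct { int x; }
   argument is an aggregate and is therefore passed as a pointer to a
   caller-made copy; on a 32-bit target compiled with -maltivec
   -mabi=no-altivec, a vector float argument is handled likewise.  */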
11341
11342 static bool
11343 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11344 machine_mode mode, const_tree type,
11345 bool named ATTRIBUTE_UNUSED)
11346 {
11347 if (!type)
11348 return 0;
11349
11350 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11351 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11352 {
11353 if (TARGET_DEBUG_ARG)
11354 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11355 return 1;
11356 }
11357
11358 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11359 {
11360 if (TARGET_DEBUG_ARG)
11361 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
11362 return 1;
11363 }
11364
11365 if (int_size_in_bytes (type) < 0)
11366 {
11367 if (TARGET_DEBUG_ARG)
11368 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11369 return 1;
11370 }
11371
11372 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11373 modes only exist for GCC vector types if -maltivec. */
11374 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11375 {
11376 if (TARGET_DEBUG_ARG)
11377 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11378 return 1;
11379 }
11380
11381 /* Pass synthetic vectors in memory. */
11382 if (TREE_CODE (type) == VECTOR_TYPE
11383 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11384 {
11385 static bool warned_for_pass_big_vectors = false;
11386 if (TARGET_DEBUG_ARG)
11387 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
11388 if (!warned_for_pass_big_vectors)
11389 {
11390 warning (0, "GCC vector passed by reference: "
11391 "non-standard ABI extension with no compatibility guarantee");
11392 warned_for_pass_big_vectors = true;
11393 }
11394 return 1;
11395 }
11396
11397 return 0;
11398 }
11399
11400 /* Process parameter of type TYPE after ARGS_SO_FAR parameters have
11401 already been processed. Return true if the parameter must be passed
11402 (fully or partially) on the stack. */
11403
11404 static bool
11405 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
11406 {
11407 machine_mode mode;
11408 int unsignedp;
11409 rtx entry_parm;
11410
11411 /* Catch errors. */
11412 if (type == NULL || type == error_mark_node)
11413 return true;
11414
11415 /* Handle types with no storage requirement. */
11416 if (TYPE_MODE (type) == VOIDmode)
11417 return false;
11418
11419 /* Handle complex types; the duplicated call below is deliberate, since the real and imaginary parts are processed separately and each call advances ARGS_SO_FAR. */
11420 if (TREE_CODE (type) == COMPLEX_TYPE)
11421 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11422 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
11423
11424 /* Handle transparent aggregates. */
11425 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
11426 && TYPE_TRANSPARENT_AGGR (type))
11427 type = TREE_TYPE (first_field (type));
11428
11429 /* See if this arg was passed by invisible reference. */
11430 if (pass_by_reference (get_cumulative_args (args_so_far),
11431 TYPE_MODE (type), type, true))
11432 type = build_pointer_type (type);
11433
11434 /* Find mode as it is passed by the ABI. */
11435 unsignedp = TYPE_UNSIGNED (type);
11436 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
11437
11438 /* If we must pass in stack, we need a stack. */
11439 if (rs6000_must_pass_in_stack (mode, type))
11440 return true;
11441
11442 /* If there is no incoming register, we need a stack. */
11443 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
11444 if (entry_parm == NULL)
11445 return true;
11446
11447 /* Likewise if we need to pass both in registers and on the stack. */
11448 if (GET_CODE (entry_parm) == PARALLEL
11449 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
11450 return true;
11451
11452 /* Also true if we're partially in registers and partially not. */
11453 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
11454 return true;
11455
11456 /* Update info on where next arg arrives in registers. */
11457 rs6000_function_arg_advance (args_so_far, mode, type, true);
11458 return false;
11459 }
11460
11461 /* Return true if FUN has no prototype, has a variable argument
11462 list, or passes any parameter in memory. */
11463
11464 static bool
11465 rs6000_function_parms_need_stack (tree fun, bool incoming)
11466 {
11467 tree fntype, result;
11468 CUMULATIVE_ARGS args_so_far_v;
11469 cumulative_args_t args_so_far;
11470
11471 if (!fun)
11472 /* Must be a libcall, all of which only use reg parms. */
11473 return false;
11474
11475 fntype = fun;
11476 if (!TYPE_P (fun))
11477 fntype = TREE_TYPE (fun);
11478
11479 /* Varargs functions need the parameter save area. */
11480 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
11481 return true;
11482
11483 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
11484 args_so_far = pack_cumulative_args (&args_so_far_v);
11485
11486 /* When incoming, we will have been passed the function decl.
11487 It is necessary to use the decl to handle K&R style functions,
11488 where TYPE_ARG_TYPES may not be available. */
11489 if (incoming)
11490 {
11491 gcc_assert (DECL_P (fun));
11492 result = DECL_RESULT (fun);
11493 }
11494 else
11495 result = TREE_TYPE (fntype);
11496
11497 if (result && aggregate_value_p (result, fntype))
11498 {
11499 if (!TYPE_P (result))
11500 result = TREE_TYPE (result);
11501 result = build_pointer_type (result);
11502 rs6000_parm_needs_stack (args_so_far, result);
11503 }
11504
11505 if (incoming)
11506 {
11507 tree parm;
11508
11509 for (parm = DECL_ARGUMENTS (fun);
11510 parm && parm != void_list_node;
11511 parm = TREE_CHAIN (parm))
11512 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
11513 return true;
11514 }
11515 else
11516 {
11517 function_args_iterator args_iter;
11518 tree arg_type;
11519
11520 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11521 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11522 return true;
11523 }
11524
11525 return false;
11526 }
11527
11528 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11529 usually a constant depending on the ABI. However, in the ELFv2 ABI
11530 the register parameter area is optional when calling a function that
11531 has a prototype in scope, has no variable argument list, and passes
11532 all parameters in registers. */
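/* The non-zero values below are simply eight argument GPRs' worth of
   save space: 8 * 8 == 64 bytes for 64-bit and 8 * 4 == 32 bytes for
   32-bit targets.  */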
11533
11534 int
11535 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11536 {
11537 int reg_parm_stack_space;
11538
11539 switch (DEFAULT_ABI)
11540 {
11541 default:
11542 reg_parm_stack_space = 0;
11543 break;
11544
11545 case ABI_AIX:
11546 case ABI_DARWIN:
11547 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11548 break;
11549
11550 case ABI_ELFv2:
11551 /* ??? Recomputing this every time is a bit expensive. Is there
11552 a place to cache this information? */
11553 if (rs6000_function_parms_need_stack (fun, incoming))
11554 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11555 else
11556 reg_parm_stack_space = 0;
11557 break;
11558 }
11559
11560 return reg_parm_stack_space;
11561 }
11562
11563 static void
11564 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11565 {
11566 int i;
11567 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11568
11569 if (nregs == 0)
11570 return;
11571
11572 for (i = 0; i < nregs; i++)
11573 {
11574 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11575 if (reload_completed)
11576 {
11577 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11578 tem = NULL_RTX;
11579 else
11580 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11581 i * GET_MODE_SIZE (reg_mode));
11582 }
11583 else
11584 tem = replace_equiv_address (tem, XEXP (tem, 0));
11585
11586 gcc_assert (tem);
11587
11588 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11589 }
11590 }
11591 \f
11592 /* Perform any actions needed for a function that is receiving a
11593 variable number of arguments.
11594
11595 CUM is as above.
11596
11597 MODE and TYPE are the mode and type of the current parameter.
11598
11599 PRETEND_SIZE is a variable that should be set to the amount of stack
11600 that must be pushed by the prolog to pretend that our caller pushed
11601 it.
11602
11603 Normally, this macro will push all remaining incoming registers on the
11604 stack and set PRETEND_SIZE to the length of the registers pushed. */
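/* For the V.4 case below, the register save area holds the 8 argument
   GPRs followed by the argument FPRs, so the first saved FPR lives at
   byte offset GP_ARG_NUM_REG * reg_size (32 in 32-bit mode); see the
   computation of OFF further down.  */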
11605
11606 static void
11607 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11608 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11609 int no_rtl)
11610 {
11611 CUMULATIVE_ARGS next_cum;
11612 int reg_size = TARGET_32BIT ? 4 : 8;
11613 rtx save_area = NULL_RTX, mem;
11614 int first_reg_offset;
11615 alias_set_type set;
11616
11617 /* Skip the last named argument. */
11618 next_cum = *get_cumulative_args (cum);
11619 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11620
11621 if (DEFAULT_ABI == ABI_V4)
11622 {
11623 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11624
11625 if (! no_rtl)
11626 {
11627 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11628 HOST_WIDE_INT offset = 0;
11629
11630 /* Try to optimize the size of the varargs save area.
11631 The ABI requires that ap.reg_save_area is doubleword
11632 aligned, but we don't need to allocate space for all
11633 the bytes, only those into which we will actually save
11634 anything. */
11635 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11636 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11637 if (TARGET_HARD_FLOAT && TARGET_FPRS
11638 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11639 && cfun->va_list_fpr_size)
11640 {
11641 if (gpr_reg_num)
11642 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11643 * UNITS_PER_FP_WORD;
11644 if (cfun->va_list_fpr_size
11645 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11646 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11647 else
11648 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11649 * UNITS_PER_FP_WORD;
11650 }
11651 if (gpr_reg_num)
11652 {
11653 offset = -((first_reg_offset * reg_size) & ~7);
11654 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11655 {
11656 gpr_reg_num = cfun->va_list_gpr_size;
11657 if (reg_size == 4 && (first_reg_offset & 1))
11658 gpr_reg_num++;
11659 }
11660 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11661 }
11662 else if (fpr_size)
11663 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11664 * UNITS_PER_FP_WORD
11665 - (int) (GP_ARG_NUM_REG * reg_size);
11666
11667 if (gpr_size + fpr_size)
11668 {
11669 rtx reg_save_area
11670 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11671 gcc_assert (GET_CODE (reg_save_area) == MEM);
11672 reg_save_area = XEXP (reg_save_area, 0);
11673 if (GET_CODE (reg_save_area) == PLUS)
11674 {
11675 gcc_assert (XEXP (reg_save_area, 0)
11676 == virtual_stack_vars_rtx);
11677 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11678 offset += INTVAL (XEXP (reg_save_area, 1));
11679 }
11680 else
11681 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11682 }
11683
11684 cfun->machine->varargs_save_offset = offset;
11685 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11686 }
11687 }
11688 else
11689 {
11690 first_reg_offset = next_cum.words;
11691 save_area = crtl->args.internal_arg_pointer;
11692
11693 if (targetm.calls.must_pass_in_stack (mode, type))
11694 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11695 }
11696
11697 set = get_varargs_alias_set ();
11698 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11699 && cfun->va_list_gpr_size)
11700 {
11701 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11702
11703 if (va_list_gpr_counter_field)
11704 /* V4 va_list_gpr_size counts number of registers needed. */
11705 n_gpr = cfun->va_list_gpr_size;
11706 else
11707 /* char * va_list instead counts number of bytes needed. */
11708 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11709
11710 if (nregs > n_gpr)
11711 nregs = n_gpr;
11712
11713 mem = gen_rtx_MEM (BLKmode,
11714 plus_constant (Pmode, save_area,
11715 first_reg_offset * reg_size));
11716 MEM_NOTRAP_P (mem) = 1;
11717 set_mem_alias_set (mem, set);
11718 set_mem_align (mem, BITS_PER_WORD);
11719
11720 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11721 nregs);
11722 }
11723
11724 /* Save FP registers if needed. */
11725 if (DEFAULT_ABI == ABI_V4
11726 && TARGET_HARD_FLOAT && TARGET_FPRS
11727 && ! no_rtl
11728 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11729 && cfun->va_list_fpr_size)
11730 {
11731 int fregno = next_cum.fregno, nregs;
11732 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11733 rtx lab = gen_label_rtx ();
11734 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11735 * UNITS_PER_FP_WORD);
11736
11737 emit_jump_insn
11738 (gen_rtx_SET (pc_rtx,
11739 gen_rtx_IF_THEN_ELSE (VOIDmode,
11740 gen_rtx_NE (VOIDmode, cr1,
11741 const0_rtx),
11742 gen_rtx_LABEL_REF (VOIDmode, lab),
11743 pc_rtx)));
11744
11745 for (nregs = 0;
11746 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11747 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11748 {
11749 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11750 ? DFmode : SFmode,
11751 plus_constant (Pmode, save_area, off));
11752 MEM_NOTRAP_P (mem) = 1;
11753 set_mem_alias_set (mem, set);
11754 set_mem_align (mem, GET_MODE_ALIGNMENT (
11755 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11756 ? DFmode : SFmode));
11757 emit_move_insn (mem, gen_rtx_REG (
11758 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11759 ? DFmode : SFmode, fregno));
11760 }
11761
11762 emit_label (lab);
11763 }
11764 }
11765
11766 /* Create the va_list data type. */
11767
11768 static tree
11769 rs6000_build_builtin_va_list (void)
11770 {
11771 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11772
11773 /* For AIX, prefer 'char *' because that's what the system
11774 header files like. */
11775 if (DEFAULT_ABI != ABI_V4)
11776 return build_pointer_type (char_type_node);
11777
11778 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11779 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11780 get_identifier ("__va_list_tag"), record);
11781
11782 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11783 unsigned_char_type_node);
11784 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11785 unsigned_char_type_node);
11786 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11787 every user file. */
11788 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11789 get_identifier ("reserved"), short_unsigned_type_node);
11790 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11791 get_identifier ("overflow_arg_area"),
11792 ptr_type_node);
11793 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11794 get_identifier ("reg_save_area"),
11795 ptr_type_node);
11796
11797 va_list_gpr_counter_field = f_gpr;
11798 va_list_fpr_counter_field = f_fpr;
11799
11800 DECL_FIELD_CONTEXT (f_gpr) = record;
11801 DECL_FIELD_CONTEXT (f_fpr) = record;
11802 DECL_FIELD_CONTEXT (f_res) = record;
11803 DECL_FIELD_CONTEXT (f_ovf) = record;
11804 DECL_FIELD_CONTEXT (f_sav) = record;
11805
11806 TYPE_STUB_DECL (record) = type_decl;
11807 TYPE_NAME (record) = type_decl;
11808 TYPE_FIELDS (record) = f_gpr;
11809 DECL_CHAIN (f_gpr) = f_fpr;
11810 DECL_CHAIN (f_fpr) = f_res;
11811 DECL_CHAIN (f_res) = f_ovf;
11812 DECL_CHAIN (f_ovf) = f_sav;
11813
11814 layout_type (record);
11815
11816 /* The correct type is an array type of one element. */
11817 return build_array_type (record, build_index_type (size_zero_node));
11818 }
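/* Roughly the following C declaration (an illustrative sketch; the
   field types mirror the FIELD_DECLs built above, and the trailing
   comments are ours):

     typedef struct __va_list_tag
     {
       unsigned char gpr;          // count of GPRs consumed, 0..8
       unsigned char fpr;          // count of FPRs consumed, 0..8
       unsigned short reserved;    // the named padding
       void *overflow_arg_area;    // next argument passed on the stack
       void *reg_save_area;        // base of the register save area
     } __builtin_va_list[1];  */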
11819
11820 /* Implement va_start. */
11821
11822 static void
11823 rs6000_va_start (tree valist, rtx nextarg)
11824 {
11825 HOST_WIDE_INT words, n_gpr, n_fpr;
11826 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11827 tree gpr, fpr, ovf, sav, t;
11828
11829 /* Only SVR4 needs something special. */
11830 if (DEFAULT_ABI != ABI_V4)
11831 {
11832 std_expand_builtin_va_start (valist, nextarg);
11833 return;
11834 }
11835
11836 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11837 f_fpr = DECL_CHAIN (f_gpr);
11838 f_res = DECL_CHAIN (f_fpr);
11839 f_ovf = DECL_CHAIN (f_res);
11840 f_sav = DECL_CHAIN (f_ovf);
11841
11842 valist = build_simple_mem_ref (valist);
11843 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11844 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11845 f_fpr, NULL_TREE);
11846 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11847 f_ovf, NULL_TREE);
11848 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11849 f_sav, NULL_TREE);
11850
11851 /* Count number of gp and fp argument registers used. */
11852 words = crtl->args.info.words;
11853 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11854 GP_ARG_NUM_REG);
11855 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11856 FP_ARG_NUM_REG);
11857
11858 if (TARGET_DEBUG_ARG)
11859 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11860 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
11861 words, n_gpr, n_fpr);
11862
11863 if (cfun->va_list_gpr_size)
11864 {
11865 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11866 build_int_cst (NULL_TREE, n_gpr));
11867 TREE_SIDE_EFFECTS (t) = 1;
11868 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11869 }
11870
11871 if (cfun->va_list_fpr_size)
11872 {
11873 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11874 build_int_cst (NULL_TREE, n_fpr));
11875 TREE_SIDE_EFFECTS (t) = 1;
11876 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11877
11878 #ifdef HAVE_AS_GNU_ATTRIBUTE
11879 if (call_ABI_of_interest (cfun->decl))
11880 rs6000_passes_float = true;
11881 #endif
11882 }
11883
11884 /* Find the overflow area. */
11885 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
11886 if (words != 0)
11887 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11888 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11889 TREE_SIDE_EFFECTS (t) = 1;
11890 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11891
11892 /* If there were no va_arg invocations, don't set up the register
11893 save area. */
11894 if (!cfun->va_list_gpr_size
11895 && !cfun->va_list_fpr_size
11896 && n_gpr < GP_ARG_NUM_REG
11897 && n_fpr < FP_ARG_V4_MAX_REG)
11898 return;
11899
11900 /* Find the register save area. */
11901 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11902 if (cfun->machine->varargs_save_offset)
11903 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11904 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11905 TREE_SIDE_EFFECTS (t) = 1;
11906 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11907 }
11908
11909 /* Implement va_arg. */
11910
11911 static tree
11912 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11913 gimple_seq *post_p)
11914 {
11915 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11916 tree gpr, fpr, ovf, sav, reg, t, u;
11917 int size, rsize, n_reg, sav_ofs, sav_scale;
11918 tree lab_false, lab_over, addr;
11919 int align;
11920 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11921 int regalign = 0;
11922 gimple *stmt;
11923
11924 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11925 {
11926 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11927 return build_va_arg_indirect_ref (t);
11928 }
11929
11930 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11931 earlier version of gcc, with the property that it always applied alignment
11932 adjustments to the va-args (even for zero-sized types). The cheapest way
11933 to deal with this is to replicate the effect of the part of
11934 std_gimplify_va_arg_expr that carries out the align adjust, for the case
11935 of relevance.
11936 We don't need to check for pass-by-reference because of the test above.
11937 We can return a simplified answer, since we know there's no offset to add. */
11938
11939 if (((TARGET_MACHO
11940 && rs6000_darwin64_abi)
11941 || DEFAULT_ABI == ABI_ELFv2
11942 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11943 && integer_zerop (TYPE_SIZE (type)))
11944 {
11945 unsigned HOST_WIDE_INT align, boundary;
11946 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11947 align = PARM_BOUNDARY / BITS_PER_UNIT;
11948 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11949 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11950 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11951 boundary /= BITS_PER_UNIT;
11952 if (boundary > align)
11953 {
11954 tree t;
11955 /* This updates arg ptr by the amount that would be necessary
11956 to align the zero-sized (but not zero-alignment) item. */
11957 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11958 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11959 gimplify_and_add (t, pre_p);
11960
11961 t = fold_convert (sizetype, valist_tmp);
11962 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11963 fold_convert (TREE_TYPE (valist),
11964 fold_build2 (BIT_AND_EXPR, sizetype, t,
11965 size_int (-boundary))));
11966 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11967 gimplify_and_add (t, pre_p);
11968 }
11969 /* Since it is zero-sized there's no increment for the item itself. */
11970 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11971 return build_va_arg_indirect_ref (valist_tmp);
11972 }
11973
11974 if (DEFAULT_ABI != ABI_V4)
11975 {
11976 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11977 {
11978 tree elem_type = TREE_TYPE (type);
11979 machine_mode elem_mode = TYPE_MODE (elem_type);
11980 int elem_size = GET_MODE_SIZE (elem_mode);
11981
11982 if (elem_size < UNITS_PER_WORD)
11983 {
11984 tree real_part, imag_part;
11985 gimple_seq post = NULL;
11986
11987 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11988 &post);
11989 /* Copy the value into a temporary, lest the formal temporary
11990 be reused out from under us. */
11991 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11992 gimple_seq_add_seq (pre_p, post);
11993
11994 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11995 post_p);
11996
11997 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11998 }
11999 }
12000
12001 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
12002 }
12003
12004 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12005 f_fpr = DECL_CHAIN (f_gpr);
12006 f_res = DECL_CHAIN (f_fpr);
12007 f_ovf = DECL_CHAIN (f_res);
12008 f_sav = DECL_CHAIN (f_ovf);
12009
12010 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12011 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12012 f_fpr, NULL_TREE);
12013 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12014 f_ovf, NULL_TREE);
12015 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12016 f_sav, NULL_TREE);
12017
12018 size = int_size_in_bytes (type);
12019 rsize = (size + 3) / 4;
12020 align = 1;
12021
12022 if (TARGET_HARD_FLOAT && TARGET_FPRS
12023 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
12024 || (TARGET_DOUBLE_FLOAT
12025 && (TYPE_MODE (type) == DFmode
12026 || FLOAT128_2REG_P (TYPE_MODE (type))
12027 || DECIMAL_FLOAT_MODE_P (TYPE_MODE (type))))))
12028 {
12029 /* FP args go in FP registers, if present. */
12030 reg = fpr;
12031 n_reg = (size + 7) / 8;
12032 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
12033 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
12034 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
12035 align = 8;
12036 }
12037 else
12038 {
12039 /* Otherwise into GP registers. */
12040 reg = gpr;
12041 n_reg = rsize;
12042 sav_ofs = 0;
12043 sav_scale = 4;
12044 if (n_reg == 2)
12045 align = 8;
12046 }
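  /* A couple of illustrative data points (a sketch): a 4-byte int gets
     reg == gpr, n_reg == 1, sav_scale == 4; a double under hard float
     gets reg == fpr, n_reg == 1, sav_ofs == 32 (skipping the 8 saved
     GPRs) and sav_scale == 8, matching the save area layout set up in
     setup_incoming_varargs.  */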
12047
12048 /* Pull the value out of the saved registers.... */
12049
12050 lab_over = NULL;
12051 addr = create_tmp_var (ptr_type_node, "addr");
12052
12053 /* AltiVec vectors never go in registers when -mabi=altivec. */
12054 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
12055 align = 16;
12056 else
12057 {
12058 lab_false = create_artificial_label (input_location);
12059 lab_over = create_artificial_label (input_location);
12060
12061 /* Long long and SPE vectors are aligned in the registers.
12062 As is any other 2 gpr item such as complex int due to a
12063 historical mistake. */
12064 u = reg;
12065 if (n_reg == 2 && reg == gpr)
12066 {
12067 regalign = 1;
12068 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12069 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12070 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12071 unshare_expr (reg), u);
12072 }
12073 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12074 reg number is 0 for f1, so we want to make it odd. */
12075 else if (reg == fpr && TYPE_MODE (type) == TDmode)
12076 {
12077 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12078 build_int_cst (TREE_TYPE (reg), 1));
12079 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12080 }
12081
12082 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12083 t = build2 (GE_EXPR, boolean_type_node, u, t);
12084 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12085 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12086 gimplify_and_add (t, pre_p);
12087
12088 t = sav;
12089 if (sav_ofs)
12090 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12091
12092 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12093 build_int_cst (TREE_TYPE (reg), n_reg));
12094 u = fold_convert (sizetype, u);
12095 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12096 t = fold_build_pointer_plus (t, u);
12097
12098 /* _Decimal32 varargs are located in the second word of the 64-bit
12099 FP register for 32-bit binaries. */
12100 if (TARGET_32BIT
12101 && TARGET_HARD_FLOAT && TARGET_FPRS
12102 && TYPE_MODE (type) == SDmode)
12103 t = fold_build_pointer_plus_hwi (t, size);
12104
12105 gimplify_assign (addr, t, pre_p);
12106
12107 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12108
12109 stmt = gimple_build_label (lab_false);
12110 gimple_seq_add_stmt (pre_p, stmt);
12111
12112 if ((n_reg == 2 && !regalign) || n_reg > 2)
12113 {
12114 /* Ensure that we don't find any more args in regs.
12115 Alignment has already taken care of the special cases. */
12116 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12117 }
12118 }
12119
12120 /* ... otherwise out of the overflow area. */
12121
12122 /* Care for on-stack alignment if needed. */
12123 t = ovf;
12124 if (align != 1)
12125 {
12126 t = fold_build_pointer_plus_hwi (t, align - 1);
12127 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12128 build_int_cst (TREE_TYPE (t), -align));
12129 }
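/* The two statements above compute t = (ovf + align - 1) & -align, the
   usual round-up-to-a-multiple trick; e.g. with align == 8 an overflow
   pointer of 0x14 is bumped to 0x18, while one already at 0x18 is left
   unchanged.  */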
12130 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12131
12132 gimplify_assign (unshare_expr (addr), t, pre_p);
12133
12134 t = fold_build_pointer_plus_hwi (t, size);
12135 gimplify_assign (unshare_expr (ovf), t, pre_p);
12136
12137 if (lab_over)
12138 {
12139 stmt = gimple_build_label (lab_over);
12140 gimple_seq_add_stmt (pre_p, stmt);
12141 }
12142
12143 if (STRICT_ALIGNMENT
12144 && (TYPE_ALIGN (type)
12145 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12146 {
12147 /* The value (of type complex double, for example) may not be
12148 aligned in memory in the saved registers, so copy via a
12149 temporary. (This is the same code as used for SPARC.) */
12150 tree tmp = create_tmp_var (type, "va_arg_tmp");
12151 tree dest_addr = build_fold_addr_expr (tmp);
12152
12153 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12154 3, dest_addr, addr, size_int (rsize * 4));
12155
12156 gimplify_and_add (copy, pre_p);
12157 addr = dest_addr;
12158 }
12159
12160 addr = fold_convert (ptrtype, addr);
12161 return build_va_arg_indirect_ref (addr);
12162 }
12163
12164 /* Builtins. */
12165
12166 static void
12167 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12168 {
12169 tree t;
12170 unsigned classify = rs6000_builtin_info[(int)code].attr;
12171 const char *attr_string = "";
12172
12173 gcc_assert (name != NULL);
12174 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
12175
12176 if (rs6000_builtin_decls[(int)code])
12177 fatal_error (input_location,
12178 "internal error: builtin function %s already processed", name);
12179
12180 rs6000_builtin_decls[(int)code] = t =
12181 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12182
12183 /* Set any special attributes. */
12184 if ((classify & RS6000_BTC_CONST) != 0)
12185 {
12186 /* const function, function only depends on the inputs. */
12187 TREE_READONLY (t) = 1;
12188 TREE_NOTHROW (t) = 1;
12189 attr_string = ", const";
12190 }
12191 else if ((classify & RS6000_BTC_PURE) != 0)
12192 {
12193 /* pure function, function can read global memory, but does not set any
12194 external state. */
12195 DECL_PURE_P (t) = 1;
12196 TREE_NOTHROW (t) = 1;
12197 attr_string = ", pure";
12198 }
12199 else if ((classify & RS6000_BTC_FP) != 0)
12200 {
12201 /* Function is a math function. If rounding mode is on, then treat the
12202 function as not reading global memory, but it can have arbitrary side
12203 effects. If it is off, then assume the function is a const function.
12204 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12205 builtin-attribute.def that is used for the math functions. */
12206 TREE_NOTHROW (t) = 1;
12207 if (flag_rounding_math)
12208 {
12209 DECL_PURE_P (t) = 1;
12210 DECL_IS_NOVOPS (t) = 1;
12211 attr_string = ", fp, pure";
12212 }
12213 else
12214 {
12215 TREE_READONLY (t) = 1;
12216 attr_string = ", fp, const";
12217 }
12218 }
12219 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12220 gcc_unreachable ();
12221
12222 if (TARGET_DEBUG_BUILTIN)
12223 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12224 (int)code, name, attr_string);
12225 }
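/* Sketch of a typical use (the names below match the direct calls made by
   the *_init_builtins routines later in this file; treat the exact type
   node as an assumption here):

   def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si,
   ALTIVEC_BUILTIN_MTVSCR);

   This registers the function under its external name, records the decl
   in rs6000_builtin_decls, and applies whatever const/pure/fp attributes
   rs6000_builtin_info encodes for that code.  */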
12226
12227 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
12228
12229 #undef RS6000_BUILTIN_1
12230 #undef RS6000_BUILTIN_2
12231 #undef RS6000_BUILTIN_3
12232 #undef RS6000_BUILTIN_A
12233 #undef RS6000_BUILTIN_D
12234 #undef RS6000_BUILTIN_E
12235 #undef RS6000_BUILTIN_H
12236 #undef RS6000_BUILTIN_P
12237 #undef RS6000_BUILTIN_Q
12238 #undef RS6000_BUILTIN_S
12239 #undef RS6000_BUILTIN_X
12240
12241 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12242 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12243 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12244 { MASK, ICODE, NAME, ENUM },
12245
12246 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12247 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12248 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12249 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12250 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12251 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12252 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12253 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12254
12255 static const struct builtin_description bdesc_3arg[] =
12256 {
12257 #include "rs6000-builtin.def"
12258 };
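/* How the table above gets filled: rs6000-builtin.def is an X-macro file,
   and each inclusion redefines the RS6000_BUILTIN_* macros so that exactly
   one class expands to an initializer.  As a hypothetical example, a
   ternary entry along the lines of

   RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
   MASK, ATTR, CODE_FOR_fmav4sf4)

   would expand to

   { MASK, CODE_FOR_fmav4sf4, "__builtin_altivec_vmaddfp",
   ALTIVEC_BUILTIN_VMADDFP },

   while every other RS6000_BUILTIN_* line expands to nothing, so
   bdesc_3arg collects only the ternary builtins.  The same #undef/#define
   dance is repeated below for each remaining table.  */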
12259
12260 /* DST operations: void foo (void *, const int, const char). */
12261
12262 #undef RS6000_BUILTIN_1
12263 #undef RS6000_BUILTIN_2
12264 #undef RS6000_BUILTIN_3
12265 #undef RS6000_BUILTIN_A
12266 #undef RS6000_BUILTIN_D
12267 #undef RS6000_BUILTIN_E
12268 #undef RS6000_BUILTIN_H
12269 #undef RS6000_BUILTIN_P
12270 #undef RS6000_BUILTIN_Q
12271 #undef RS6000_BUILTIN_S
12272 #undef RS6000_BUILTIN_X
12273
12274 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12275 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12276 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12277 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12278 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12279 { MASK, ICODE, NAME, ENUM },
12280
12281 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12282 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12283 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12284 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12285 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12286 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12287
12288 static const struct builtin_description bdesc_dst[] =
12289 {
12290 #include "rs6000-builtin.def"
12291 };
12292
12293 /* Simple binary operations: VECc = foo (VECa, VECb). */
12294
12295 #undef RS6000_BUILTIN_1
12296 #undef RS6000_BUILTIN_2
12297 #undef RS6000_BUILTIN_3
12298 #undef RS6000_BUILTIN_A
12299 #undef RS6000_BUILTIN_D
12300 #undef RS6000_BUILTIN_E
12301 #undef RS6000_BUILTIN_H
12302 #undef RS6000_BUILTIN_P
12303 #undef RS6000_BUILTIN_Q
12304 #undef RS6000_BUILTIN_S
12305 #undef RS6000_BUILTIN_X
12306
12307 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12308 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12309 { MASK, ICODE, NAME, ENUM },
12310
12311 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12312 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12313 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12314 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12315 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12316 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12317 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12318 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12319 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12320
12321 static const struct builtin_description bdesc_2arg[] =
12322 {
12323 #include "rs6000-builtin.def"
12324 };
12325
12326 #undef RS6000_BUILTIN_1
12327 #undef RS6000_BUILTIN_2
12328 #undef RS6000_BUILTIN_3
12329 #undef RS6000_BUILTIN_A
12330 #undef RS6000_BUILTIN_D
12331 #undef RS6000_BUILTIN_E
12332 #undef RS6000_BUILTIN_H
12333 #undef RS6000_BUILTIN_P
12334 #undef RS6000_BUILTIN_Q
12335 #undef RS6000_BUILTIN_S
12336 #undef RS6000_BUILTIN_X
12337
12338 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12339 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12340 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12341 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12342 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12343 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12344 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12345 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12346 { MASK, ICODE, NAME, ENUM },
12347
12348 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12349 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12350 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12351
12352 /* AltiVec predicates. */
12353
12354 static const struct builtin_description bdesc_altivec_preds[] =
12355 {
12356 #include "rs6000-builtin.def"
12357 };
12358
12359 /* SPE predicates. */
12360 #undef RS6000_BUILTIN_1
12361 #undef RS6000_BUILTIN_2
12362 #undef RS6000_BUILTIN_3
12363 #undef RS6000_BUILTIN_A
12364 #undef RS6000_BUILTIN_D
12365 #undef RS6000_BUILTIN_E
12366 #undef RS6000_BUILTIN_H
12367 #undef RS6000_BUILTIN_P
12368 #undef RS6000_BUILTIN_Q
12369 #undef RS6000_BUILTIN_S
12370 #undef RS6000_BUILTIN_X
12371
12372 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12373 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12374 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12375 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12376 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12377 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12378 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12379 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12380 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12381 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
12382 { MASK, ICODE, NAME, ENUM },
12383
12384 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12385
12386 static const struct builtin_description bdesc_spe_predicates[] =
12387 {
12388 #include "rs6000-builtin.def"
12389 };
12390
12391 /* SPE evsel predicates. */
12392 #undef RS6000_BUILTIN_1
12393 #undef RS6000_BUILTIN_2
12394 #undef RS6000_BUILTIN_3
12395 #undef RS6000_BUILTIN_A
12396 #undef RS6000_BUILTIN_D
12397 #undef RS6000_BUILTIN_E
12398 #undef RS6000_BUILTIN_H
12399 #undef RS6000_BUILTIN_P
12400 #undef RS6000_BUILTIN_Q
12401 #undef RS6000_BUILTIN_S
12402 #undef RS6000_BUILTIN_X
12403
12404 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12405 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12406 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12407 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12408 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12409 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
12410 { MASK, ICODE, NAME, ENUM },
12411
12412 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12413 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12414 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12415 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12416 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12417
12418 static const struct builtin_description bdesc_spe_evsel[] =
12419 {
12420 #include "rs6000-builtin.def"
12421 };
12422
12423 /* PAIRED predicates. */
12424 #undef RS6000_BUILTIN_1
12425 #undef RS6000_BUILTIN_2
12426 #undef RS6000_BUILTIN_3
12427 #undef RS6000_BUILTIN_A
12428 #undef RS6000_BUILTIN_D
12429 #undef RS6000_BUILTIN_E
12430 #undef RS6000_BUILTIN_H
12431 #undef RS6000_BUILTIN_P
12432 #undef RS6000_BUILTIN_Q
12433 #undef RS6000_BUILTIN_S
12434 #undef RS6000_BUILTIN_X
12435
12436 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12437 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12438 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12439 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12440 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12441 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12442 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12443 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12444 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
12445 { MASK, ICODE, NAME, ENUM },
12446
12447 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12448 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12449
12450 static const struct builtin_description bdesc_paired_preds[] =
12451 {
12452 #include "rs6000-builtin.def"
12453 };
12454
12455 /* ABS* operations. */
12456
12457 #undef RS6000_BUILTIN_1
12458 #undef RS6000_BUILTIN_2
12459 #undef RS6000_BUILTIN_3
12460 #undef RS6000_BUILTIN_A
12461 #undef RS6000_BUILTIN_D
12462 #undef RS6000_BUILTIN_E
12463 #undef RS6000_BUILTIN_H
12464 #undef RS6000_BUILTIN_P
12465 #undef RS6000_BUILTIN_Q
12466 #undef RS6000_BUILTIN_S
12467 #undef RS6000_BUILTIN_X
12468
12469 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12470 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12471 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12472 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12473 { MASK, ICODE, NAME, ENUM },
12474
12475 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12476 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12477 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12478 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12479 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12480 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12481 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12482
12483 static const struct builtin_description bdesc_abs[] =
12484 {
12485 #include "rs6000-builtin.def"
12486 };
12487
12488 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
12489 foo (VECa). */
12490
12491 #undef RS6000_BUILTIN_1
12492 #undef RS6000_BUILTIN_2
12493 #undef RS6000_BUILTIN_3
12494 #undef RS6000_BUILTIN_A
12495 #undef RS6000_BUILTIN_D
12496 #undef RS6000_BUILTIN_E
12497 #undef RS6000_BUILTIN_H
12498 #undef RS6000_BUILTIN_P
12499 #undef RS6000_BUILTIN_Q
12500 #undef RS6000_BUILTIN_S
12501 #undef RS6000_BUILTIN_X
12502
12503 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12504 { MASK, ICODE, NAME, ENUM },
12505
12506 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12507 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12508 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12509 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12510 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12511 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12512 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12513 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12514 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12515 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12516
12517 static const struct builtin_description bdesc_1arg[] =
12518 {
12519 #include "rs6000-builtin.def"
12520 };
12521
12522 /* HTM builtins. */
12523 #undef RS6000_BUILTIN_1
12524 #undef RS6000_BUILTIN_2
12525 #undef RS6000_BUILTIN_3
12526 #undef RS6000_BUILTIN_A
12527 #undef RS6000_BUILTIN_D
12528 #undef RS6000_BUILTIN_E
12529 #undef RS6000_BUILTIN_H
12530 #undef RS6000_BUILTIN_P
12531 #undef RS6000_BUILTIN_Q
12532 #undef RS6000_BUILTIN_S
12533 #undef RS6000_BUILTIN_X
12534
12535 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12536 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12537 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12538 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12539 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12540 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12541 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12542 { MASK, ICODE, NAME, ENUM },
12543
12544 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12545 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12546 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12547 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12548
12549 static const struct builtin_description bdesc_htm[] =
12550 {
12551 #include "rs6000-builtin.def"
12552 };
12553
12554 #undef RS6000_BUILTIN_1
12555 #undef RS6000_BUILTIN_2
12556 #undef RS6000_BUILTIN_3
12557 #undef RS6000_BUILTIN_A
12558 #undef RS6000_BUILTIN_D
12559 #undef RS6000_BUILTIN_E
12560 #undef RS6000_BUILTIN_H
12561 #undef RS6000_BUILTIN_P
12562 #undef RS6000_BUILTIN_Q
12563 #undef RS6000_BUILTIN_S
12564
12565 /* Return true if a builtin function is overloaded. */
12566 bool
12567 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12568 {
12569 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12570 }
12571
12572 /* Expand an expression EXP that calls a builtin without arguments. */
12573 static rtx
12574 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12575 {
12576 rtx pat;
12577 machine_mode tmode = insn_data[icode].operand[0].mode;
12578
12579 if (icode == CODE_FOR_nothing)
12580 /* Builtin not supported on this processor. */
12581 return 0;
12582
12583 if (target == 0
12584 || GET_MODE (target) != tmode
12585 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12586 target = gen_reg_rtx (tmode);
12587
12588 pat = GEN_FCN (icode) (target);
12589 if (! pat)
12590 return 0;
12591 emit_insn (pat);
12592
12593 return target;
12594 }
12595
12596
12597 static rtx
12598 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12599 {
12600 rtx pat;
12601 tree arg0 = CALL_EXPR_ARG (exp, 0);
12602 tree arg1 = CALL_EXPR_ARG (exp, 1);
12603 rtx op0 = expand_normal (arg0);
12604 rtx op1 = expand_normal (arg1);
12605 machine_mode mode0 = insn_data[icode].operand[0].mode;
12606 machine_mode mode1 = insn_data[icode].operand[1].mode;
12607
12608 if (icode == CODE_FOR_nothing)
12609 /* Builtin not supported on this processor. */
12610 return 0;
12611
12612 /* If we got invalid arguments, bail out before generating bad rtl. */
12613 if (arg0 == error_mark_node || arg1 == error_mark_node)
12614 return const0_rtx;
12615
12616 if (GET_CODE (op0) != CONST_INT
12617 || INTVAL (op0) > 255
12618 || INTVAL (op0) < 0)
12619 {
12620 error ("argument 1 must be an 8-bit field value");
12621 return const0_rtx;
12622 }
12623
12624 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12625 op0 = copy_to_mode_reg (mode0, op0);
12626
12627 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12628 op1 = copy_to_mode_reg (mode1, op1);
12629
12630 pat = GEN_FCN (icode) (op0, op1);
12631 if (! pat)
12632 return const0_rtx;
12633 emit_insn (pat);
12634
12635 return NULL_RTX;
12636 }
12637
12638
12639 static rtx
12640 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12641 {
12642 rtx pat;
12643 tree arg0 = CALL_EXPR_ARG (exp, 0);
12644 rtx op0 = expand_normal (arg0);
12645 machine_mode tmode = insn_data[icode].operand[0].mode;
12646 machine_mode mode0 = insn_data[icode].operand[1].mode;
12647
12648 if (icode == CODE_FOR_nothing)
12649 /* Builtin not supported on this processor. */
12650 return 0;
12651
12652 /* If we got invalid arguments, bail out before generating bad rtl. */
12653 if (arg0 == error_mark_node)
12654 return const0_rtx;
12655
12656 if (icode == CODE_FOR_altivec_vspltisb
12657 || icode == CODE_FOR_altivec_vspltish
12658 || icode == CODE_FOR_altivec_vspltisw
12659 || icode == CODE_FOR_spe_evsplatfi
12660 || icode == CODE_FOR_spe_evsplati)
12661 {
12662 /* Only allow 5-bit *signed* literals. */
12663 if (GET_CODE (op0) != CONST_INT
12664 || INTVAL (op0) > 15
12665 || INTVAL (op0) < -16)
12666 {
12667 error ("argument 1 must be a 5-bit signed literal");
12668 return const0_rtx;
12669 }
12670 }
12671
12672 if (target == 0
12673 || GET_MODE (target) != tmode
12674 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12675 target = gen_reg_rtx (tmode);
12676
12677 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12678 op0 = copy_to_mode_reg (mode0, op0);
12679
12680 pat = GEN_FCN (icode) (target, op0);
12681 if (! pat)
12682 return 0;
12683 emit_insn (pat);
12684
12685 return target;
12686 }
12687
12688 static rtx
12689 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12690 {
12691 rtx pat, scratch1, scratch2;
12692 tree arg0 = CALL_EXPR_ARG (exp, 0);
12693 rtx op0 = expand_normal (arg0);
12694 machine_mode tmode = insn_data[icode].operand[0].mode;
12695 machine_mode mode0 = insn_data[icode].operand[1].mode;
12696
12697 /* If we have invalid arguments, bail out before generating bad rtl. */
12698 if (arg0 == error_mark_node)
12699 return const0_rtx;
12700
12701 if (target == 0
12702 || GET_MODE (target) != tmode
12703 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12704 target = gen_reg_rtx (tmode);
12705
12706 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12707 op0 = copy_to_mode_reg (mode0, op0);
12708
12709 scratch1 = gen_reg_rtx (mode0);
12710 scratch2 = gen_reg_rtx (mode0);
12711
12712 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12713 if (! pat)
12714 return 0;
12715 emit_insn (pat);
12716
12717 return target;
12718 }
12719
12720 static rtx
12721 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12722 {
12723 rtx pat;
12724 tree arg0 = CALL_EXPR_ARG (exp, 0);
12725 tree arg1 = CALL_EXPR_ARG (exp, 1);
12726 rtx op0 = expand_normal (arg0);
12727 rtx op1 = expand_normal (arg1);
12728 machine_mode tmode = insn_data[icode].operand[0].mode;
12729 machine_mode mode0 = insn_data[icode].operand[1].mode;
12730 machine_mode mode1 = insn_data[icode].operand[2].mode;
12731
12732 if (icode == CODE_FOR_nothing)
12733 /* Builtin not supported on this processor. */
12734 return 0;
12735
12736 /* If we got invalid arguments, bail out before generating bad rtl. */
12737 if (arg0 == error_mark_node || arg1 == error_mark_node)
12738 return const0_rtx;
12739
12740 if (icode == CODE_FOR_altivec_vcfux
12741 || icode == CODE_FOR_altivec_vcfsx
12742 || icode == CODE_FOR_altivec_vctsxs
12743 || icode == CODE_FOR_altivec_vctuxs
12744 || icode == CODE_FOR_altivec_vspltb
12745 || icode == CODE_FOR_altivec_vsplth
12746 || icode == CODE_FOR_altivec_vspltw
12747 || icode == CODE_FOR_spe_evaddiw
12748 || icode == CODE_FOR_spe_evldd
12749 || icode == CODE_FOR_spe_evldh
12750 || icode == CODE_FOR_spe_evldw
12751 || icode == CODE_FOR_spe_evlhhesplat
12752 || icode == CODE_FOR_spe_evlhhossplat
12753 || icode == CODE_FOR_spe_evlhhousplat
12754 || icode == CODE_FOR_spe_evlwhe
12755 || icode == CODE_FOR_spe_evlwhos
12756 || icode == CODE_FOR_spe_evlwhou
12757 || icode == CODE_FOR_spe_evlwhsplat
12758 || icode == CODE_FOR_spe_evlwwsplat
12759 || icode == CODE_FOR_spe_evrlwi
12760 || icode == CODE_FOR_spe_evslwi
12761 || icode == CODE_FOR_spe_evsrwis
12762 || icode == CODE_FOR_spe_evsubifw
12763 || icode == CODE_FOR_spe_evsrwiu)
12764 {
12765 /* Only allow 5-bit unsigned literals. */
12766 STRIP_NOPS (arg1);
12767 if (TREE_CODE (arg1) != INTEGER_CST
12768 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12769 {
12770 error ("argument 2 must be a 5-bit unsigned literal");
12771 return const0_rtx;
12772 }
12773 }
12774
12775 if (target == 0
12776 || GET_MODE (target) != tmode
12777 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12778 target = gen_reg_rtx (tmode);
12779
12780 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12781 op0 = copy_to_mode_reg (mode0, op0);
12782 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12783 op1 = copy_to_mode_reg (mode1, op1);
12784
12785 pat = GEN_FCN (icode) (target, op0, op1);
12786 if (! pat)
12787 return 0;
12788 emit_insn (pat);
12789
12790 return target;
12791 }
12792
12793 static rtx
12794 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12795 {
12796 rtx pat, scratch;
12797 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12798 tree arg0 = CALL_EXPR_ARG (exp, 1);
12799 tree arg1 = CALL_EXPR_ARG (exp, 2);
12800 rtx op0 = expand_normal (arg0);
12801 rtx op1 = expand_normal (arg1);
12802 machine_mode tmode = SImode;
12803 machine_mode mode0 = insn_data[icode].operand[1].mode;
12804 machine_mode mode1 = insn_data[icode].operand[2].mode;
12805 int cr6_form_int;
12806
12807 if (TREE_CODE (cr6_form) != INTEGER_CST)
12808 {
12809 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12810 return const0_rtx;
12811 }
12812 else
12813 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12814
12815 gcc_assert (mode0 == mode1);
12816
12817 /* If we have invalid arguments, bail out before generating bad rtl. */
12818 if (arg0 == error_mark_node || arg1 == error_mark_node)
12819 return const0_rtx;
12820
12821 if (target == 0
12822 || GET_MODE (target) != tmode
12823 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12824 target = gen_reg_rtx (tmode);
12825
12826 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12827 op0 = copy_to_mode_reg (mode0, op0);
12828 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12829 op1 = copy_to_mode_reg (mode1, op1);
12830
12831 scratch = gen_reg_rtx (mode0);
12832
12833 pat = GEN_FCN (icode) (scratch, op0, op1);
12834 if (! pat)
12835 return 0;
12836 emit_insn (pat);
12837
12838 /* The vec_any* and vec_all* predicates use the same opcodes for two
12839 different operations, but the bits in CR6 will be different
12840 depending on what information we want. So we have to play tricks
12841 with CR6 to get the right bits out.
12842
12843 If you think this is disgusting, look at the specs for the
12844 AltiVec predicates. */
12845
12846 switch (cr6_form_int)
12847 {
12848 case 0:
12849 emit_insn (gen_cr6_test_for_zero (target));
12850 break;
12851 case 1:
12852 emit_insn (gen_cr6_test_for_zero_reverse (target));
12853 break;
12854 case 2:
12855 emit_insn (gen_cr6_test_for_lt (target));
12856 break;
12857 case 3:
12858 emit_insn (gen_cr6_test_for_lt_reverse (target));
12859 break;
12860 default:
12861 error ("argument 1 of __builtin_altivec_predicate is out of range");
12862 break;
12863 }
12864
12865 return target;
12866 }
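/* For reference (assuming the selector encoding altivec.h uses, where
   __CR6_EQ is 0, __CR6_EQ_REV is 1, __CR6_LT is 2 and __CR6_LT_REV is 3):
   a vec_all_eq expansion passes __CR6_LT as the first argument, while
   vec_any_eq passes __CR6_EQ_REV, which is how one compare opcode serves
   both the "all" and "any" forms handled above.  */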
12867
12868 static rtx
12869 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12870 {
12871 rtx pat, addr;
12872 tree arg0 = CALL_EXPR_ARG (exp, 0);
12873 tree arg1 = CALL_EXPR_ARG (exp, 1);
12874 machine_mode tmode = insn_data[icode].operand[0].mode;
12875 machine_mode mode0 = Pmode;
12876 machine_mode mode1 = Pmode;
12877 rtx op0 = expand_normal (arg0);
12878 rtx op1 = expand_normal (arg1);
12879
12880 if (icode == CODE_FOR_nothing)
12881 /* Builtin not supported on this processor. */
12882 return 0;
12883
12884 /* If we got invalid arguments, bail out before generating bad rtl. */
12885 if (arg0 == error_mark_node || arg1 == error_mark_node)
12886 return const0_rtx;
12887
12888 if (target == 0
12889 || GET_MODE (target) != tmode
12890 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12891 target = gen_reg_rtx (tmode);
12892
12893 op1 = copy_to_mode_reg (mode1, op1);
12894
12895 if (op0 == const0_rtx)
12896 {
12897 addr = gen_rtx_MEM (tmode, op1);
12898 }
12899 else
12900 {
12901 op0 = copy_to_mode_reg (mode0, op0);
12902 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12903 }
12904
12905 pat = GEN_FCN (icode) (target, addr);
12906
12907 if (! pat)
12908 return 0;
12909 emit_insn (pat);
12910
12911 return target;
12912 }
12913
12914 /* Return a constant vector for use as a little-endian permute control vector
12915 to reverse the order of elements of the given vector mode. */
12916 static rtx
12917 swap_selector_for_mode (machine_mode mode)
12918 {
12919 /* These are little endian vectors, so their elements are reversed
12920 from what you would normally expect for a permute control vector. */
12921 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12922 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12923 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12924 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12925 unsigned int *swaparray, i;
12926 rtx perm[16];
12927
12928 switch (mode)
12929 {
12930 case V2DFmode:
12931 case V2DImode:
12932 swaparray = swap2;
12933 break;
12934 case V4SFmode:
12935 case V4SImode:
12936 swaparray = swap4;
12937 break;
12938 case V8HImode:
12939 swaparray = swap8;
12940 break;
12941 case V16QImode:
12942 swaparray = swap16;
12943 break;
12944 default:
12945 gcc_unreachable ();
12946 }
12947
12948 for (i = 0; i < 16; ++i)
12949 perm[i] = GEN_INT (swaparray[i]);
12950
12951 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
12952 }
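/* Worked example (illustration only): for V4SImode the array above is
   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.  Because the constant
   vector is itself emitted in little-endian element order, the control
   vperm actually sees is the byte-reversed
   {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}, i.e. "take word 3, then
   word 2, then word 1, then word 0": the four 4-byte elements are
   reversed while the bytes inside each element stay in place.  */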
12953
12954 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12955 with -maltivec=be specified. Issue the load followed by an element-reversing
12956 permute. */
12957 void
12958 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12959 {
12960 rtx tmp = gen_reg_rtx (mode);
12961 rtx load = gen_rtx_SET (tmp, op1);
12962 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12963 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12964 rtx sel = swap_selector_for_mode (mode);
12965 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12966
12967 gcc_assert (REG_P (op0));
12968 emit_insn (par);
12969 emit_insn (gen_rtx_SET (op0, vperm));
12970 }
12971
12972 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12973 with -maltivec=be specified. Issue the store preceded by an element-reversing
12974 permute. */
12975 void
12976 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12977 {
12978 rtx tmp = gen_reg_rtx (mode);
12979 rtx store = gen_rtx_SET (op0, tmp);
12980 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12981 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12982 rtx sel = swap_selector_for_mode (mode);
12983 rtx vperm;
12984
12985 gcc_assert (REG_P (op1));
12986 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12987 emit_insn (gen_rtx_SET (tmp, vperm));
12988 emit_insn (par);
12989 }
12990
12991 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12992 specified. Issue the store preceded by an element-reversing permute. */
12993 void
12994 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12995 {
12996 machine_mode inner_mode = GET_MODE_INNER (mode);
12997 rtx tmp = gen_reg_rtx (mode);
12998 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12999 rtx sel = swap_selector_for_mode (mode);
13000 rtx vperm;
13001
13002 gcc_assert (REG_P (op1));
13003 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
13004 emit_insn (gen_rtx_SET (tmp, vperm));
13005 emit_insn (gen_rtx_SET (op0, stvx));
13006 }
13007
13008 static rtx
13009 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13010 {
13011 rtx pat, addr;
13012 tree arg0 = CALL_EXPR_ARG (exp, 0);
13013 tree arg1 = CALL_EXPR_ARG (exp, 1);
13014 machine_mode tmode = insn_data[icode].operand[0].mode;
13015 machine_mode mode0 = Pmode;
13016 machine_mode mode1 = Pmode;
13017 rtx op0 = expand_normal (arg0);
13018 rtx op1 = expand_normal (arg1);
13019
13020 if (icode == CODE_FOR_nothing)
13021 /* Builtin not supported on this processor. */
13022 return 0;
13023
13024 /* If we got invalid arguments, bail out before generating bad rtl. */
13025 if (arg0 == error_mark_node || arg1 == error_mark_node)
13026 return const0_rtx;
13027
13028 if (target == 0
13029 || GET_MODE (target) != tmode
13030 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13031 target = gen_reg_rtx (tmode);
13032
13033 op1 = copy_to_mode_reg (mode1, op1);
13034
13035 if (op0 == const0_rtx)
13036 {
13037 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13038 }
13039 else
13040 {
13041 op0 = copy_to_mode_reg (mode0, op0);
13042 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
13043 }
13044
13045 pat = GEN_FCN (icode) (target, addr);
13046
13047 if (! pat)
13048 return 0;
13049 emit_insn (pat);
13050
13051 return target;
13052 }
13053
13054 static rtx
13055 spe_expand_stv_builtin (enum insn_code icode, tree exp)
13056 {
13057 tree arg0 = CALL_EXPR_ARG (exp, 0);
13058 tree arg1 = CALL_EXPR_ARG (exp, 1);
13059 tree arg2 = CALL_EXPR_ARG (exp, 2);
13060 rtx op0 = expand_normal (arg0);
13061 rtx op1 = expand_normal (arg1);
13062 rtx op2 = expand_normal (arg2);
13063 rtx pat;
13064 machine_mode mode0 = insn_data[icode].operand[0].mode;
13065 machine_mode mode1 = insn_data[icode].operand[1].mode;
13066 machine_mode mode2 = insn_data[icode].operand[2].mode;
13067
13068 /* Invalid arguments. Bail before doing anything stoopid! */
13069 if (arg0 == error_mark_node
13070 || arg1 == error_mark_node
13071 || arg2 == error_mark_node)
13072 return const0_rtx;
13073
13074 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
13075 op0 = copy_to_mode_reg (mode2, op0);
13076 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
13077 op1 = copy_to_mode_reg (mode0, op1);
13078 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13079 op2 = copy_to_mode_reg (mode1, op2);
13080
13081 pat = GEN_FCN (icode) (op1, op2, op0);
13082 if (pat)
13083 emit_insn (pat);
13084 return NULL_RTX;
13085 }
13086
13087 static rtx
13088 paired_expand_stv_builtin (enum insn_code icode, tree exp)
13089 {
13090 tree arg0 = CALL_EXPR_ARG (exp, 0);
13091 tree arg1 = CALL_EXPR_ARG (exp, 1);
13092 tree arg2 = CALL_EXPR_ARG (exp, 2);
13093 rtx op0 = expand_normal (arg0);
13094 rtx op1 = expand_normal (arg1);
13095 rtx op2 = expand_normal (arg2);
13096 rtx pat, addr;
13097 machine_mode tmode = insn_data[icode].operand[0].mode;
13098 machine_mode mode1 = Pmode;
13099 machine_mode mode2 = Pmode;
13100
13101 /* Invalid arguments. Bail before doing anything stoopid! */
13102 if (arg0 == error_mark_node
13103 || arg1 == error_mark_node
13104 || arg2 == error_mark_node)
13105 return const0_rtx;
13106
13107 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
13108 op0 = copy_to_mode_reg (tmode, op0);
13109
13110 op2 = copy_to_mode_reg (mode2, op2);
13111
13112 if (op1 == const0_rtx)
13113 {
13114 addr = gen_rtx_MEM (tmode, op2);
13115 }
13116 else
13117 {
13118 op1 = copy_to_mode_reg (mode1, op1);
13119 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
13120 }
13121
13122 pat = GEN_FCN (icode) (addr, op0);
13123 if (pat)
13124 emit_insn (pat);
13125 return NULL_RTX;
13126 }
13127
13128 static rtx
13129 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13130 {
13131 tree arg0 = CALL_EXPR_ARG (exp, 0);
13132 tree arg1 = CALL_EXPR_ARG (exp, 1);
13133 tree arg2 = CALL_EXPR_ARG (exp, 2);
13134 rtx op0 = expand_normal (arg0);
13135 rtx op1 = expand_normal (arg1);
13136 rtx op2 = expand_normal (arg2);
13137 rtx pat, addr;
13138 machine_mode tmode = insn_data[icode].operand[0].mode;
13139 machine_mode smode = insn_data[icode].operand[1].mode;
13140 machine_mode mode1 = Pmode;
13141 machine_mode mode2 = Pmode;
13142
13143 /* Invalid arguments. Bail before doing anything stoopid! */
13144 if (arg0 == error_mark_node
13145 || arg1 == error_mark_node
13146 || arg2 == error_mark_node)
13147 return const0_rtx;
13148
13149 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13150 op0 = copy_to_mode_reg (smode, op0);
13151
13152 op2 = copy_to_mode_reg (mode2, op2);
13153
13154 if (op1 == const0_rtx)
13155 {
13156 addr = gen_rtx_MEM (tmode, op2);
13157 }
13158 else
13159 {
13160 op1 = copy_to_mode_reg (mode1, op1);
13161 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
13162 }
13163
13164 pat = GEN_FCN (icode) (addr, op0);
13165 if (pat)
13166 emit_insn (pat);
13167 return NULL_RTX;
13168 }
13169
13170 /* Return the appropriate SPR number associated with the given builtin. */
13171 static inline HOST_WIDE_INT
13172 htm_spr_num (enum rs6000_builtins code)
13173 {
13174 if (code == HTM_BUILTIN_GET_TFHAR
13175 || code == HTM_BUILTIN_SET_TFHAR)
13176 return TFHAR_SPR;
13177 else if (code == HTM_BUILTIN_GET_TFIAR
13178 || code == HTM_BUILTIN_SET_TFIAR)
13179 return TFIAR_SPR;
13180 else if (code == HTM_BUILTIN_GET_TEXASR
13181 || code == HTM_BUILTIN_SET_TEXASR)
13182 return TEXASR_SPR;
13183 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13184 || code == HTM_BUILTIN_SET_TEXASRU);
13185 return TEXASRU_SPR;
13186 }
13187
13188 /* Return the appropriate SPR regno associated with the given builtin. */
13189 static inline HOST_WIDE_INT
13190 htm_spr_regno (enum rs6000_builtins code)
13191 {
13192 if (code == HTM_BUILTIN_GET_TFHAR
13193 || code == HTM_BUILTIN_SET_TFHAR)
13194 return TFHAR_REGNO;
13195 else if (code == HTM_BUILTIN_GET_TFIAR
13196 || code == HTM_BUILTIN_SET_TFIAR)
13197 return TFIAR_REGNO;
13198 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
13199 || code == HTM_BUILTIN_SET_TEXASR
13200 || code == HTM_BUILTIN_GET_TEXASRU
13201 || code == HTM_BUILTIN_SET_TEXASRU);
13202 return TEXASR_REGNO;
13203 }
13204
13205 /* Return the correct ICODE value depending on whether we are
13206 setting or reading the HTM SPRs. */
13207 static inline enum insn_code
13208 rs6000_htm_spr_icode (bool nonvoid)
13209 {
13210 if (nonvoid)
13211 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13212 else
13213 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13214 }
13215
13216 /* Expand the HTM builtin in EXP and store the result in TARGET.
13217 Store true in *EXPANDEDP if we found a builtin to expand. */
13218 static rtx
13219 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13220 {
13221 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13222 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13223 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13224 const struct builtin_description *d;
13225 size_t i;
13226
13227 *expandedp = true;
13228
13229 if (!TARGET_POWERPC64
13230 && (fcode == HTM_BUILTIN_TABORTDC
13231 || fcode == HTM_BUILTIN_TABORTDCI))
13232 {
13233 size_t uns_fcode = (size_t)fcode;
13234 const char *name = rs6000_builtin_info[uns_fcode].name;
13235 error ("builtin %s is only valid in 64-bit mode", name);
13236 return const0_rtx;
13237 }
13238
13239 /* Expand the HTM builtins. */
13240 d = bdesc_htm;
13241 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13242 if (d->code == fcode)
13243 {
13244 rtx op[MAX_HTM_OPERANDS], pat;
13245 int nopnds = 0;
13246 tree arg;
13247 call_expr_arg_iterator iter;
13248 unsigned attr = rs6000_builtin_info[fcode].attr;
13249 enum insn_code icode = d->icode;
13250 const struct insn_operand_data *insn_op;
13251 bool uses_spr = (attr & RS6000_BTC_SPR);
13252 rtx cr = NULL_RTX;
13253
13254 if (uses_spr)
13255 icode = rs6000_htm_spr_icode (nonvoid);
13256 insn_op = &insn_data[icode].operand[0];
13257
13258 if (nonvoid)
13259 {
13260 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
13261 if (!target
13262 || GET_MODE (target) != tmode
13263 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13264 target = gen_reg_rtx (tmode);
13265 if (uses_spr)
13266 op[nopnds++] = target;
13267 }
13268
13269 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13270 {
13271 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13272 return const0_rtx;
13273
13274 insn_op = &insn_data[icode].operand[nopnds];
13275
13276 op[nopnds] = expand_normal (arg);
13277
13278 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13279 {
13280 if (!strcmp (insn_op->constraint, "n"))
13281 {
13282 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13283 if (!CONST_INT_P (op[nopnds]))
13284 error ("argument %d must be an unsigned literal", arg_num);
13285 else
13286 error ("argument %d is an unsigned literal that is "
13287 "out of range", arg_num);
13288 return const0_rtx;
13289 }
13290 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13291 }
13292
13293 nopnds++;
13294 }
13295
13296 /* Handle the builtins for extended mnemonics. These accept
13297 no arguments, but map to builtins that take arguments. */
13298 switch (fcode)
13299 {
13300 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13301 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13302 op[nopnds++] = GEN_INT (1);
13303 if (flag_checking)
13304 attr |= RS6000_BTC_UNARY;
13305 break;
13306 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13307 op[nopnds++] = GEN_INT (0);
13308 if (flag_checking)
13309 attr |= RS6000_BTC_UNARY;
13310 break;
13311 default:
13312 break;
13313 }
13314
13315 /* If this builtin accesses SPRs, then pass in the appropriate
13316 SPR number and SPR regno as the last two operands. */
13317 if (uses_spr)
13318 {
13319 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13320 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13321 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
13322 }
13323 /* If this builtin accesses a CR, then pass in a scratch
13324 CR as the last operand. */
13325 else if (attr & RS6000_BTC_CR)
13326 {
cr = gen_reg_rtx (CCmode);
13327 op[nopnds++] = cr;
13328 }
13329
13330 if (flag_checking)
13331 {
13332 int expected_nopnds = 0;
13333 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
13334 expected_nopnds = 1;
13335 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
13336 expected_nopnds = 2;
13337 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
13338 expected_nopnds = 3;
13339 if (!(attr & RS6000_BTC_VOID))
13340 expected_nopnds += 1;
13341 if (uses_spr)
13342 expected_nopnds += 2;
13343
13344 gcc_assert (nopnds == expected_nopnds
13345 && nopnds <= MAX_HTM_OPERANDS);
13346 }
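/* Sanity-check arithmetic: the expected count is the builtin's arity,
   plus one for a non-void result, plus the SPR number/regno pair pushed
   above for SPR accessors.  E.g. an SPR reader such as
   __builtin_get_tfhar (no arguments, non-void) comes to
   0 + 1 + 2 == 3 operands.  */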
13347
13348 switch (nopnds)
13349 {
13350 case 1:
13351 pat = GEN_FCN (icode) (op[0]);
13352 break;
13353 case 2:
13354 pat = GEN_FCN (icode) (op[0], op[1]);
13355 break;
13356 case 3:
13357 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
13358 break;
13359 case 4:
13360 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
13361 break;
13362 default:
13363 gcc_unreachable ();
13364 }
13365 if (!pat)
13366 return NULL_RTX;
13367 emit_insn (pat);
13368
13369 if (attr & RS6000_BTC_CR)
13370 {
13371 if (fcode == HTM_BUILTIN_TBEGIN)
13372 {
13373 /* Emit code to set TARGET to true or false depending on
13374 whether the tbegin. instruction succeeded or failed
13375 to start a transaction. We do this by placing the 1's
13376 complement of CR's EQ bit into TARGET. */
13377 rtx scratch = gen_reg_rtx (SImode);
13378 emit_insn (gen_rtx_SET (scratch,
13379 gen_rtx_EQ (SImode, cr,
13380 const0_rtx)));
13381 emit_insn (gen_rtx_SET (target,
13382 gen_rtx_XOR (SImode, scratch,
13383 GEN_INT (1))));
13384 }
13385 else
13386 {
13387 /* Emit code to copy the 4-bit condition register field
13388 CR into the least significant end of register TARGET. */
13389 rtx scratch1 = gen_reg_rtx (SImode);
13390 rtx scratch2 = gen_reg_rtx (SImode);
13391 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
13392 emit_insn (gen_movcc (subreg, cr));
13393 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
13394 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
13395 }
13396 }
13397
13398 if (nonvoid)
13399 return target;
13400 return const0_rtx;
13401 }
13402
13403 *expandedp = false;
13404 return NULL_RTX;
13405 }
13406
13407 static rtx
13408 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
13409 {
13410 rtx pat;
13411 tree arg0 = CALL_EXPR_ARG (exp, 0);
13412 tree arg1 = CALL_EXPR_ARG (exp, 1);
13413 tree arg2 = CALL_EXPR_ARG (exp, 2);
13414 rtx op0 = expand_normal (arg0);
13415 rtx op1 = expand_normal (arg1);
13416 rtx op2 = expand_normal (arg2);
13417 machine_mode tmode = insn_data[icode].operand[0].mode;
13418 machine_mode mode0 = insn_data[icode].operand[1].mode;
13419 machine_mode mode1 = insn_data[icode].operand[2].mode;
13420 machine_mode mode2 = insn_data[icode].operand[3].mode;
13421
13422 if (icode == CODE_FOR_nothing)
13423 /* Builtin not supported on this processor. */
13424 return 0;
13425
13426 /* If we got invalid arguments, bail out before generating bad rtl. */
13427 if (arg0 == error_mark_node
13428 || arg1 == error_mark_node
13429 || arg2 == error_mark_node)
13430 return const0_rtx;
13431
13432 /* Check and prepare argument depending on the instruction code.
13433
13434 Note that a switch statement instead of the sequence of tests
13435 would be incorrect as many of the CODE_FOR values could be
13436 CODE_FOR_nothing and that would yield multiple alternatives
13437 with identical values. We'd never reach here at runtime in
13438 this case. */
13439 if (icode == CODE_FOR_altivec_vsldoi_v4sf
13440 || icode == CODE_FOR_altivec_vsldoi_v4si
13441 || icode == CODE_FOR_altivec_vsldoi_v8hi
13442 || icode == CODE_FOR_altivec_vsldoi_v16qi)
13443 {
13444 /* Only allow 4-bit unsigned literals. */
13445 STRIP_NOPS (arg2);
13446 if (TREE_CODE (arg2) != INTEGER_CST
13447 || TREE_INT_CST_LOW (arg2) & ~0xf)
13448 {
13449 error ("argument 3 must be a 4-bit unsigned literal");
13450 return const0_rtx;
13451 }
13452 }
13453 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
13454 || icode == CODE_FOR_vsx_xxpermdi_v2di
13455 || icode == CODE_FOR_vsx_xxsldwi_v16qi
13456 || icode == CODE_FOR_vsx_xxsldwi_v8hi
13457 || icode == CODE_FOR_vsx_xxsldwi_v4si
13458 || icode == CODE_FOR_vsx_xxsldwi_v4sf
13459 || icode == CODE_FOR_vsx_xxsldwi_v2di
13460 || icode == CODE_FOR_vsx_xxsldwi_v2df)
13461 {
13462 /* Only allow 2-bit unsigned literals. */
13463 STRIP_NOPS (arg2);
13464 if (TREE_CODE (arg2) != INTEGER_CST
13465 || TREE_INT_CST_LOW (arg2) & ~0x3)
13466 {
13467 error ("argument 3 must be a 2-bit unsigned literal");
13468 return const0_rtx;
13469 }
13470 }
13471 else if (icode == CODE_FOR_vsx_set_v2df
13472 || icode == CODE_FOR_vsx_set_v2di
13473 || icode == CODE_FOR_bcdadd
13474 || icode == CODE_FOR_bcdadd_lt
13475 || icode == CODE_FOR_bcdadd_eq
13476 || icode == CODE_FOR_bcdadd_gt
13477 || icode == CODE_FOR_bcdsub
13478 || icode == CODE_FOR_bcdsub_lt
13479 || icode == CODE_FOR_bcdsub_eq
13480 || icode == CODE_FOR_bcdsub_gt)
13481 {
13482 /* Only allow 1-bit unsigned literals. */
13483 STRIP_NOPS (arg2);
13484 if (TREE_CODE (arg2) != INTEGER_CST
13485 || TREE_INT_CST_LOW (arg2) & ~0x1)
13486 {
13487 error ("argument 3 must be a 1-bit unsigned literal");
13488 return const0_rtx;
13489 }
13490 }
13491 else if (icode == CODE_FOR_dfp_ddedpd_dd
13492 || icode == CODE_FOR_dfp_ddedpd_td)
13493 {
13494 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
13495 STRIP_NOPS (arg0);
13496 if (TREE_CODE (arg0) != INTEGER_CST
13497 || TREE_INT_CST_LOW (arg0) & ~0x3)
13498 {
13499 error ("argument 1 must be 0 or 2");
13500 return const0_rtx;
13501 }
13502 }
13503 else if (icode == CODE_FOR_dfp_denbcd_dd
13504 || icode == CODE_FOR_dfp_denbcd_td)
13505 {
13506 /* Only allow 1-bit unsigned literals. */
13507 STRIP_NOPS (arg0);
13508 if (TREE_CODE (arg0) != INTEGER_CST
13509 || TREE_INT_CST_LOW (arg0) & ~0x1)
13510 {
13511 error ("argument 1 must be a 1-bit unsigned literal");
13512 return const0_rtx;
13513 }
13514 }
13515 else if (icode == CODE_FOR_dfp_dscli_dd
13516 || icode == CODE_FOR_dfp_dscli_td
13517 || icode == CODE_FOR_dfp_dscri_dd
13518 || icode == CODE_FOR_dfp_dscri_td)
13519 {
13520 /* Only allow 6-bit unsigned literals. */
13521 STRIP_NOPS (arg1);
13522 if (TREE_CODE (arg1) != INTEGER_CST
13523 || TREE_INT_CST_LOW (arg1) & ~0x3f)
13524 {
13525 error ("argument 2 must be a 6-bit unsigned literal");
13526 return const0_rtx;
13527 }
13528 }
13529 else if (icode == CODE_FOR_crypto_vshasigmaw
13530 || icode == CODE_FOR_crypto_vshasigmad)
13531 {
13532 /* Check whether the 2nd and 3rd arguments are integer constants and in
13533 range and prepare arguments. */
13534 STRIP_NOPS (arg1);
13535 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
13536 {
13537 error ("argument 2 must be 0 or 1");
13538 return const0_rtx;
13539 }
13540
13541 STRIP_NOPS (arg2);
13542 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
13543 {
13544 error ("argument 3 must be in the range 0..15");
13545 return const0_rtx;
13546 }
13547 }
13548
13549 if (target == 0
13550 || GET_MODE (target) != tmode
13551 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13552 target = gen_reg_rtx (tmode);
13553
13554 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13555 op0 = copy_to_mode_reg (mode0, op0);
13556 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13557 op1 = copy_to_mode_reg (mode1, op1);
13558 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13559 op2 = copy_to_mode_reg (mode2, op2);
13560
13561 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
13562 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
13563 else
13564 pat = GEN_FCN (icode) (target, op0, op1, op2);
13565 if (! pat)
13566 return 0;
13567 emit_insn (pat);
13568
13569 return target;
13570 }
13571
13572 /* Expand the lvx builtins. */
13573 static rtx
13574 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
13575 {
13576 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13577 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13578 tree arg0;
13579 machine_mode tmode, mode0;
13580 rtx pat, op0;
13581 enum insn_code icode;
13582
13583 switch (fcode)
13584 {
13585 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
13586 icode = CODE_FOR_vector_altivec_load_v16qi;
13587 break;
13588 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
13589 icode = CODE_FOR_vector_altivec_load_v8hi;
13590 break;
13591 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
13592 icode = CODE_FOR_vector_altivec_load_v4si;
13593 break;
13594 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
13595 icode = CODE_FOR_vector_altivec_load_v4sf;
13596 break;
13597 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
13598 icode = CODE_FOR_vector_altivec_load_v2df;
13599 break;
13600 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
13601 icode = CODE_FOR_vector_altivec_load_v2di;
break;
13602 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
13603 icode = CODE_FOR_vector_altivec_load_v1ti;
13604 break;
13605 default:
13606 *expandedp = false;
13607 return NULL_RTX;
13608 }
13609
13610 *expandedp = true;
13611
13612 arg0 = CALL_EXPR_ARG (exp, 0);
13613 op0 = expand_normal (arg0);
13614 tmode = insn_data[icode].operand[0].mode;
13615 mode0 = insn_data[icode].operand[1].mode;
13616
13617 if (target == 0
13618 || GET_MODE (target) != tmode
13619 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13620 target = gen_reg_rtx (tmode);
13621
13622 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13623 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13624
13625 pat = GEN_FCN (icode) (target, op0);
13626 if (! pat)
13627 return 0;
13628 emit_insn (pat);
13629 return target;
13630 }
13631
13632 /* Expand the stvx builtins. */
13633 static rtx
13634 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13635 bool *expandedp)
13636 {
13637 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13638 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13639 tree arg0, arg1;
13640 machine_mode mode0, mode1;
13641 rtx pat, op0, op1;
13642 enum insn_code icode;
13643
13644 switch (fcode)
13645 {
13646 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
13647 icode = CODE_FOR_vector_altivec_store_v16qi;
13648 break;
13649 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
13650 icode = CODE_FOR_vector_altivec_store_v8hi;
13651 break;
13652 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13653 icode = CODE_FOR_vector_altivec_store_v4si;
13654 break;
13655 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13656 icode = CODE_FOR_vector_altivec_store_v4sf;
13657 break;
13658 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13659 icode = CODE_FOR_vector_altivec_store_v2df;
13660 break;
13661 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
13662 icode = CODE_FOR_vector_altivec_store_v2di;
break;
13663 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13664 icode = CODE_FOR_vector_altivec_store_v1ti;
13665 break;
13666 default:
13667 *expandedp = false;
13668 return NULL_RTX;
13669 }
13670
13671 arg0 = CALL_EXPR_ARG (exp, 0);
13672 arg1 = CALL_EXPR_ARG (exp, 1);
13673 op0 = expand_normal (arg0);
13674 op1 = expand_normal (arg1);
13675 mode0 = insn_data[icode].operand[0].mode;
13676 mode1 = insn_data[icode].operand[1].mode;
13677
13678 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13679 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13680 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13681 op1 = copy_to_mode_reg (mode1, op1);
13682
13683 pat = GEN_FCN (icode) (op0, op1);
13684 if (pat)
13685 emit_insn (pat);
13686
13687 *expandedp = true;
13688 return NULL_RTX;
13689 }
13690
13691 /* Expand the dst builtins. */
13692 static rtx
13693 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13694 bool *expandedp)
13695 {
13696 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13697 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13698 tree arg0, arg1, arg2;
13699 machine_mode mode0, mode1;
13700 rtx pat, op0, op1, op2;
13701 const struct builtin_description *d;
13702 size_t i;
13703
13704 *expandedp = false;
13705
13706 /* Handle DST variants. */
13707 d = bdesc_dst;
13708 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13709 if (d->code == fcode)
13710 {
13711 arg0 = CALL_EXPR_ARG (exp, 0);
13712 arg1 = CALL_EXPR_ARG (exp, 1);
13713 arg2 = CALL_EXPR_ARG (exp, 2);
13714 op0 = expand_normal (arg0);
13715 op1 = expand_normal (arg1);
13716 op2 = expand_normal (arg2);
13717 mode0 = insn_data[d->icode].operand[0].mode;
13718 mode1 = insn_data[d->icode].operand[1].mode;
13719
13720 /* Invalid arguments, bail out before generating bad rtl. */
13721 if (arg0 == error_mark_node
13722 || arg1 == error_mark_node
13723 || arg2 == error_mark_node)
13724 return const0_rtx;
13725
13726 *expandedp = true;
13727 STRIP_NOPS (arg2);
13728 if (TREE_CODE (arg2) != INTEGER_CST
13729 || TREE_INT_CST_LOW (arg2) & ~0x3)
13730 {
13731 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13732 return const0_rtx;
13733 }
13734
13735 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13736 op0 = copy_to_mode_reg (Pmode, op0);
13737 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13738 op1 = copy_to_mode_reg (mode1, op1);
13739
13740 pat = GEN_FCN (d->icode) (op0, op1, op2);
13741 if (pat != 0)
13742 emit_insn (pat);
13743
13744 return NULL_RTX;
13745 }
13746
13747 return NULL_RTX;
13748 }
13749
13750 /* Expand vec_init builtin. */
13751 static rtx
13752 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13753 {
13754 machine_mode tmode = TYPE_MODE (type);
13755 machine_mode inner_mode = GET_MODE_INNER (tmode);
13756 int i, n_elt = GET_MODE_NUNITS (tmode);
13757
13758 gcc_assert (VECTOR_MODE_P (tmode));
13759 gcc_assert (n_elt == call_expr_nargs (exp));
13760
13761 if (!target || !register_operand (target, tmode))
13762 target = gen_reg_rtx (tmode);
13763
13764 /* If we have a vector composed of a single element, such as V1TImode, do
13765 the initialization directly. */
13766 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13767 {
13768 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13769 emit_move_insn (target, gen_lowpart (tmode, x));
13770 }
13771 else
13772 {
13773 rtvec v = rtvec_alloc (n_elt);
13774
13775 for (i = 0; i < n_elt; ++i)
13776 {
13777 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13778 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13779 }
13780
13781 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13782 }
13783
13784 return target;
13785 }
13786
13787 /* Return the integer constant in ARG. Constrain it to be in the range
13788 of the subparts of VEC_TYPE; issue an error if not. */
13789
13790 static int
13791 get_element_number (tree vec_type, tree arg)
13792 {
13793 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13794
13795 if (!tree_fits_uhwi_p (arg)
13796 || (elt = tree_to_uhwi (arg), elt > max))
13797 {
13798 error ("selector must be an integer constant in the range 0..%wi", max);
13799 return 0;
13800 }
13801
13802 return elt;
13803 }
13804
13805 /* Expand vec_set builtin. */
13806 static rtx
13807 altivec_expand_vec_set_builtin (tree exp)
13808 {
13809 machine_mode tmode, mode1;
13810 tree arg0, arg1, arg2;
13811 int elt;
13812 rtx op0, op1;
13813
13814 arg0 = CALL_EXPR_ARG (exp, 0);
13815 arg1 = CALL_EXPR_ARG (exp, 1);
13816 arg2 = CALL_EXPR_ARG (exp, 2);
13817
13818 tmode = TYPE_MODE (TREE_TYPE (arg0));
13819 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13820 gcc_assert (VECTOR_MODE_P (tmode));
13821
13822 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13823 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13824 elt = get_element_number (TREE_TYPE (arg0), arg2);
13825
13826 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13827 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13828
13829 op0 = force_reg (tmode, op0);
13830 op1 = force_reg (mode1, op1);
13831
13832 rs6000_expand_vector_set (op0, op1, elt);
13833
13834 return op0;
13835 }
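/* A hedged usage sketch: through the overloaded vec_insert interface,

       vector int v;
       v = vec_insert (42, v, 3);

   would reach this expander with ARG0 = v, ARG1 = 42 and ARG2 = 3,
   so get_element_number checks 3 against the four V4SI subparts.  */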
13836
13837 /* Expand vec_ext builtin. */
13838 static rtx
13839 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13840 {
13841 machine_mode tmode, mode0;
13842 tree arg0, arg1;
13843 int elt;
13844 rtx op0;
13845
13846 arg0 = CALL_EXPR_ARG (exp, 0);
13847 arg1 = CALL_EXPR_ARG (exp, 1);
13848
13849 op0 = expand_normal (arg0);
13850 elt = get_element_number (TREE_TYPE (arg0), arg1);
13851
13852 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13853 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13854 gcc_assert (VECTOR_MODE_P (mode0));
13855
13856 op0 = force_reg (mode0, op0);
13857
13858 if (optimize || !target || !register_operand (target, tmode))
13859 target = gen_reg_rtx (tmode);
13860
13861 rs6000_expand_vector_extract (target, op0, elt);
13862
13863 return target;
13864 }
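/* A hedged counterpart for extraction:

       int x = vec_extract (v, 3);

   would land here with ARG0 = v and ARG1 = 3, emitting a single
   rs6000_expand_vector_extract of element 3 into a fresh register.  */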
13865
13866 /* Expand the builtin in EXP and store the result in TARGET. Store
13867 true in *EXPANDEDP if we found a builtin to expand. */
13868 static rtx
13869 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13870 {
13871 const struct builtin_description *d;
13872 size_t i;
13873 enum insn_code icode;
13874 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13875 tree arg0;
13876 rtx op0, pat;
13877 machine_mode tmode, mode0;
13878 enum rs6000_builtins fcode
13879 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13880
13881 if (rs6000_overloaded_builtin_p (fcode))
13882 {
13883 *expandedp = true;
13884 error ("unresolved overload for Altivec builtin %qF", fndecl);
13885
13886 /* Given it is invalid, just generate a normal call. */
13887 return expand_call (exp, target, false);
13888 }
13889
13890 target = altivec_expand_ld_builtin (exp, target, expandedp);
13891 if (*expandedp)
13892 return target;
13893
13894 target = altivec_expand_st_builtin (exp, target, expandedp);
13895 if (*expandedp)
13896 return target;
13897
13898 target = altivec_expand_dst_builtin (exp, target, expandedp);
13899 if (*expandedp)
13900 return target;
13901
13902 *expandedp = true;
13903
13904 switch (fcode)
13905 {
13906 case ALTIVEC_BUILTIN_STVX_V2DF:
13907 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13908 case ALTIVEC_BUILTIN_STVX_V2DI:
13909 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13910 case ALTIVEC_BUILTIN_STVX_V4SF:
13911 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13912 case ALTIVEC_BUILTIN_STVX:
13913 case ALTIVEC_BUILTIN_STVX_V4SI:
13914 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13915 case ALTIVEC_BUILTIN_STVX_V8HI:
13916 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13917 case ALTIVEC_BUILTIN_STVX_V16QI:
13918 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13919 case ALTIVEC_BUILTIN_STVEBX:
13920 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13921 case ALTIVEC_BUILTIN_STVEHX:
13922 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13923 case ALTIVEC_BUILTIN_STVEWX:
13924 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13925 case ALTIVEC_BUILTIN_STVXL_V2DF:
13926 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13927 case ALTIVEC_BUILTIN_STVXL_V2DI:
13928 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13929 case ALTIVEC_BUILTIN_STVXL_V4SF:
13930 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13931 case ALTIVEC_BUILTIN_STVXL:
13932 case ALTIVEC_BUILTIN_STVXL_V4SI:
13933 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13934 case ALTIVEC_BUILTIN_STVXL_V8HI:
13935 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13936 case ALTIVEC_BUILTIN_STVXL_V16QI:
13937 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13938
13939 case ALTIVEC_BUILTIN_STVLX:
13940 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13941 case ALTIVEC_BUILTIN_STVLXL:
13942 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13943 case ALTIVEC_BUILTIN_STVRX:
13944 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13945 case ALTIVEC_BUILTIN_STVRXL:
13946 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13947
13948 case VSX_BUILTIN_STXVD2X_V1TI:
13949 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13950 case VSX_BUILTIN_STXVD2X_V2DF:
13951 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13952 case VSX_BUILTIN_STXVD2X_V2DI:
13953 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13954 case VSX_BUILTIN_STXVW4X_V4SF:
13955 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13956 case VSX_BUILTIN_STXVW4X_V4SI:
13957 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13958 case VSX_BUILTIN_STXVW4X_V8HI:
13959 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13960 case VSX_BUILTIN_STXVW4X_V16QI:
13961 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13962
13963 case ALTIVEC_BUILTIN_MFVSCR:
13964 icode = CODE_FOR_altivec_mfvscr;
13965 tmode = insn_data[icode].operand[0].mode;
13966
13967 if (target == 0
13968 || GET_MODE (target) != tmode
13969 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13970 target = gen_reg_rtx (tmode);
13971
13972 pat = GEN_FCN (icode) (target);
13973 if (! pat)
13974 return 0;
13975 emit_insn (pat);
13976 return target;
13977
13978 case ALTIVEC_BUILTIN_MTVSCR:
13979 icode = CODE_FOR_altivec_mtvscr;
13980 arg0 = CALL_EXPR_ARG (exp, 0);
13981 op0 = expand_normal (arg0);
13982 mode0 = insn_data[icode].operand[0].mode;
13983
13984 /* If we got invalid arguments, bail out before generating bad rtl. */
13985 if (arg0 == error_mark_node)
13986 return const0_rtx;
13987
13988 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13989 op0 = copy_to_mode_reg (mode0, op0);
13990
13991 pat = GEN_FCN (icode) (op0);
13992 if (pat)
13993 emit_insn (pat);
13994 return NULL_RTX;
13995
13996 case ALTIVEC_BUILTIN_DSSALL:
13997 emit_insn (gen_altivec_dssall ());
13998 return NULL_RTX;
13999
14000 case ALTIVEC_BUILTIN_DSS:
14001 icode = CODE_FOR_altivec_dss;
14002 arg0 = CALL_EXPR_ARG (exp, 0);
14003 STRIP_NOPS (arg0);
14004 op0 = expand_normal (arg0);
14005 mode0 = insn_data[icode].operand[0].mode;
14006
14007 /* If we got invalid arguments, bail out before generating bad rtl. */
14008 if (arg0 == error_mark_node)
14009 return const0_rtx;
14010
14011 if (TREE_CODE (arg0) != INTEGER_CST
14012 || TREE_INT_CST_LOW (arg0) & ~0x3)
14013 {
14014 error ("argument to dss must be a 2-bit unsigned literal");
14015 return const0_rtx;
14016 }
14017
14018 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14019 op0 = copy_to_mode_reg (mode0, op0);
14020
14021 emit_insn (gen_altivec_dss (op0));
14022 return NULL_RTX;
14023
14024 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14025 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14026 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14027 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14028 case VSX_BUILTIN_VEC_INIT_V2DF:
14029 case VSX_BUILTIN_VEC_INIT_V2DI:
14030 case VSX_BUILTIN_VEC_INIT_V1TI:
14031 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14032
14033 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14034 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14035 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14036 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14037 case VSX_BUILTIN_VEC_SET_V2DF:
14038 case VSX_BUILTIN_VEC_SET_V2DI:
14039 case VSX_BUILTIN_VEC_SET_V1TI:
14040 return altivec_expand_vec_set_builtin (exp);
14041
14042 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14043 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14044 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14045 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14046 case VSX_BUILTIN_VEC_EXT_V2DF:
14047 case VSX_BUILTIN_VEC_EXT_V2DI:
14048 case VSX_BUILTIN_VEC_EXT_V1TI:
14049 return altivec_expand_vec_ext_builtin (exp, target);
14050
14051 default:
14052 /* Fall through to the table-driven expanders below. */
14053 break;
14054 }
14055
14056 /* Expand abs* operations. */
14057 d = bdesc_abs;
14058 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14059 if (d->code == fcode)
14060 return altivec_expand_abs_builtin (d->icode, exp, target);
14061
14062 /* Expand the AltiVec predicates. */
14063 d = bdesc_altivec_preds;
14064 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14065 if (d->code == fcode)
14066 return altivec_expand_predicate_builtin (d->icode, exp, target);
14067
14068 /* LV* are funky. We initialized them differently, so expand them here. */
14069 switch (fcode)
14070 {
14071 case ALTIVEC_BUILTIN_LVSL:
14072 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14073 exp, target, false);
14074 case ALTIVEC_BUILTIN_LVSR:
14075 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14076 exp, target, false);
14077 case ALTIVEC_BUILTIN_LVEBX:
14078 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14079 exp, target, false);
14080 case ALTIVEC_BUILTIN_LVEHX:
14081 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14082 exp, target, false);
14083 case ALTIVEC_BUILTIN_LVEWX:
14084 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14085 exp, target, false);
14086 case ALTIVEC_BUILTIN_LVXL_V2DF:
14087 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14088 exp, target, false);
14089 case ALTIVEC_BUILTIN_LVXL_V2DI:
14090 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14091 exp, target, false);
14092 case ALTIVEC_BUILTIN_LVXL_V4SF:
14093 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14094 exp, target, false);
14095 case ALTIVEC_BUILTIN_LVXL:
14096 case ALTIVEC_BUILTIN_LVXL_V4SI:
14097 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14098 exp, target, false);
14099 case ALTIVEC_BUILTIN_LVXL_V8HI:
14100 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14101 exp, target, false);
14102 case ALTIVEC_BUILTIN_LVXL_V16QI:
14103 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14104 exp, target, false);
14105 case ALTIVEC_BUILTIN_LVX_V2DF:
14106 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
14107 exp, target, false);
14108 case ALTIVEC_BUILTIN_LVX_V2DI:
14109 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
14110 exp, target, false);
14111 case ALTIVEC_BUILTIN_LVX_V4SF:
14112 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
14113 exp, target, false);
14114 case ALTIVEC_BUILTIN_LVX:
14115 case ALTIVEC_BUILTIN_LVX_V4SI:
14116 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
14117 exp, target, false);
14118 case ALTIVEC_BUILTIN_LVX_V8HI:
14119 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
14120 exp, target, false);
14121 case ALTIVEC_BUILTIN_LVX_V16QI:
14122 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
14123 exp, target, false);
14124 case ALTIVEC_BUILTIN_LVLX:
14125 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14126 exp, target, true);
14127 case ALTIVEC_BUILTIN_LVLXL:
14128 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14129 exp, target, true);
14130 case ALTIVEC_BUILTIN_LVRX:
14131 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14132 exp, target, true);
14133 case ALTIVEC_BUILTIN_LVRXL:
14134 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14135 exp, target, true);
14136 case VSX_BUILTIN_LXVD2X_V1TI:
14137 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14138 exp, target, false);
14139 case VSX_BUILTIN_LXVD2X_V2DF:
14140 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14141 exp, target, false);
14142 case VSX_BUILTIN_LXVD2X_V2DI:
14143 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14144 exp, target, false);
14145 case VSX_BUILTIN_LXVW4X_V4SF:
14146 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14147 exp, target, false);
14148 case VSX_BUILTIN_LXVW4X_V4SI:
14149 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14150 exp, target, false);
14151 case VSX_BUILTIN_LXVW4X_V8HI:
14152 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14153 exp, target, false);
14154 case VSX_BUILTIN_LXVW4X_V16QI:
14155 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14156 exp, target, false);
14157
14158 default:
14159 /* Fall through; the builtin is reported as not expanded below. */
14160 break;
14161 }
14162
14163 *expandedp = false;
14164 return NULL_RTX;
14165 }
14166
14167 /* Expand the builtin in EXP and store the result in TARGET. Store
14168 true in *EXPANDEDP if we found a builtin to expand. */
14169 static rtx
14170 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
14171 {
14172 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14173 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14174 const struct builtin_description *d;
14175 size_t i;
14176
14177 *expandedp = true;
14178
14179 switch (fcode)
14180 {
14181 case PAIRED_BUILTIN_STX:
14182 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
14183 case PAIRED_BUILTIN_LX:
14184 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
14185 default:
14186 /* Fall through to the predicate table scan below. */
14187 break;
14188 }
14189
14190 /* Expand the paired predicates. */
14191 d = bdesc_paired_preds;
14192 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
14193 if (d->code == fcode)
14194 return paired_expand_predicate_builtin (d->icode, exp, target);
14195
14196 *expandedp = false;
14197 return NULL_RTX;
14198 }
14199
14200 /* Binops that need to be initialized manually, but can be expanded
14201 automagically by rs6000_expand_binop_builtin. */
14202 static const struct builtin_description bdesc_2arg_spe[] =
14203 {
14204 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
14205 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
14206 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
14207 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
14208 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
14209 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
14210 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
14211 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
14212 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
14213 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
14214 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
14215 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
14216 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
14217 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
14218 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
14219 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
14220 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
14221 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
14222 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
14223 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
14224 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
14225 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
14226 };
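/* A hedged usage sketch for one table entry: with P pointing at two
   doublewords,

       __ev64_opaque__ v = __builtin_spe_evlddx (p, i);

   is matched against this table by spe_expand_builtin below and
   expanded through rs6000_expand_binop_builtin with
   CODE_FOR_spe_evlddx.  */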
14227
14228 /* Expand the builtin in EXP and store the result in TARGET. Store
14229 true in *EXPANDEDP if we found a builtin to expand.
14230
14231 This expands the SPE builtins that are not simple unary and binary
14232 operations. */
14233 static rtx
14234 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
14235 {
14236 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14237 tree arg1, arg0;
14238 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14239 enum insn_code icode;
14240 machine_mode tmode, mode0;
14241 rtx pat, op0;
14242 const struct builtin_description *d;
14243 size_t i;
14244
14245 *expandedp = true;
14246
14247 /* Syntax check for a 5-bit unsigned immediate. */
14248 switch (fcode)
14249 {
14250 case SPE_BUILTIN_EVSTDD:
14251 case SPE_BUILTIN_EVSTDH:
14252 case SPE_BUILTIN_EVSTDW:
14253 case SPE_BUILTIN_EVSTWHE:
14254 case SPE_BUILTIN_EVSTWHO:
14255 case SPE_BUILTIN_EVSTWWE:
14256 case SPE_BUILTIN_EVSTWWO:
14257 arg1 = CALL_EXPR_ARG (exp, 2);
14258 if (TREE_CODE (arg1) != INTEGER_CST
14259 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14260 {
14261 error ("argument 2 must be a 5-bit unsigned literal");
14262 return const0_rtx;
14263 }
14264 break;
14265 default:
14266 break;
14267 }
14268
14269 /* The evsplat*i instructions are not quite generic. */
14270 switch (fcode)
14271 {
14272 case SPE_BUILTIN_EVSPLATFI:
14273 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
14274 exp, target);
14275 case SPE_BUILTIN_EVSPLATI:
14276 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
14277 exp, target);
14278 default:
14279 break;
14280 }
14281
14282 d = bdesc_2arg_spe;
14283 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
14284 if (d->code == fcode)
14285 return rs6000_expand_binop_builtin (d->icode, exp, target);
14286
14287 d = bdesc_spe_predicates;
14288 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
14289 if (d->code == fcode)
14290 return spe_expand_predicate_builtin (d->icode, exp, target);
14291
14292 d = bdesc_spe_evsel;
14293 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
14294 if (d->code == fcode)
14295 return spe_expand_evsel_builtin (d->icode, exp, target);
14296
14297 switch (fcode)
14298 {
14299 case SPE_BUILTIN_EVSTDDX:
14300 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
14301 case SPE_BUILTIN_EVSTDHX:
14302 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
14303 case SPE_BUILTIN_EVSTDWX:
14304 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
14305 case SPE_BUILTIN_EVSTWHEX:
14306 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
14307 case SPE_BUILTIN_EVSTWHOX:
14308 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
14309 case SPE_BUILTIN_EVSTWWEX:
14310 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
14311 case SPE_BUILTIN_EVSTWWOX:
14312 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
14313 case SPE_BUILTIN_EVSTDD:
14314 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
14315 case SPE_BUILTIN_EVSTDH:
14316 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
14317 case SPE_BUILTIN_EVSTDW:
14318 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
14319 case SPE_BUILTIN_EVSTWHE:
14320 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
14321 case SPE_BUILTIN_EVSTWHO:
14322 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
14323 case SPE_BUILTIN_EVSTWWE:
14324 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
14325 case SPE_BUILTIN_EVSTWWO:
14326 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
14327 case SPE_BUILTIN_MFSPEFSCR:
14328 icode = CODE_FOR_spe_mfspefscr;
14329 tmode = insn_data[icode].operand[0].mode;
14330
14331 if (target == 0
14332 || GET_MODE (target) != tmode
14333 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14334 target = gen_reg_rtx (tmode);
14335
14336 pat = GEN_FCN (icode) (target);
14337 if (! pat)
14338 return 0;
14339 emit_insn (pat);
14340 return target;
14341 case SPE_BUILTIN_MTSPEFSCR:
14342 icode = CODE_FOR_spe_mtspefscr;
14343 arg0 = CALL_EXPR_ARG (exp, 0);
14344 op0 = expand_normal (arg0);
14345 mode0 = insn_data[icode].operand[0].mode;
14346
14347 if (arg0 == error_mark_node)
14348 return const0_rtx;
14349
14350 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14351 op0 = copy_to_mode_reg (mode0, op0);
14352
14353 pat = GEN_FCN (icode) (op0);
14354 if (pat)
14355 emit_insn (pat);
14356 return NULL_RTX;
14357 default:
14358 break;
14359 }
14360
14361 *expandedp = false;
14362 return NULL_RTX;
14363 }
14364
14365 static rtx
14366 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14367 {
14368 rtx pat, scratch, tmp;
14369 tree form = CALL_EXPR_ARG (exp, 0);
14370 tree arg0 = CALL_EXPR_ARG (exp, 1);
14371 tree arg1 = CALL_EXPR_ARG (exp, 2);
14372 rtx op0 = expand_normal (arg0);
14373 rtx op1 = expand_normal (arg1);
14374 machine_mode mode0 = insn_data[icode].operand[1].mode;
14375 machine_mode mode1 = insn_data[icode].operand[2].mode;
14376 int form_int;
14377 enum rtx_code code;
14378
14379 if (TREE_CODE (form) != INTEGER_CST)
14380 {
14381 error ("argument 1 of __builtin_paired_predicate must be a constant");
14382 return const0_rtx;
14383 }
14384 else
14385 form_int = TREE_INT_CST_LOW (form);
14386
14387 gcc_assert (mode0 == mode1);
14388
14389 if (arg0 == error_mark_node || arg1 == error_mark_node)
14390 return const0_rtx;
14391
14392 if (target == 0
14393 || GET_MODE (target) != SImode
14394 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
14395 target = gen_reg_rtx (SImode);
14396 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14397 op0 = copy_to_mode_reg (mode0, op0);
14398 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14399 op1 = copy_to_mode_reg (mode1, op1);
14400
14401 scratch = gen_reg_rtx (CCFPmode);
14402
14403 pat = GEN_FCN (icode) (scratch, op0, op1);
14404 if (!pat)
14405 return const0_rtx;
14406
14407 emit_insn (pat);
14408
14409 switch (form_int)
14410 {
14411 /* LT bit. */
14412 case 0:
14413 code = LT;
14414 break;
14415 /* GT bit. */
14416 case 1:
14417 code = GT;
14418 break;
14419 /* EQ bit. */
14420 case 2:
14421 code = EQ;
14422 break;
14423 /* UN bit. */
14424 case 3:
14425 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
14426 return target;
14427 default:
14428 error ("argument 1 of __builtin_paired_predicate is out of range");
14429 return const0_rtx;
14430 }
14431
14432 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14433 emit_move_insn (target, tmp);
14434 return target;
14435 }
14436
14437 static rtx
14438 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14439 {
14440 rtx pat, scratch, tmp;
14441 tree form = CALL_EXPR_ARG (exp, 0);
14442 tree arg0 = CALL_EXPR_ARG (exp, 1);
14443 tree arg1 = CALL_EXPR_ARG (exp, 2);
14444 rtx op0 = expand_normal (arg0);
14445 rtx op1 = expand_normal (arg1);
14446 machine_mode mode0 = insn_data[icode].operand[1].mode;
14447 machine_mode mode1 = insn_data[icode].operand[2].mode;
14448 int form_int;
14449 enum rtx_code code;
14450
14451 if (TREE_CODE (form) != INTEGER_CST)
14452 {
14453 error ("argument 1 of __builtin_spe_predicate must be a constant");
14454 return const0_rtx;
14455 }
14456 else
14457 form_int = TREE_INT_CST_LOW (form);
14458
14459 gcc_assert (mode0 == mode1);
14460
14461 if (arg0 == error_mark_node || arg1 == error_mark_node)
14462 return const0_rtx;
14463
14464 if (target == 0
14465 || GET_MODE (target) != SImode
14466 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
14467 target = gen_reg_rtx (SImode);
14468
14469 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14470 op0 = copy_to_mode_reg (mode0, op0);
14471 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14472 op1 = copy_to_mode_reg (mode1, op1);
14473
14474 scratch = gen_reg_rtx (CCmode);
14475
14476 pat = GEN_FCN (icode) (scratch, op0, op1);
14477 if (! pat)
14478 return const0_rtx;
14479 emit_insn (pat);
14480
14481 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
14482 _lower_. We use one compare, but look in different bits of the
14483 CR for each variant.
14484
14485 There are 2 elements in each SPE SIMD type (upper/lower). The CR
14486 bits are set as follows:
14487
14488 BIT 0 | BIT 1 | BIT 2 | BIT 3
14489 U | L | (U | L) | (U & L)
14490
14491 So, for an "all" relationship, BIT 3 would be set.
14492 For an "any" relationship, BIT 2 would be set. Etc.
14493
14494 Following traditional nomenclature, these bits map to:
14495
14496 BIT 0 | BIT 1 | BIT 2 | BIT 3
14497 LT | GT | EQ | OV
14498
14499 Below we generate rtl to look in the OV, EQ, LT and GT bits for
14500 the "all", "any", "upper" and "lower" forms respectively. */
14501
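/* A hedged example (builtin name taken on the assumption that it
   appears in bdesc_spe_predicates): with two V2SI values A and B,

       int any_gt = __builtin_spe_evcmpgts (1, a, b);

   performs one evcmpgts compare and copies the EQ-position bit
   (U | L) into ANY_GT, so the result is nonzero when either element
   of A is greater than the corresponding element of B.  */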
14502 switch (form_int)
14503 {
14504 /* All variant. OV bit. */
14505 case 0:
14506 /* We need to get to the OV bit, which is the ORDERED bit. We
14507 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
14508 that's ugly and will make validate_condition_mode die.
14509 So let's just use another pattern. */
14510 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
14511 return target;
14512 /* Any variant. EQ bit. */
14513 case 1:
14514 code = EQ;
14515 break;
14516 /* Upper variant. LT bit. */
14517 case 2:
14518 code = LT;
14519 break;
14520 /* Lower variant. GT bit. */
14521 case 3:
14522 code = GT;
14523 break;
14524 default:
14525 error ("argument 1 of __builtin_spe_predicate is out of range");
14526 return const0_rtx;
14527 }
14528
14529 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14530 emit_move_insn (target, tmp);
14531
14532 return target;
14533 }
14534
14535 /* The evsel builtins look like this:
14536
14537 e = __builtin_spe_evsel_OP (a, b, c, d);
14538
14539 and work like this:
14540
14541 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
14542 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
14543 */
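/* A concrete sketch (builtin name assumed for illustration):

       e = __builtin_spe_evsel_gts (a, b, c, d);

   selects c[upper] when a[upper] > b[upper] and d[upper] otherwise,
   and likewise for the lower element: one compare plus one evsel.  */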
14544
14545 static rtx
14546 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
14547 {
14548 rtx pat, scratch;
14549 tree arg0 = CALL_EXPR_ARG (exp, 0);
14550 tree arg1 = CALL_EXPR_ARG (exp, 1);
14551 tree arg2 = CALL_EXPR_ARG (exp, 2);
14552 tree arg3 = CALL_EXPR_ARG (exp, 3);
14553 rtx op0 = expand_normal (arg0);
14554 rtx op1 = expand_normal (arg1);
14555 rtx op2 = expand_normal (arg2);
14556 rtx op3 = expand_normal (arg3);
14557 machine_mode mode0 = insn_data[icode].operand[1].mode;
14558 machine_mode mode1 = insn_data[icode].operand[2].mode;
14559
14560 gcc_assert (mode0 == mode1);
14561
14562 if (arg0 == error_mark_node || arg1 == error_mark_node
14563 || arg2 == error_mark_node || arg3 == error_mark_node)
14564 return const0_rtx;
14565
14566 if (target == 0
14567 || GET_MODE (target) != mode0
14568 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
14569 target = gen_reg_rtx (mode0);
14570
14571 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14572 op0 = copy_to_mode_reg (mode0, op0);
14573 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14574 op1 = copy_to_mode_reg (mode0, op1);
14575 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14576 op2 = copy_to_mode_reg (mode0, op2);
14577 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
14578 op3 = copy_to_mode_reg (mode0, op3);
14579
14580 /* Generate the compare. */
14581 scratch = gen_reg_rtx (CCmode);
14582 pat = GEN_FCN (icode) (scratch, op0, op1);
14583 if (! pat)
14584 return const0_rtx;
14585 emit_insn (pat);
14586
14587 if (mode0 == V2SImode)
14588 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
14589 else
14590 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
14591
14592 return target;
14593 }
14594
14595 /* Issue an error for a builtin function that is called without the
14596 target options it requires being enabled. */
14597
14598 static void
14599 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14600 {
14601 size_t uns_fncode = (size_t)fncode;
14602 const char *name = rs6000_builtin_info[uns_fncode].name;
14603 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14604
14605 gcc_assert (name != NULL);
14606 if ((fnmask & RS6000_BTM_CELL) != 0)
14607 error ("Builtin function %s is only valid for the cell processor", name);
14608 else if ((fnmask & RS6000_BTM_VSX) != 0)
14609 error ("Builtin function %s requires the -mvsx option", name);
14610 else if ((fnmask & RS6000_BTM_HTM) != 0)
14611 error ("Builtin function %s requires the -mhtm option", name);
14612 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14613 error ("Builtin function %s requires the -maltivec option", name);
14614 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
14615 error ("Builtin function %s requires the -mpaired option", name);
14616 else if ((fnmask & RS6000_BTM_SPE) != 0)
14617 error ("Builtin function %s requires the -mspe option", name);
14618 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14619 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14620 error ("Builtin function %s requires the -mhard-dfp and"
14621 " -mpower8-vector options", name);
14622 else if ((fnmask & RS6000_BTM_DFP) != 0)
14623 error ("Builtin function %s requires the -mhard-dfp option", name);
14624 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14625 error ("Builtin function %s requires the -mpower8-vector option", name);
14626 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14627 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14628 error ("Builtin function %s requires the -mhard-float and"
14629 " -mlong-double-128 options", name);
14630 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14631 error ("Builtin function %s requires the -mhard-float option", name);
14632 else
14633 error ("Builtin function %s is not supported with the current options",
14634 name);
14635 }
14636
14637 /* Expand an expression EXP that calls a built-in function,
14638 with result going to TARGET if that's convenient
14639 (and in mode MODE if that's convenient).
14640 SUBTARGET may be used as the target for computing one of EXP's operands.
14641 IGNORE is nonzero if the value is to be ignored. */
14642
14643 static rtx
14644 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14645 machine_mode mode ATTRIBUTE_UNUSED,
14646 int ignore ATTRIBUTE_UNUSED)
14647 {
14648 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14649 enum rs6000_builtins fcode
14650 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
14651 size_t uns_fcode = (size_t)fcode;
14652 const struct builtin_description *d;
14653 size_t i;
14654 rtx ret;
14655 bool success;
14656 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14657 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
14658
14659 if (TARGET_DEBUG_BUILTIN)
14660 {
14661 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14662 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14663 const char *name2 = ((icode != CODE_FOR_nothing)
14664 ? get_insn_name ((int)icode)
14665 : "nothing");
14666 const char *name3;
14667
14668 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
14669 {
14670 default: name3 = "unknown"; break;
14671 case RS6000_BTC_SPECIAL: name3 = "special"; break;
14672 case RS6000_BTC_UNARY: name3 = "unary"; break;
14673 case RS6000_BTC_BINARY: name3 = "binary"; break;
14674 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
14675 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
14676 case RS6000_BTC_ABS: name3 = "abs"; break;
14677 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
14678 case RS6000_BTC_DST: name3 = "dst"; break;
14679 }
14680
14681
14682 fprintf (stderr,
14683 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
14684 (name1) ? name1 : "---", fcode,
14685 (name2) ? name2 : "---", (int)icode,
14686 name3,
14687 func_valid_p ? "" : ", not valid");
14688 }
14689
14690 if (!func_valid_p)
14691 {
14692 rs6000_invalid_builtin (fcode);
14693
14694 /* Given it is invalid, just generate a normal call. */
14695 return expand_call (exp, target, ignore);
14696 }
14697
14698 switch (fcode)
14699 {
14700 case RS6000_BUILTIN_RECIP:
14701 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
14702
14703 case RS6000_BUILTIN_RECIPF:
14704 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
14705
14706 case RS6000_BUILTIN_RSQRTF:
14707 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
14708
14709 case RS6000_BUILTIN_RSQRT:
14710 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
14711
14712 case POWER7_BUILTIN_BPERMD:
14713 return rs6000_expand_binop_builtin (((TARGET_64BIT)
14714 ? CODE_FOR_bpermd_di
14715 : CODE_FOR_bpermd_si), exp, target);
14716
14717 case RS6000_BUILTIN_GET_TB:
14718 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
14719 target);
14720
14721 case RS6000_BUILTIN_MFTB:
14722 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14723 ? CODE_FOR_rs6000_mftb_di
14724 : CODE_FOR_rs6000_mftb_si),
14725 target);
14726
14727 case RS6000_BUILTIN_MFFS:
14728 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14729
14730 case RS6000_BUILTIN_MTFSF:
14731 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14732
14733 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14734 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14735 {
14736 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14737 : (int) CODE_FOR_altivec_lvsl_direct);
14738 machine_mode tmode = insn_data[icode].operand[0].mode;
14739 machine_mode mode = insn_data[icode].operand[1].mode;
14740 tree arg;
14741 rtx op, addr, pat;
14742
14743 gcc_assert (TARGET_ALTIVEC);
14744
14745 arg = CALL_EXPR_ARG (exp, 0);
14746 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14747 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14748 addr = memory_address (mode, op);
14749 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14750 op = addr;
14751 else
14752 {
14753 /* For the load case we need to negate the address. */
14754 op = gen_reg_rtx (GET_MODE (addr));
14755 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
14756 }
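/* A hedged note on the negation (an inference, not from the original
   source): for an address whose low four bits are a nonzero value s,
   lvsr (-addr) yields the same { s, s+1, ..., s+15 } permute control
   as lvsl (addr), which is the form the vperm-based realignment
   sequence consumes.  */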
14757 op = gen_rtx_MEM (mode, op);
14758
14759 if (target == 0
14760 || GET_MODE (target) != tmode
14761 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14762 target = gen_reg_rtx (tmode);
14763
14764 pat = GEN_FCN (icode) (target, op);
14765 if (!pat)
14766 return 0;
14767 emit_insn (pat);
14768
14769 return target;
14770 }
14771
14772 case ALTIVEC_BUILTIN_VCFUX:
14773 case ALTIVEC_BUILTIN_VCFSX:
14774 case ALTIVEC_BUILTIN_VCTUXS:
14775 case ALTIVEC_BUILTIN_VCTSXS:
14776 /* FIXME: There's got to be a nicer way to handle this case than
14777 constructing a new CALL_EXPR. */
14778 if (call_expr_nargs (exp) == 1)
14779 {
14780 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14781 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14782 }
14783 break;
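/* A hedged illustration: __builtin_altivec_vcfsx (v) written with
   the scale operand omitted is rebuilt above as
   __builtin_altivec_vcfsx (v, 0), so the ordinary binary-op tables
   can expand it.  */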
14784
14785 default:
14786 break;
14787 }
14788
14789 if (TARGET_ALTIVEC)
14790 {
14791 ret = altivec_expand_builtin (exp, target, &success);
14792
14793 if (success)
14794 return ret;
14795 }
14796 if (TARGET_SPE)
14797 {
14798 ret = spe_expand_builtin (exp, target, &success);
14799
14800 if (success)
14801 return ret;
14802 }
14803 if (TARGET_PAIRED_FLOAT)
14804 {
14805 ret = paired_expand_builtin (exp, target, &success);
14806
14807 if (success)
14808 return ret;
14809 }
14810 if (TARGET_HTM)
14811 {
14812 ret = htm_expand_builtin (exp, target, &success);
14813
14814 if (success)
14815 return ret;
14816 }
14817
14818 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14819 gcc_assert (attr == RS6000_BTC_UNARY
14820 || attr == RS6000_BTC_BINARY
14821 || attr == RS6000_BTC_TERNARY);
14822
14823 /* Handle simple unary operations. */
14824 d = bdesc_1arg;
14825 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14826 if (d->code == fcode)
14827 return rs6000_expand_unop_builtin (d->icode, exp, target);
14828
14829 /* Handle simple binary operations. */
14830 d = bdesc_2arg;
14831 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14832 if (d->code == fcode)
14833 return rs6000_expand_binop_builtin (d->icode, exp, target);
14834
14835 /* Handle simple ternary operations. */
14836 d = bdesc_3arg;
14837 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14838 if (d->code == fcode)
14839 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14840
14841 gcc_unreachable ();
14842 }
14843
14844 static void
14845 rs6000_init_builtins (void)
14846 {
14847 tree tdecl;
14848 tree ftype;
14849 machine_mode mode;
14850
14851 if (TARGET_DEBUG_BUILTIN)
14852 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14853 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14854 (TARGET_SPE) ? ", spe" : "",
14855 (TARGET_ALTIVEC) ? ", altivec" : "",
14856 (TARGET_VSX) ? ", vsx" : "");
14857
14858 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14859 V2SF_type_node = build_vector_type (float_type_node, 2);
14860 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14861 V2DF_type_node = build_vector_type (double_type_node, 2);
14862 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14863 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14864 V4SF_type_node = build_vector_type (float_type_node, 4);
14865 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14866 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14867
14868 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14869 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14870 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14871 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14872
14873 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14874 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14875 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14876 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14877
14878 /* We use V1TI mode as a special container to hold __int128_t items that
14879 must live in VSX registers. */
14880 if (intTI_type_node)
14881 {
14882 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14883 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
14884 }
14885
14886 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14887 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14888 'vector unsigned short'. */
14889
14890 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14891 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14892 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14893 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14894 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14895
14896 long_integer_type_internal_node = long_integer_type_node;
14897 long_unsigned_type_internal_node = long_unsigned_type_node;
14898 long_long_integer_type_internal_node = long_long_integer_type_node;
14899 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14900 intQI_type_internal_node = intQI_type_node;
14901 uintQI_type_internal_node = unsigned_intQI_type_node;
14902 intHI_type_internal_node = intHI_type_node;
14903 uintHI_type_internal_node = unsigned_intHI_type_node;
14904 intSI_type_internal_node = intSI_type_node;
14905 uintSI_type_internal_node = unsigned_intSI_type_node;
14906 intDI_type_internal_node = intDI_type_node;
14907 uintDI_type_internal_node = unsigned_intDI_type_node;
14908 intTI_type_internal_node = intTI_type_node;
14909 uintTI_type_internal_node = unsigned_intTI_type_node;
14910 float_type_internal_node = float_type_node;
14911 double_type_internal_node = double_type_node;
14912 long_double_type_internal_node = long_double_type_node;
14913 dfloat64_type_internal_node = dfloat64_type_node;
14914 dfloat128_type_internal_node = dfloat128_type_node;
14915 void_type_internal_node = void_type_node;
14916
14917 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
14918 IFmode is the IBM extended 128-bit format that is a pair of doubles.
14919 TFmode will be either IEEE 128-bit floating point or the IBM double-double
14920 format that uses a pair of doubles, depending on the switches and
14921 defaults. */
14922 if (TARGET_FLOAT128)
14923 {
14924 ibm128_float_type_node = make_node (REAL_TYPE);
14925 TYPE_PRECISION (ibm128_float_type_node) = 128;
14926 layout_type (ibm128_float_type_node);
14927 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
14928
14929 ieee128_float_type_node = make_node (REAL_TYPE);
14930 TYPE_PRECISION (ieee128_float_type_node) = 128;
14931 layout_type (ieee128_float_type_node);
14932 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
14933
14934 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
14935 "__float128");
14936
14937 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
14938 "__ibm128");
14939 }
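/* A hedged usage sketch: once the two REAL_TYPE nodes above are
   registered, user code can name both 128-bit formats explicitly,

       __float128 qp;    (the IEEE binary128 type, KFmode)
       __ibm128 dd;      (the double-double pair, IFmode)

   while "long double" remains TFmode and follows the command-line
   defaults described above.  */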
14940
14941 /* Initialize the modes for builtin_function_type, mapping each machine mode
14942 to its tree type node. */
14943 builtin_mode_to_type[QImode][0] = integer_type_node;
14944 builtin_mode_to_type[HImode][0] = integer_type_node;
14945 builtin_mode_to_type[SImode][0] = intSI_type_node;
14946 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14947 builtin_mode_to_type[DImode][0] = intDI_type_node;
14948 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14949 builtin_mode_to_type[TImode][0] = intTI_type_node;
14950 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14951 builtin_mode_to_type[SFmode][0] = float_type_node;
14952 builtin_mode_to_type[DFmode][0] = double_type_node;
14953 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
14954 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
14955 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14956 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14957 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14958 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14959 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14960 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14961 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14962 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14963 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14964 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14965 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14966 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14967 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14968 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14969 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14970 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14971 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14972 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
14973
14974 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14975 TYPE_NAME (bool_char_type_node) = tdecl;
14976
14977 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14978 TYPE_NAME (bool_short_type_node) = tdecl;
14979
14980 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14981 TYPE_NAME (bool_int_type_node) = tdecl;
14982
14983 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14984 TYPE_NAME (pixel_type_node) = tdecl;
14985
14986 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14987 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14988 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14989 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14990 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14991
14992 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14993 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14994
14995 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14996 TYPE_NAME (V16QI_type_node) = tdecl;
14997
14998 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14999 TYPE_NAME (bool_V16QI_type_node) = tdecl;
15000
15001 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
15002 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
15003
15004 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
15005 TYPE_NAME (V8HI_type_node) = tdecl;
15006
15007 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
15008 TYPE_NAME (bool_V8HI_type_node) = tdecl;
15009
15010 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
15011 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
15012
15013 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
15014 TYPE_NAME (V4SI_type_node) = tdecl;
15015
15016 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
15017 TYPE_NAME (bool_V4SI_type_node) = tdecl;
15018
15019 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
15020 TYPE_NAME (V4SF_type_node) = tdecl;
15021
15022 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
15023 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
15024
15025 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
15026 TYPE_NAME (V2DF_type_node) = tdecl;
15027
15028 if (TARGET_POWERPC64)
15029 {
15030 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
15031 TYPE_NAME (V2DI_type_node) = tdecl;
15032
15033 tdecl = add_builtin_type ("__vector unsigned long",
15034 unsigned_V2DI_type_node);
15035 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
15036
15037 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
15038 TYPE_NAME (bool_V2DI_type_node) = tdecl;
15039 }
15040 else
15041 {
15042 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
15043 TYPE_NAME (V2DI_type_node) = tdecl;
15044
15045 tdecl = add_builtin_type ("__vector unsigned long long",
15046 unsigned_V2DI_type_node);
15047 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
15048
15049 tdecl = add_builtin_type ("__vector __bool long long",
15050 bool_V2DI_type_node);
15051 TYPE_NAME (bool_V2DI_type_node) = tdecl;
15052 }
15053
15054 if (V1TI_type_node)
15055 {
15056 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
15057 TYPE_NAME (V1TI_type_node) = tdecl;
15058
15059 tdecl = add_builtin_type ("__vector unsigned __int128",
15060 unsigned_V1TI_type_node);
15061 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
15062 }
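/* A hedged note: with the names above registered, declarations such
   as

       __vector __int128 x;

   are accepted and the value is carried in a single VSX register as
   V1TImode.  */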
15063
15064 /* Paired and SPE builtins are only available if the compiler was built
15065 with the corresponding options, so only create those builtins when the
15066 matching option is enabled. Create AltiVec and VSX builtins on machines
15067 with at least the general purpose extensions (970 and newer) to allow
15068 the use of the target attribute. */
15069 if (TARGET_PAIRED_FLOAT)
15070 paired_init_builtins ();
15071 if (TARGET_SPE)
15072 spe_init_builtins ();
15073 if (TARGET_EXTRA_BUILTINS)
15074 altivec_init_builtins ();
15075 if (TARGET_HTM)
15076 htm_init_builtins ();
15077
15078 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
15079 rs6000_common_init_builtins ();
15080
15081 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
15082 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
15083 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
15084
15085 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
15086 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
15087 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
15088
15089 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
15090 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
15091 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
15092
15093 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
15094 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
15095 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
15096
15097 mode = (TARGET_64BIT) ? DImode : SImode;
15098 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
15099 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
15100 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
15101
15102 ftype = build_function_type_list (unsigned_intDI_type_node,
15103 NULL_TREE);
15104 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
15105
15106 if (TARGET_64BIT)
15107 ftype = build_function_type_list (unsigned_intDI_type_node,
15108 NULL_TREE);
15109 else
15110 ftype = build_function_type_list (unsigned_intSI_type_node,
15111 NULL_TREE);
15112 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
15113
15114 ftype = build_function_type_list (double_type_node, NULL_TREE);
15115 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
15116
15117 ftype = build_function_type_list (void_type_node,
15118 intSI_type_node, double_type_node,
15119 NULL_TREE);
15120 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
15121
15122 #if TARGET_XCOFF
15123 /* AIX libm provides clog as __clog. */
15124 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
15125 set_user_assembler_name (tdecl, "__clog");
15126 #endif
15127
15128 #ifdef SUBTARGET_INIT_BUILTINS
15129 SUBTARGET_INIT_BUILTINS;
15130 #endif
15131 }
15132
15133 /* Returns the rs6000 builtin decl for CODE. */
15134
15135 static tree
15136 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
15137 {
15138 HOST_WIDE_INT fnmask;
15139
15140 if (code >= RS6000_BUILTIN_COUNT)
15141 return error_mark_node;
15142
15143 fnmask = rs6000_builtin_info[code].mask;
15144 if ((fnmask & rs6000_builtin_mask) != fnmask)
15145 {
15146 rs6000_invalid_builtin ((enum rs6000_builtins)code);
15147 return error_mark_node;
15148 }
15149
15150 return rs6000_builtin_decls[code];
15151 }
15152
15153 static void
15154 spe_init_builtins (void)
15155 {
15156 tree puint_type_node = build_pointer_type (unsigned_type_node);
15157 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
15158 const struct builtin_description *d;
15159 size_t i;
15160
15161 tree v2si_ftype_4_v2si
15162 = build_function_type_list (opaque_V2SI_type_node,
15163 opaque_V2SI_type_node,
15164 opaque_V2SI_type_node,
15165 opaque_V2SI_type_node,
15166 opaque_V2SI_type_node,
15167 NULL_TREE);
15168
15169 tree v2sf_ftype_4_v2sf
15170 = build_function_type_list (opaque_V2SF_type_node,
15171 opaque_V2SF_type_node,
15172 opaque_V2SF_type_node,
15173 opaque_V2SF_type_node,
15174 opaque_V2SF_type_node,
15175 NULL_TREE);
15176
15177 tree int_ftype_int_v2si_v2si
15178 = build_function_type_list (integer_type_node,
15179 integer_type_node,
15180 opaque_V2SI_type_node,
15181 opaque_V2SI_type_node,
15182 NULL_TREE);
15183
15184 tree int_ftype_int_v2sf_v2sf
15185 = build_function_type_list (integer_type_node,
15186 integer_type_node,
15187 opaque_V2SF_type_node,
15188 opaque_V2SF_type_node,
15189 NULL_TREE);
15190
15191 tree void_ftype_v2si_puint_int
15192 = build_function_type_list (void_type_node,
15193 opaque_V2SI_type_node,
15194 puint_type_node,
15195 integer_type_node,
15196 NULL_TREE);
15197
15198 tree void_ftype_v2si_puint_char
15199 = build_function_type_list (void_type_node,
15200 opaque_V2SI_type_node,
15201 puint_type_node,
15202 char_type_node,
15203 NULL_TREE);
15204
15205 tree void_ftype_v2si_pv2si_int
15206 = build_function_type_list (void_type_node,
15207 opaque_V2SI_type_node,
15208 opaque_p_V2SI_type_node,
15209 integer_type_node,
15210 NULL_TREE);
15211
15212 tree void_ftype_v2si_pv2si_char
15213 = build_function_type_list (void_type_node,
15214 opaque_V2SI_type_node,
15215 opaque_p_V2SI_type_node,
15216 char_type_node,
15217 NULL_TREE);
15218
15219 tree void_ftype_int
15220 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
15221
15222 tree int_ftype_void
15223 = build_function_type_list (integer_type_node, NULL_TREE);
15224
15225 tree v2si_ftype_pv2si_int
15226 = build_function_type_list (opaque_V2SI_type_node,
15227 opaque_p_V2SI_type_node,
15228 integer_type_node,
15229 NULL_TREE);
15230
15231 tree v2si_ftype_puint_int
15232 = build_function_type_list (opaque_V2SI_type_node,
15233 puint_type_node,
15234 integer_type_node,
15235 NULL_TREE);
15236
15237 tree v2si_ftype_pushort_int
15238 = build_function_type_list (opaque_V2SI_type_node,
15239 pushort_type_node,
15240 integer_type_node,
15241 NULL_TREE);
15242
15243 tree v2si_ftype_signed_char
15244 = build_function_type_list (opaque_V2SI_type_node,
15245 signed_char_type_node,
15246 NULL_TREE);
15247
15248 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
15249
15250 /* Initialize irregular SPE builtins. */
15251
15252 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
15253 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
15254 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
15255 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
15256 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
15257 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
15258 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
15259 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
15260 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
15261 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
15262 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
15263 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
15264 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
15265 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
15266 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
15267 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
15268 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
15269 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
15270
15271 /* Loads. */
15272 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
15273 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
15274 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
15275 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
15276 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
15277 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
15278 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
15279 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
15280 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
15281 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
15282 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
15283 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
15284 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
15285 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
15286 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
15287 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
15288 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
15289 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
15290 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
15291 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
15292 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
15293 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
15294
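/* A minimal illustration (a sketch, not code from this file): with -mspe,
   user code can reach the loads above directly, e.g.

       __ev64_opaque__ *p;
       __ev64_opaque__ v = __builtin_spe_evldd (p, 0);

   performs an evldd of the doubleword at *p; <spe.h> wraps these builtins
   in the documented __ev_* intrinsics.  */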
15295 /* Predicates. */
15296 d = bdesc_spe_predicates;
15297 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
15298 {
15299 tree type;
15300
15301 switch (insn_data[d->icode].operand[1].mode)
15302 {
15303 case V2SImode:
15304 type = int_ftype_int_v2si_v2si;
15305 break;
15306 case V2SFmode:
15307 type = int_ftype_int_v2sf_v2sf;
15308 break;
15309 default:
15310 gcc_unreachable ();
15311 }
15312
15313 def_builtin (d->name, type, d->code);
15314 }
15315
15316 /* Evsel predicates. */
15317 d = bdesc_spe_evsel;
15318 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
15319 {
15320 tree type;
15321
15322 switch (insn_data[d->icode].operand[1].mode)
15323 {
15324 case V2SImode:
15325 type = v2si_ftype_4_v2si;
15326 break;
15327 case V2SFmode:
15328 type = v2sf_ftype_4_v2sf;
15329 break;
15330 default:
15331 gcc_unreachable ();
15332 }
15333
15334 def_builtin (d->name, type, d->code);
15335 }
15336 }
15337
15338 static void
15339 paired_init_builtins (void)
15340 {
15341 const struct builtin_description *d;
15342 size_t i;
15343
15344 tree int_ftype_int_v2sf_v2sf
15345 = build_function_type_list (integer_type_node,
15346 integer_type_node,
15347 V2SF_type_node,
15348 V2SF_type_node,
15349 NULL_TREE);
15350 tree pcfloat_type_node =
15351 build_pointer_type (build_qualified_type
15352 (float_type_node, TYPE_QUAL_CONST));
15353
15354 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
15355 long_integer_type_node,
15356 pcfloat_type_node,
15357 NULL_TREE);
15358 tree void_ftype_v2sf_long_pcfloat =
15359 build_function_type_list (void_type_node,
15360 V2SF_type_node,
15361 long_integer_type_node,
15362 pcfloat_type_node,
15363 NULL_TREE);
15364
15366 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
15367 PAIRED_BUILTIN_LX);
15368
15370 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
15371 PAIRED_BUILTIN_STX);
15372
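/* A minimal illustration (a sketch, not code from this file): under
   -mpaired, the two entry points map to the paired-single indexed load
   and store, roughly

       float buf[2];
       v = __builtin_paired_lx (0, buf);     loads buf as a V2SF pair
       __builtin_paired_stx (v, 0, buf);     stores it back

   where v has the two-element float vector type (V2SF).  */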
15373 /* Predicates. */
15374 d = bdesc_paired_preds;
15375 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
15376 {
15377 tree type;
15378
15379 if (TARGET_DEBUG_BUILTIN)
15380 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
15381 (int)i, get_insn_name (d->icode), (int)d->icode,
15382 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
15383
15384 switch (insn_data[d->icode].operand[1].mode)
15385 {
15386 case V2SFmode:
15387 type = int_ftype_int_v2sf_v2sf;
15388 break;
15389 default:
15390 gcc_unreachable ();
15391 }
15392
15393 def_builtin (d->name, type, d->code);
15394 }
15395 }
15396
15397 static void
15398 altivec_init_builtins (void)
15399 {
15400 const struct builtin_description *d;
15401 size_t i;
15402 tree ftype;
15403 tree decl;
15404
15405 tree pvoid_type_node = build_pointer_type (void_type_node);
15406
15407 tree pcvoid_type_node
15408 = build_pointer_type (build_qualified_type (void_type_node,
15409 TYPE_QUAL_CONST));
15410
15411 tree int_ftype_opaque
15412 = build_function_type_list (integer_type_node,
15413 opaque_V4SI_type_node, NULL_TREE);
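/* Note: __builtin_vec_splats and __builtin_vec_promote are overloaded
   and resolved by the front end, so the integer signature below is only
   a placeholder.  */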
15414 tree opaque_ftype_opaque
15415 = build_function_type_list (integer_type_node, NULL_TREE);
15416 tree opaque_ftype_opaque_int
15417 = build_function_type_list (opaque_V4SI_type_node,
15418 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
15419 tree opaque_ftype_opaque_opaque_int
15420 = build_function_type_list (opaque_V4SI_type_node,
15421 opaque_V4SI_type_node, opaque_V4SI_type_node,
15422 integer_type_node, NULL_TREE);
15423 tree int_ftype_int_opaque_opaque
15424 = build_function_type_list (integer_type_node,
15425 integer_type_node, opaque_V4SI_type_node,
15426 opaque_V4SI_type_node, NULL_TREE);
15427 tree int_ftype_int_v4si_v4si
15428 = build_function_type_list (integer_type_node,
15429 integer_type_node, V4SI_type_node,
15430 V4SI_type_node, NULL_TREE);
15431 tree int_ftype_int_v2di_v2di
15432 = build_function_type_list (integer_type_node,
15433 integer_type_node, V2DI_type_node,
15434 V2DI_type_node, NULL_TREE);
15435 tree void_ftype_v4si
15436 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
15437 tree v8hi_ftype_void
15438 = build_function_type_list (V8HI_type_node, NULL_TREE);
15439 tree void_ftype_void
15440 = build_function_type_list (void_type_node, NULL_TREE);
15441 tree void_ftype_int
15442 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
15443
15444 tree opaque_ftype_long_pcvoid
15445 = build_function_type_list (opaque_V4SI_type_node,
15446 long_integer_type_node, pcvoid_type_node,
15447 NULL_TREE);
15448 tree v16qi_ftype_long_pcvoid
15449 = build_function_type_list (V16QI_type_node,
15450 long_integer_type_node, pcvoid_type_node,
15451 NULL_TREE);
15452 tree v8hi_ftype_long_pcvoid
15453 = build_function_type_list (V8HI_type_node,
15454 long_integer_type_node, pcvoid_type_node,
15455 NULL_TREE);
15456 tree v4si_ftype_long_pcvoid
15457 = build_function_type_list (V4SI_type_node,
15458 long_integer_type_node, pcvoid_type_node,
15459 NULL_TREE);
15460 tree v4sf_ftype_long_pcvoid
15461 = build_function_type_list (V4SF_type_node,
15462 long_integer_type_node, pcvoid_type_node,
15463 NULL_TREE);
15464 tree v2df_ftype_long_pcvoid
15465 = build_function_type_list (V2DF_type_node,
15466 long_integer_type_node, pcvoid_type_node,
15467 NULL_TREE);
15468 tree v2di_ftype_long_pcvoid
15469 = build_function_type_list (V2DI_type_node,
15470 long_integer_type_node, pcvoid_type_node,
15471 NULL_TREE);
15472
15473 tree void_ftype_opaque_long_pvoid
15474 = build_function_type_list (void_type_node,
15475 opaque_V4SI_type_node, long_integer_type_node,
15476 pvoid_type_node, NULL_TREE);
15477 tree void_ftype_v4si_long_pvoid
15478 = build_function_type_list (void_type_node,
15479 V4SI_type_node, long_integer_type_node,
15480 pvoid_type_node, NULL_TREE);
15481 tree void_ftype_v16qi_long_pvoid
15482 = build_function_type_list (void_type_node,
15483 V16QI_type_node, long_integer_type_node,
15484 pvoid_type_node, NULL_TREE);
15485 tree void_ftype_v8hi_long_pvoid
15486 = build_function_type_list (void_type_node,
15487 V8HI_type_node, long_integer_type_node,
15488 pvoid_type_node, NULL_TREE);
15489 tree void_ftype_v4sf_long_pvoid
15490 = build_function_type_list (void_type_node,
15491 V4SF_type_node, long_integer_type_node,
15492 pvoid_type_node, NULL_TREE);
15493 tree void_ftype_v2df_long_pvoid
15494 = build_function_type_list (void_type_node,
15495 V2DF_type_node, long_integer_type_node,
15496 pvoid_type_node, NULL_TREE);
15497 tree void_ftype_v2di_long_pvoid
15498 = build_function_type_list (void_type_node,
15499 V2DI_type_node, long_integer_type_node,
15500 pvoid_type_node, NULL_TREE);
15501 tree int_ftype_int_v8hi_v8hi
15502 = build_function_type_list (integer_type_node,
15503 integer_type_node, V8HI_type_node,
15504 V8HI_type_node, NULL_TREE);
15505 tree int_ftype_int_v16qi_v16qi
15506 = build_function_type_list (integer_type_node,
15507 integer_type_node, V16QI_type_node,
15508 V16QI_type_node, NULL_TREE);
15509 tree int_ftype_int_v4sf_v4sf
15510 = build_function_type_list (integer_type_node,
15511 integer_type_node, V4SF_type_node,
15512 V4SF_type_node, NULL_TREE);
15513 tree int_ftype_int_v2df_v2df
15514 = build_function_type_list (integer_type_node,
15515 integer_type_node, V2DF_type_node,
15516 V2DF_type_node, NULL_TREE);
15517 tree v2di_ftype_v2di
15518 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
15519 tree v4si_ftype_v4si
15520 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15521 tree v8hi_ftype_v8hi
15522 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15523 tree v16qi_ftype_v16qi
15524 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15525 tree v4sf_ftype_v4sf
15526 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15527 tree v2df_ftype_v2df
15528 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15529 tree void_ftype_pcvoid_int_int
15530 = build_function_type_list (void_type_node,
15531 pcvoid_type_node, integer_type_node,
15532 integer_type_node, NULL_TREE);
15533
15534 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
15535 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
15536 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
15537 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
15538 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
15539 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
15540 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
15541 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
15542 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
15543 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
15544 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
15545 ALTIVEC_BUILTIN_LVXL_V2DF);
15546 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
15547 ALTIVEC_BUILTIN_LVXL_V2DI);
15548 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
15549 ALTIVEC_BUILTIN_LVXL_V4SF);
15550 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
15551 ALTIVEC_BUILTIN_LVXL_V4SI);
15552 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
15553 ALTIVEC_BUILTIN_LVXL_V8HI);
15554 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
15555 ALTIVEC_BUILTIN_LVXL_V16QI);
15556 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
15557 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
15558 ALTIVEC_BUILTIN_LVX_V2DF);
15559 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
15560 ALTIVEC_BUILTIN_LVX_V2DI);
15561 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
15562 ALTIVEC_BUILTIN_LVX_V4SF);
15563 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
15564 ALTIVEC_BUILTIN_LVX_V4SI);
15565 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
15566 ALTIVEC_BUILTIN_LVX_V8HI);
15567 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
15568 ALTIVEC_BUILTIN_LVX_V16QI);
15569 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
15570 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
15571 ALTIVEC_BUILTIN_STVX_V2DF);
15572 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
15573 ALTIVEC_BUILTIN_STVX_V2DI);
15574 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
15575 ALTIVEC_BUILTIN_STVX_V4SF);
15576 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
15577 ALTIVEC_BUILTIN_STVX_V4SI);
15578 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
15579 ALTIVEC_BUILTIN_STVX_V8HI);
15580 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
15581 ALTIVEC_BUILTIN_STVX_V16QI);
15582 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
15583 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
15584 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
15585 ALTIVEC_BUILTIN_STVXL_V2DF);
15586 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
15587 ALTIVEC_BUILTIN_STVXL_V2DI);
15588 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
15589 ALTIVEC_BUILTIN_STVXL_V4SF);
15590 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
15591 ALTIVEC_BUILTIN_STVXL_V4SI);
15592 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
15593 ALTIVEC_BUILTIN_STVXL_V8HI);
15594 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
15595 ALTIVEC_BUILTIN_STVXL_V16QI);
15596 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
15597 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
15598 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
15599 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
15600 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
15601 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
15602 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
15603 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
15604 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
15605 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
15606 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
15607 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
15608 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
15609 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
15610 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
15611 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
15612
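/* A minimal illustration (a sketch, not code from this file): the
   overloaded __builtin_vec_ld / __builtin_vec_st defined above back the
   <altivec.h> vec_ld / vec_st intrinsics; the front end resolves them to
   the type-specific LVX / STVX codes, e.g.

       vector signed int v = vec_ld (0, pi);
       vec_st (v, 0, pi);
   */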
15613 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
15614 VSX_BUILTIN_LXVD2X_V2DF);
15615 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
15616 VSX_BUILTIN_LXVD2X_V2DI);
15617 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
15618 VSX_BUILTIN_LXVW4X_V4SF);
15619 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
15620 VSX_BUILTIN_LXVW4X_V4SI);
15621 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
15622 VSX_BUILTIN_LXVW4X_V8HI);
15623 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
15624 VSX_BUILTIN_LXVW4X_V16QI);
15625 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
15626 VSX_BUILTIN_STXVD2X_V2DF);
15627 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
15628 VSX_BUILTIN_STXVD2X_V2DI);
15629 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
15630 VSX_BUILTIN_STXVW4X_V4SF);
15631 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
15632 VSX_BUILTIN_STXVW4X_V4SI);
15633 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
15634 VSX_BUILTIN_STXVW4X_V8HI);
15635 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
15636 VSX_BUILTIN_STXVW4X_V16QI);
15637 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
15638 VSX_BUILTIN_VEC_LD);
15639 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
15640 VSX_BUILTIN_VEC_ST);
15641
15642 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
15643 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
15644 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
15645
15646 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
15647 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
15648 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
15649 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
15650 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
15651 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
15652 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
15653 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
15654 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
15655 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
15656 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
15657 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
15658
15659 /* Cell builtins. */
15660 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
15661 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
15662 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
15663 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
15664
15665 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
15666 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
15667 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
15668 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
15669
15670 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
15671 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
15672 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
15673 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
15674
15675 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
15676 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
15677 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
15678 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
15679
15680 /* Add the DST variants. */
15681 d = bdesc_dst;
15682 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15683 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
15684
15685 /* Initialize the predicates. */
15686 d = bdesc_altivec_preds;
15687 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15688 {
15689 machine_mode mode1;
15690 tree type;
15691
15692 if (rs6000_overloaded_builtin_p (d->code))
15693 mode1 = VOIDmode;
15694 else
15695 mode1 = insn_data[d->icode].operand[1].mode;
15696
15697 switch (mode1)
15698 {
15699 case VOIDmode:
15700 type = int_ftype_int_opaque_opaque;
15701 break;
15702 case V2DImode:
15703 type = int_ftype_int_v2di_v2di;
15704 break;
15705 case V4SImode:
15706 type = int_ftype_int_v4si_v4si;
15707 break;
15708 case V8HImode:
15709 type = int_ftype_int_v8hi_v8hi;
15710 break;
15711 case V16QImode:
15712 type = int_ftype_int_v16qi_v16qi;
15713 break;
15714 case V4SFmode:
15715 type = int_ftype_int_v4sf_v4sf;
15716 break;
15717 case V2DFmode:
15718 type = int_ftype_int_v2df_v2df;
15719 break;
15720 default:
15721 gcc_unreachable ();
15722 }
15723
15724 def_builtin (d->name, type, d->code);
15725 }
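/* The leading int argument of these predicates selects how the CR6
   result is tested; <altivec.h> passes __CR6_EQ, __CR6_EQ_REV, __CR6_LT
   or __CR6_LT_REV here when expanding the vec_all_* and vec_any_*
   intrinsics (a note, not code from this file).  */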
15726
15727 /* Initialize the abs* operators. */
15728 d = bdesc_abs;
15729 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15730 {
15731 machine_mode mode0;
15732 tree type;
15733
15734 mode0 = insn_data[d->icode].operand[0].mode;
15735
15736 switch (mode0)
15737 {
15738 case V2DImode:
15739 type = v2di_ftype_v2di;
15740 break;
15741 case V4SImode:
15742 type = v4si_ftype_v4si;
15743 break;
15744 case V8HImode:
15745 type = v8hi_ftype_v8hi;
15746 break;
15747 case V16QImode:
15748 type = v16qi_ftype_v16qi;
15749 break;
15750 case V4SFmode:
15751 type = v4sf_ftype_v4sf;
15752 break;
15753 case V2DFmode:
15754 type = v2df_ftype_v2df;
15755 break;
15756 default:
15757 gcc_unreachable ();
15758 }
15759
15760 def_builtin (d->name, type, d->code);
15761 }
15762
15763 /* Initialize target builtin that implements
15764 targetm.vectorize.builtin_mask_for_load. */
15765
15766 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15767 v16qi_ftype_long_pcvoid,
15768 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15769 BUILT_IN_MD, NULL, NULL_TREE);
15770 TREE_READONLY (decl) = 1;
15771 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15772 altivec_builtin_mask_for_load = decl;
15773
15774 /* Access to the vec_init patterns. */
15775 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15776 integer_type_node, integer_type_node,
15777 integer_type_node, NULL_TREE);
15778 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15779
15780 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15781 short_integer_type_node,
15782 short_integer_type_node,
15783 short_integer_type_node,
15784 short_integer_type_node,
15785 short_integer_type_node,
15786 short_integer_type_node,
15787 short_integer_type_node, NULL_TREE);
15788 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15789
15790 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15791 char_type_node, char_type_node,
15792 char_type_node, char_type_node,
15793 char_type_node, char_type_node,
15794 char_type_node, char_type_node,
15795 char_type_node, char_type_node,
15796 char_type_node, char_type_node,
15797 char_type_node, char_type_node,
15798 char_type_node, NULL_TREE);
15799 def_builtin ("__builtin_vec_init_v16qi", ftype,
15800 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15801
15802 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15803 float_type_node, float_type_node,
15804 float_type_node, NULL_TREE);
15805 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15806
15807 /* VSX builtins. */
15808 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15809 double_type_node, NULL_TREE);
15810 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15811
15812 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15813 intDI_type_node, NULL_TREE);
15814 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15815
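/* A minimal illustration (a sketch, not code from this file): the
   vec_init builtins assemble a vector from scalars, e.g.

       vector int v = __builtin_vec_init_v4si (1, 2, 3, 4);

   vector constructors in the front ends expand through the same
   vec_init patterns.  */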
15816 /* Access to the vec_set patterns. */
15817 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15818 intSI_type_node,
15819 integer_type_node, NULL_TREE);
15820 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15821
15822 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15823 intHI_type_node,
15824 integer_type_node, NULL_TREE);
15825 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15826
15827 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15828 intQI_type_node,
15829 integer_type_node, NULL_TREE);
15830 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15831
15832 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15833 float_type_node,
15834 integer_type_node, NULL_TREE);
15835 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15836
15837 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15838 double_type_node,
15839 integer_type_node, NULL_TREE);
15840 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15841
15842 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15843 intDI_type_node,
15844 integer_type_node, NULL_TREE);
15845 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15846
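/* A minimal illustration (a sketch, not code from this file): vec_set
   takes the vector, the replacement scalar and a constant lane number,
   returning the updated vector:

       v = __builtin_vec_set_v4si (v, 99, 0);
   */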
15847 /* Access to the vec_extract patterns. */
15848 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15849 integer_type_node, NULL_TREE);
15850 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15851
15852 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15853 integer_type_node, NULL_TREE);
15854 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15855
15856 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15857 integer_type_node, NULL_TREE);
15858 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15859
15860 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15861 integer_type_node, NULL_TREE);
15862 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15863
15864 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15865 integer_type_node, NULL_TREE);
15866 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15867
15868 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15869 integer_type_node, NULL_TREE);
15870 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
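/* A minimal illustration (a sketch, not code from this file):
   vec_extract reads one lane back out:

       int e = __builtin_vec_ext_v4si (v, 3);
   */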
15871
15873 if (V1TI_type_node)
15874 {
15875 tree v1ti_ftype_long_pcvoid
15876 = build_function_type_list (V1TI_type_node,
15877 long_integer_type_node, pcvoid_type_node,
15878 NULL_TREE);
15879 tree void_ftype_v1ti_long_pvoid
15880 = build_function_type_list (void_type_node,
15881 V1TI_type_node, long_integer_type_node,
15882 pvoid_type_node, NULL_TREE);
15883 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15884 VSX_BUILTIN_LXVD2X_V1TI);
15885 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15886 VSX_BUILTIN_STXVD2X_V1TI);
15887 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15888 NULL_TREE);
15889 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15890 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15891 intTI_type_node,
15892 integer_type_node, NULL_TREE);
15893 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15894 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15895 integer_type_node, NULL_TREE);
15896 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15897 }
15898
15899 }
15900
15901 static void
15902 htm_init_builtins (void)
15903 {
15904 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15905 const struct builtin_description *d;
15906 size_t i;
15907
15908 d = bdesc_htm;
15909 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15910 {
15911 tree op[MAX_HTM_OPERANDS], type;
15912 HOST_WIDE_INT mask = d->mask;
15913 unsigned attr = rs6000_builtin_info[d->code].attr;
15914 bool void_func = (attr & RS6000_BTC_VOID);
15915 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15916 int nopnds = 0;
15917 tree gpr_type_node;
15918 tree rettype;
15919 tree argtype;
15920
15921 if (TARGET_32BIT && TARGET_POWERPC64)
15922 gpr_type_node = long_long_unsigned_type_node;
15923 else
15924 gpr_type_node = long_unsigned_type_node;
15925
15926 if (attr & RS6000_BTC_SPR)
15927 {
15928 rettype = gpr_type_node;
15929 argtype = gpr_type_node;
15930 }
15931 else if (d->code == HTM_BUILTIN_TABORTDC
15932 || d->code == HTM_BUILTIN_TABORTDCI)
15933 {
15934 rettype = unsigned_type_node;
15935 argtype = gpr_type_node;
15936 }
15937 else
15938 {
15939 rettype = unsigned_type_node;
15940 argtype = unsigned_type_node;
15941 }
15942
15943 if ((mask & builtin_mask) != mask)
15944 {
15945 if (TARGET_DEBUG_BUILTIN)
15946 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
15947 continue;
15948 }
15949
15950 if (d->name == 0)
15951 {
15952 if (TARGET_DEBUG_BUILTIN)
15953 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
15954 (long unsigned) i);
15955 continue;
15956 }
15957
15958 op[nopnds++] = (void_func) ? void_type_node : rettype;
15959
15960 if (attr_args == RS6000_BTC_UNARY)
15961 op[nopnds++] = argtype;
15962 else if (attr_args == RS6000_BTC_BINARY)
15963 {
15964 op[nopnds++] = argtype;
15965 op[nopnds++] = argtype;
15966 }
15967 else if (attr_args == RS6000_BTC_TERNARY)
15968 {
15969 op[nopnds++] = argtype;
15970 op[nopnds++] = argtype;
15971 op[nopnds++] = argtype;
15972 }
15973
15974 switch (nopnds)
15975 {
15976 case 1:
15977 type = build_function_type_list (op[0], NULL_TREE);
15978 break;
15979 case 2:
15980 type = build_function_type_list (op[0], op[1], NULL_TREE);
15981 break;
15982 case 3:
15983 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15984 break;
15985 case 4:
15986 type = build_function_type_list (op[0], op[1], op[2], op[3],
15987 NULL_TREE);
15988 break;
15989 default:
15990 gcc_unreachable ();
15991 }
15992
15993 def_builtin (d->name, type, d->code);
15994 }
15995 }
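/* A minimal illustration (a sketch, not code from this file): with -mhtm
   the loop above creates the HTM entry points, used as in

       if (__builtin_tbegin (0))
         {
           ... transactional code ...
           __builtin_tend (0);
         }
   */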
15996
15997 /* Hash function for builtin functions with up to 3 arguments and a return
15998 type. */
15999 hashval_t
16000 builtin_hasher::hash (builtin_hash_struct *bh)
16001 {
16002 unsigned ret = 0;
16003 int i;
16004
16005 for (i = 0; i < 4; i++)
16006 {
16007 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
16008 ret = (ret * 2) + bh->uns_p[i];
16009 }
16010
16011 return ret;
16012 }
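/* The hash above is a mixed-radix encoding: each (mode, uns_p) pair is
   folded in with radix MAX_MACHINE_MODE and 2 respectively, so distinct
   mode/sign tuples produce distinct values up to unsigned overflow.  */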
16013
16014 /* Compare builtin hash entries H1 and H2 for equivalence. */
16015 bool
16016 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
16017 {
16018 return ((p1->mode[0] == p2->mode[0])
16019 && (p1->mode[1] == p2->mode[1])
16020 && (p1->mode[2] == p2->mode[2])
16021 && (p1->mode[3] == p2->mode[3])
16022 && (p1->uns_p[0] == p2->uns_p[0])
16023 && (p1->uns_p[1] == p2->uns_p[1])
16024 && (p1->uns_p[2] == p2->uns_p[2])
16025 && (p1->uns_p[3] == p2->uns_p[3]));
16026 }
16027
16028 /* Map types for builtin functions with an explicit return type and up to 3
16029 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
16030 of the unused arguments. */
16031 static tree
16032 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
16033 machine_mode mode_arg1, machine_mode mode_arg2,
16034 enum rs6000_builtins builtin, const char *name)
16035 {
16036 struct builtin_hash_struct h;
16037 struct builtin_hash_struct *h2;
16038 int num_args = 3;
16039 int i;
16040 tree ret_type = NULL_TREE;
16041 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
16042
16043 /* Create builtin_hash_table. */
16044 if (builtin_hash_table == NULL)
16045 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
16046
16047 h.type = NULL_TREE;
16048 h.mode[0] = mode_ret;
16049 h.mode[1] = mode_arg0;
16050 h.mode[2] = mode_arg1;
16051 h.mode[3] = mode_arg2;
16052 h.uns_p[0] = 0;
16053 h.uns_p[1] = 0;
16054 h.uns_p[2] = 0;
16055 h.uns_p[3] = 0;
16056
16057 /* If the builtin produces unsigned results or takes unsigned arguments,
16058 and it can be returned as a decl to the vectorizer (such as the widening
16059 multiplies and permutes), make sure the arguments and return value are
16060 type correct. */
16061 switch (builtin)
16062 {
16063 /* unsigned 1 argument functions. */
16064 case CRYPTO_BUILTIN_VSBOX:
16065 case P8V_BUILTIN_VGBBD:
16066 case MISC_BUILTIN_CDTBCD:
16067 case MISC_BUILTIN_CBCDTD:
16068 h.uns_p[0] = 1;
16069 h.uns_p[1] = 1;
16070 break;
16071
16072 /* unsigned 2 argument functions. */
16073 case ALTIVEC_BUILTIN_VMULEUB_UNS:
16074 case ALTIVEC_BUILTIN_VMULEUH_UNS:
16075 case ALTIVEC_BUILTIN_VMULOUB_UNS:
16076 case ALTIVEC_BUILTIN_VMULOUH_UNS:
16077 case CRYPTO_BUILTIN_VCIPHER:
16078 case CRYPTO_BUILTIN_VCIPHERLAST:
16079 case CRYPTO_BUILTIN_VNCIPHER:
16080 case CRYPTO_BUILTIN_VNCIPHERLAST:
16081 case CRYPTO_BUILTIN_VPMSUMB:
16082 case CRYPTO_BUILTIN_VPMSUMH:
16083 case CRYPTO_BUILTIN_VPMSUMW:
16084 case CRYPTO_BUILTIN_VPMSUMD:
16085 case CRYPTO_BUILTIN_VPMSUM:
16086 case MISC_BUILTIN_ADDG6S:
16087 case MISC_BUILTIN_DIVWEU:
16088 case MISC_BUILTIN_DIVWEUO:
16089 case MISC_BUILTIN_DIVDEU:
16090 case MISC_BUILTIN_DIVDEUO:
16091 h.uns_p[0] = 1;
16092 h.uns_p[1] = 1;
16093 h.uns_p[2] = 1;
16094 break;
16095
16096 /* unsigned 3 argument functions. */
16097 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
16098 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
16099 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
16100 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
16101 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
16102 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
16103 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
16104 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
16105 case VSX_BUILTIN_VPERM_16QI_UNS:
16106 case VSX_BUILTIN_VPERM_8HI_UNS:
16107 case VSX_BUILTIN_VPERM_4SI_UNS:
16108 case VSX_BUILTIN_VPERM_2DI_UNS:
16109 case VSX_BUILTIN_XXSEL_16QI_UNS:
16110 case VSX_BUILTIN_XXSEL_8HI_UNS:
16111 case VSX_BUILTIN_XXSEL_4SI_UNS:
16112 case VSX_BUILTIN_XXSEL_2DI_UNS:
16113 case CRYPTO_BUILTIN_VPERMXOR:
16114 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
16115 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
16116 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
16117 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
16118 case CRYPTO_BUILTIN_VSHASIGMAW:
16119 case CRYPTO_BUILTIN_VSHASIGMAD:
16120 case CRYPTO_BUILTIN_VSHASIGMA:
16121 h.uns_p[0] = 1;
16122 h.uns_p[1] = 1;
16123 h.uns_p[2] = 1;
16124 h.uns_p[3] = 1;
16125 break;
16126
16127 /* signed permute functions with unsigned char mask. */
16128 case ALTIVEC_BUILTIN_VPERM_16QI:
16129 case ALTIVEC_BUILTIN_VPERM_8HI:
16130 case ALTIVEC_BUILTIN_VPERM_4SI:
16131 case ALTIVEC_BUILTIN_VPERM_4SF:
16132 case ALTIVEC_BUILTIN_VPERM_2DI:
16133 case ALTIVEC_BUILTIN_VPERM_2DF:
16134 case VSX_BUILTIN_VPERM_16QI:
16135 case VSX_BUILTIN_VPERM_8HI:
16136 case VSX_BUILTIN_VPERM_4SI:
16137 case VSX_BUILTIN_VPERM_4SF:
16138 case VSX_BUILTIN_VPERM_2DI:
16139 case VSX_BUILTIN_VPERM_2DF:
16140 h.uns_p[3] = 1;
16141 break;
16142
16143 /* unsigned args, signed return. */
16144 case VSX_BUILTIN_XVCVUXDDP_UNS:
16145 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
16146 h.uns_p[1] = 1;
16147 break;
16148
16149 /* signed args, unsigned return. */
16150 case VSX_BUILTIN_XVCVDPUXDS_UNS:
16151 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
16152 case MISC_BUILTIN_UNPACK_TD:
16153 case MISC_BUILTIN_UNPACK_V1TI:
16154 h.uns_p[0] = 1;
16155 break;
16156
16157 /* unsigned arguments for 128-bit pack instructions. */
16158 case MISC_BUILTIN_PACK_TD:
16159 case MISC_BUILTIN_PACK_V1TI:
16160 h.uns_p[1] = 1;
16161 h.uns_p[2] = 1;
16162 break;
16163
16164 default:
16165 break;
16166 }
16167
16168 /* Figure out how many args are present. */
16169 while (num_args > 0 && h.mode[num_args] == VOIDmode)
16170 num_args--;
16171
16172 if (num_args == 0)
16173 fatal_error (input_location,
16174 "internal error: builtin function %s had no type", name);
16175
16176 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
16177 if (!ret_type && h.uns_p[0])
16178 ret_type = builtin_mode_to_type[h.mode[0]][0];
16179
16180 if (!ret_type)
16181 fatal_error (input_location,
16182 "internal error: builtin function %s had an unexpected "
16183 "return type %s", name, GET_MODE_NAME (h.mode[0]));
16184
16185 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
16186 arg_type[i] = NULL_TREE;
16187
16188 for (i = 0; i < num_args; i++)
16189 {
16190 int m = (int) h.mode[i+1];
16191 int uns_p = h.uns_p[i+1];
16192
16193 arg_type[i] = builtin_mode_to_type[m][uns_p];
16194 if (!arg_type[i] && uns_p)
16195 arg_type[i] = builtin_mode_to_type[m][0];
16196
16197 if (!arg_type[i])
16198 fatal_error (input_location,
16199 "internal error: builtin function %s, argument %d "
16200 "had unexpected argument type %s", name, i,
16201 GET_MODE_NAME (m));
16202 }
16203
16204 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
16205 if (*found == NULL)
16206 {
16207 h2 = ggc_alloc<builtin_hash_struct> ();
16208 *h2 = h;
16209 *found = h2;
16210
16211 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
16212 arg_type[2], NULL_TREE);
16213 }
16214
16215 return (*found)->type;
16216 }
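/* A worked example (a sketch, not code from this file): for
   CRYPTO_BUILTIN_VCIPHER the switch above sets uns_p[0..2], so its V2DI
   operands resolve through builtin_mode_to_type to

       vector unsigned long long
       __builtin_crypto_vcipher (vector unsigned long long,
                                 vector unsigned long long);

   rather than the signed type the bare modes would produce.  */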
16217
16218 static void
16219 rs6000_common_init_builtins (void)
16220 {
16221 const struct builtin_description *d;
16222 size_t i;
16223
16224 tree opaque_ftype_opaque = NULL_TREE;
16225 tree opaque_ftype_opaque_opaque = NULL_TREE;
16226 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
16227 tree v2si_ftype_qi = NULL_TREE;
16228 tree v2si_ftype_v2si_qi = NULL_TREE;
16229 tree v2si_ftype_int_qi = NULL_TREE;
16230 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16231
16232 if (!TARGET_PAIRED_FLOAT)
16233 {
16234 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
16235 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
16236 }
16237
16238 /* Paired and SPE builtins are only available if the compiler was built
16239 with the corresponding options, so only create those builtins when the
16240 matching option is enabled. Create AltiVec and VSX builtins on machines
16241 with at least the general purpose extensions (970 and newer) to allow the
16242 use of the target attribute. */
16243
16244 if (TARGET_EXTRA_BUILTINS)
16245 builtin_mask |= RS6000_BTM_COMMON;
16246
16247 /* Add the ternary operators. */
16248 d = bdesc_3arg;
16249 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16250 {
16251 tree type;
16252 HOST_WIDE_INT mask = d->mask;
16253
16254 if ((mask & builtin_mask) != mask)
16255 {
16256 if (TARGET_DEBUG_BUILTIN)
16257 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
16258 continue;
16259 }
16260
16261 if (rs6000_overloaded_builtin_p (d->code))
16262 {
16263 if (! (type = opaque_ftype_opaque_opaque_opaque))
16264 type = opaque_ftype_opaque_opaque_opaque
16265 = build_function_type_list (opaque_V4SI_type_node,
16266 opaque_V4SI_type_node,
16267 opaque_V4SI_type_node,
16268 opaque_V4SI_type_node,
16269 NULL_TREE);
16270 }
16271 else
16272 {
16273 enum insn_code icode = d->icode;
16274 if (d->name == 0)
16275 {
16276 if (TARGET_DEBUG_BUILTIN)
16277 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
16278 (long unsigned)i);
16279
16280 continue;
16281 }
16282
16283 if (icode == CODE_FOR_nothing)
16284 {
16285 if (TARGET_DEBUG_BUILTIN)
16286 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
16287 d->name);
16288
16289 continue;
16290 }
16291
16292 type = builtin_function_type (insn_data[icode].operand[0].mode,
16293 insn_data[icode].operand[1].mode,
16294 insn_data[icode].operand[2].mode,
16295 insn_data[icode].operand[3].mode,
16296 d->code, d->name);
16297 }
16298
16299 def_builtin (d->name, type, d->code);
16300 }
16301
16302 /* Add the binary operators. */
16303 d = bdesc_2arg;
16304 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16305 {
16306 machine_mode mode0, mode1, mode2;
16307 tree type;
16308 HOST_WIDE_INT mask = d->mask;
16309
16310 if ((mask & builtin_mask) != mask)
16311 {
16312 if (TARGET_DEBUG_BUILTIN)
16313 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
16314 continue;
16315 }
16316
16317 if (rs6000_overloaded_builtin_p (d->code))
16318 {
16319 if (! (type = opaque_ftype_opaque_opaque))
16320 type = opaque_ftype_opaque_opaque
16321 = build_function_type_list (opaque_V4SI_type_node,
16322 opaque_V4SI_type_node,
16323 opaque_V4SI_type_node,
16324 NULL_TREE);
16325 }
16326 else
16327 {
16328 enum insn_code icode = d->icode;
16329 if (d->name == 0)
16330 {
16331 if (TARGET_DEBUG_BUILTIN)
16332 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
16333 (long unsigned)i);
16334
16335 continue;
16336 }
16337
16338 if (icode == CODE_FOR_nothing)
16339 {
16340 if (TARGET_DEBUG_BUILTIN)
16341 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
16342 d->name);
16343
16344 continue;
16345 }
16346
16347 mode0 = insn_data[icode].operand[0].mode;
16348 mode1 = insn_data[icode].operand[1].mode;
16349 mode2 = insn_data[icode].operand[2].mode;
16350
16351 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
16352 {
16353 if (! (type = v2si_ftype_v2si_qi))
16354 type = v2si_ftype_v2si_qi
16355 = build_function_type_list (opaque_V2SI_type_node,
16356 opaque_V2SI_type_node,
16357 char_type_node,
16358 NULL_TREE);
16359 }
16360
16361 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
16362 && mode2 == QImode)
16363 {
16364 if (! (type = v2si_ftype_int_qi))
16365 type = v2si_ftype_int_qi
16366 = build_function_type_list (opaque_V2SI_type_node,
16367 integer_type_node,
16368 char_type_node,
16369 NULL_TREE);
16370 }
16371
16372 else
16373 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
16374 d->code, d->name);
16375 }
16376
16377 def_builtin (d->name, type, d->code);
16378 }
16379
16380 /* Add the simple unary operators. */
16381 d = bdesc_1arg;
16382 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16383 {
16384 machine_mode mode0, mode1;
16385 tree type;
16386 HOST_WIDE_INT mask = d->mask;
16387
16388 if ((mask & builtin_mask) != mask)
16389 {
16390 if (TARGET_DEBUG_BUILTIN)
16391 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
16392 continue;
16393 }
16394
16395 if (rs6000_overloaded_builtin_p (d->code))
16396 {
16397 if (! (type = opaque_ftype_opaque))
16398 type = opaque_ftype_opaque
16399 = build_function_type_list (opaque_V4SI_type_node,
16400 opaque_V4SI_type_node,
16401 NULL_TREE);
16402 }
16403 else
16404 {
16405 enum insn_code icode = d->icode;
16406 if (d->name == 0)
16407 {
16408 if (TARGET_DEBUG_BUILTIN)
16409 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
16410 (long unsigned)i);
16411
16412 continue;
16413 }
16414
16415 if (icode == CODE_FOR_nothing)
16416 {
16417 if (TARGET_DEBUG_BUILTIN)
16418 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
16419 d->name);
16420
16421 continue;
16422 }
16423
16424 mode0 = insn_data[icode].operand[0].mode;
16425 mode1 = insn_data[icode].operand[1].mode;
16426
16427 if (mode0 == V2SImode && mode1 == QImode)
16428 {
16429 if (! (type = v2si_ftype_qi))
16430 type = v2si_ftype_qi
16431 = build_function_type_list (opaque_V2SI_type_node,
16432 char_type_node,
16433 NULL_TREE);
16434 }
16435
16436 else
16437 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
16438 d->code, d->name);
16439 }
16440
16441 def_builtin (d->name, type, d->code);
16442 }
16443 }
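/* The three loops above are entirely table driven: a new builtin is
   normally added as a new row in bdesc_1arg, bdesc_2arg or bdesc_3arg,
   plus a case in builtin_function_type if it needs unsigned types.  */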
16444
16445 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
16446 static void
16447 init_float128_ibm (machine_mode mode)
16448 {
16449 if (!TARGET_XL_COMPAT)
16450 {
16451 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
16452 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
16453 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
16454 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
16455
16456 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
16457 {
16458 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
16459 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
16460 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
16461 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
16462 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
16463 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
16464 set_optab_libfunc (le_optab, mode, "__gcc_qle");
16465
16466 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
16467 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
16468 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
16469 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
16470 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
16471 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
16472 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
16473 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
16474 }
16475
16476 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
16477 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
16478 }
16479 else
16480 {
16481 set_optab_libfunc (add_optab, mode, "_xlqadd");
16482 set_optab_libfunc (sub_optab, mode, "_xlqsub");
16483 set_optab_libfunc (smul_optab, mode, "_xlqmul");
16484 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
16485 }
16486
16487 /* Add various conversions for IFmode to use the traditional TFmode
16488 names. */
16489 if (mode == IFmode)
16490 {
16491 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
16492 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
16493 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
16494 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
16495 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
16496 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
16497
16498 if (TARGET_POWERPC64)
16499 {
16500 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
16501 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
16502 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
16503 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
16504 }
16505 }
16506 }
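/* A minimal illustration (a sketch, not code from this file): on 64-bit
   Linux without -mxl-compat,

       long double f (long double a, long double b) { return a + b; }

   compiles to a call to __gcc_qadd from the table above.  */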
16507
16508 /* Set up IEEE 128-bit floating point routines. Use different names if the
16509 arguments can be passed in a vector register. The historical PowerPC
16510 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
16511 continue to use that if we aren't using vector registers to pass IEEE
16512 128-bit floating point. */
16513
16514 static void
16515 init_float128_ieee (machine_mode mode)
16516 {
16517 if (FLOAT128_VECTOR_P (mode))
16518 {
16519 set_optab_libfunc (add_optab, mode, "__addkf3");
16520 set_optab_libfunc (sub_optab, mode, "__subkf3");
16521 set_optab_libfunc (neg_optab, mode, "__negkf2");
16522 set_optab_libfunc (smul_optab, mode, "__mulkf3");
16523 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
16524 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
16525 set_optab_libfunc (abs_optab, mode, "__abskf2");
16526
16527 set_optab_libfunc (eq_optab, mode, "__eqkf2");
16528 set_optab_libfunc (ne_optab, mode, "__nekf2");
16529 set_optab_libfunc (gt_optab, mode, "__gtkf2");
16530 set_optab_libfunc (ge_optab, mode, "__gekf2");
16531 set_optab_libfunc (lt_optab, mode, "__ltkf2");
16532 set_optab_libfunc (le_optab, mode, "__lekf2");
16533 set_optab_libfunc (unord_optab, mode, "__unordkf2");
16534 set_optab_libfunc (cmp_optab, mode, "__cmpokf2"); /* fcmpo */
16535 set_optab_libfunc (ucmp_optab, mode, "__cmpukf2"); /* fcmpu */
16536
16537 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
16538 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
16539 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
16540 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
16541
16542 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
16543 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
16544 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
16545
16546 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
16547 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
16548 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
16549
16550 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
16551 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
16552 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
16553 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
16554 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
16555 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
16556
16557 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
16558 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
16559 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
16560 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
16561
16562 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
16563 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
16564 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
16565 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
16566
16567 if (TARGET_POWERPC64)
16568 {
16569 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
16570 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
16571 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
16572 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
16573 }
16574 }
16575
16576 else
16577 {
16578 set_optab_libfunc (add_optab, mode, "_q_add");
16579 set_optab_libfunc (sub_optab, mode, "_q_sub");
16580 set_optab_libfunc (neg_optab, mode, "_q_neg");
16581 set_optab_libfunc (smul_optab, mode, "_q_mul");
16582 set_optab_libfunc (sdiv_optab, mode, "_q_div");
16583 if (TARGET_PPC_GPOPT)
16584 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
16585
16586 set_optab_libfunc (eq_optab, mode, "_q_feq");
16587 set_optab_libfunc (ne_optab, mode, "_q_fne");
16588 set_optab_libfunc (gt_optab, mode, "_q_fgt");
16589 set_optab_libfunc (ge_optab, mode, "_q_fge");
16590 set_optab_libfunc (lt_optab, mode, "_q_flt");
16591 set_optab_libfunc (le_optab, mode, "_q_fle");
16592
16593 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
16594 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
16595 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
16596 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
16597 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
16598 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
16599 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
16600 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
16601 }
16602 }
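/* A minimal illustration (a sketch, not code from this file): when IEEE
   128-bit values live in vector registers,

       __float128 f (__float128 a, __float128 b) { return a + b; }

   becomes a call to __addkf3; the _q_* names serve the older software
   floating point convention.  */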
16603
16604 static void
16605 rs6000_init_libfuncs (void)
16606 {
16607 /* __float128 support. */
16608 if (TARGET_FLOAT128)
16609 {
16610 init_float128_ibm (IFmode);
16611 init_float128_ieee (KFmode);
16612 }
16613
16614 /* AIX/Darwin/64-bit Linux quad floating point routines. */
16615 if (TARGET_LONG_DOUBLE_128)
16616 {
16617 if (!TARGET_IEEEQUAD)
16618 init_float128_ibm (TFmode);
16619
16620 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
16621 else
16622 init_float128_ieee (TFmode);
16623 }
16624 }
16625
16626 \f
16627 /* Expand a block clear operation, and return 1 if successful. Return 0
16628 if we should let the compiler generate normal code.
16629
16630 operands[0] is the destination
16631 operands[1] is the length
16632 operands[3] is the alignment */
16633
16634 int
16635 expand_block_clear (rtx operands[])
16636 {
16637 rtx orig_dest = operands[0];
16638 rtx bytes_rtx = operands[1];
16639 rtx align_rtx = operands[3];
16640 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
16641 HOST_WIDE_INT align;
16642 HOST_WIDE_INT bytes;
16643 int offset;
16644 int clear_bytes;
16645 int clear_step;
16646
16647 /* If this is not a fixed size clear, just call memset.  */
16648 if (! constp)
16649 return 0;
16650
16651 /* The alignment must be a compile-time constant.  */
16652 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
16653 align = INTVAL (align_rtx) * BITS_PER_UNIT;
16654
16655 /* Anything to clear? */
16656 bytes = INTVAL (bytes_rtx);
16657 if (bytes <= 0)
16658 return 1;
16659
16660 /* Fall back to the library memset beyond a certain size, to avoid huge
16661 code bloat. When optimizing for size, avoid any significant bloat:
16662 calling memset costs about 4 instructions, so allow for one instruction
16663 to load zero and three to do the clearing. */
16664 if (TARGET_ALTIVEC && align >= 128)
16665 clear_step = 16;
16666 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
16667 clear_step = 8;
16668 else if (TARGET_SPE && align >= 64)
16669 clear_step = 8;
16670 else
16671 clear_step = 4;
16672
16673 if (optimize_size && bytes > 3 * clear_step)
16674 return 0;
16675 if (! optimize_size && bytes > 8 * clear_step)
16676 return 0;
16677
16678 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
16679 {
16680 machine_mode mode = BLKmode;
16681 rtx dest;
16682
16683 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
16684 {
16685 clear_bytes = 16;
16686 mode = V4SImode;
16687 }
16688 else if (bytes >= 8 && TARGET_SPE && align >= 64)
16689 {
16690 clear_bytes = 8;
16691 mode = V2SImode;
16692 }
16693 else if (bytes >= 8 && TARGET_POWERPC64
16694 && (align >= 64 || !STRICT_ALIGNMENT))
16695 {
16696 clear_bytes = 8;
16697 mode = DImode;
16698 if (offset == 0 && align < 64)
16699 {
16700 rtx addr;
16701
16702 /* If the address form is reg+offset with offset not a
16703 multiple of four, reload into reg indirect form here
16704 rather than waiting for reload. This way we get one
16705 reload, not one per store. */
16706 addr = XEXP (orig_dest, 0);
16707 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16708 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16709 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16710 {
16711 addr = copy_addr_to_reg (addr);
16712 orig_dest = replace_equiv_address (orig_dest, addr);
16713 }
16714 }
16715 }
16716 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16717 { /* move 4 bytes */
16718 clear_bytes = 4;
16719 mode = SImode;
16720 }
16721 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16722 { /* move 2 bytes */
16723 clear_bytes = 2;
16724 mode = HImode;
16725 }
16726 else /* move 1 byte at a time */
16727 {
16728 clear_bytes = 1;
16729 mode = QImode;
16730 }
16731
16732 dest = adjust_address (orig_dest, mode, offset);
16733
16734 emit_move_insn (dest, CONST0_RTX (mode));
16735 }
16736
16737 return 1;
16738 }
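/* A minimal illustration (a sketch, not code from this file): clearing
   16 bytes known to be 16-byte aligned with AltiVec takes one pass of
   the loop above (a single V4SI zero store), while a 7-byte clear on a
   non-strict-alignment target decomposes into 4-, 2- and 1-byte
   stores.  */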
16739
16740 \f
16741 /* Expand a block move operation, and return 1 if successful. Return 0
16742 if we should let the compiler generate normal code.
16743
16744 operands[0] is the destination
16745 operands[1] is the source
16746 operands[2] is the length
16747 operands[3] is the alignment */
16748
16749 #define MAX_MOVE_REG 4
16750
16751 int
16752 expand_block_move (rtx operands[])
16753 {
16754 rtx orig_dest = operands[0];
16755 rtx orig_src = operands[1];
16756 rtx bytes_rtx = operands[2];
16757 rtx align_rtx = operands[3];
16758 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
16759 int align;
16760 int bytes;
16761 int offset;
16762 int move_bytes;
16763 rtx stores[MAX_MOVE_REG];
16764 int num_reg = 0;
16765
16766 /* If this is not a fixed size move, just call memcpy.  */
16767 if (! constp)
16768 return 0;
16769
16770 /* The alignment must be a compile-time constant.  */
16771 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
16772 align = INTVAL (align_rtx) * BITS_PER_UNIT;
16773
16774 /* Anything to move? */
16775 bytes = INTVAL (bytes_rtx);
16776 if (bytes <= 0)
16777 return 1;
16778
16779 if (bytes > rs6000_block_move_inline_limit)
16780 return 0;
16781
16782 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
16783 {
16784 union {
16785 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
16786 rtx (*mov) (rtx, rtx);
16787 } gen_func;
16788 machine_mode mode = BLKmode;
16789 rtx src, dest;
16790
16791 /* Altivec first, since it will be faster than a string move
16792 when it applies, and usually not significantly larger. */
16793 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
16794 {
16795 move_bytes = 16;
16796 mode = V4SImode;
16797 gen_func.mov = gen_movv4si;
16798 }
16799 else if (TARGET_SPE && bytes >= 8 && align >= 64)
16800 {
16801 move_bytes = 8;
16802 mode = V2SImode;
16803 gen_func.mov = gen_movv2si;
16804 }
16805 else if (TARGET_STRING
16806 && bytes > 24 /* move up to 32 bytes at a time */
16807 && ! fixed_regs[5]
16808 && ! fixed_regs[6]
16809 && ! fixed_regs[7]
16810 && ! fixed_regs[8]
16811 && ! fixed_regs[9]
16812 && ! fixed_regs[10]
16813 && ! fixed_regs[11]
16814 && ! fixed_regs[12])
16815 {
16816 move_bytes = (bytes > 32) ? 32 : bytes;
16817 gen_func.movmemsi = gen_movmemsi_8reg;
16818 }
16819 else if (TARGET_STRING
16820 && bytes > 16 /* move up to 24 bytes at a time */
16821 && ! fixed_regs[5]
16822 && ! fixed_regs[6]
16823 && ! fixed_regs[7]
16824 && ! fixed_regs[8]
16825 && ! fixed_regs[9]
16826 && ! fixed_regs[10])
16827 {
16828 move_bytes = (bytes > 24) ? 24 : bytes;
16829 gen_func.movmemsi = gen_movmemsi_6reg;
16830 }
16831 else if (TARGET_STRING
16832 && bytes > 8 /* move up to 16 bytes at a time */
16833 && ! fixed_regs[5]
16834 && ! fixed_regs[6]
16835 && ! fixed_regs[7]
16836 && ! fixed_regs[8])
16837 {
16838 move_bytes = (bytes > 16) ? 16 : bytes;
16839 gen_func.movmemsi = gen_movmemsi_4reg;
16840 }
16841 else if (bytes >= 8 && TARGET_POWERPC64
16842 && (align >= 64 || !STRICT_ALIGNMENT))
16843 {
16844 move_bytes = 8;
16845 mode = DImode;
16846 gen_func.mov = gen_movdi;
16847 if (offset == 0 && align < 64)
16848 {
16849 rtx addr;
16850
16851 /* If the address form is reg+offset with offset not a
16852 multiple of four, reload into reg indirect form here
16853 rather than waiting for reload. This way we get one
16854 reload, not one per load and/or store. */
16855 addr = XEXP (orig_dest, 0);
16856 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16857 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16858 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16859 {
16860 addr = copy_addr_to_reg (addr);
16861 orig_dest = replace_equiv_address (orig_dest, addr);
16862 }
16863 addr = XEXP (orig_src, 0);
16864 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16865 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16866 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16867 {
16868 addr = copy_addr_to_reg (addr);
16869 orig_src = replace_equiv_address (orig_src, addr);
16870 }
16871 }
16872 }
16873 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
16874 { /* move up to 8 bytes at a time */
16875 move_bytes = (bytes > 8) ? 8 : bytes;
16876 gen_func.movmemsi = gen_movmemsi_2reg;
16877 }
16878 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16879 { /* move 4 bytes */
16880 move_bytes = 4;
16881 mode = SImode;
16882 gen_func.mov = gen_movsi;
16883 }
16884 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16885 { /* move 2 bytes */
16886 move_bytes = 2;
16887 mode = HImode;
16888 gen_func.mov = gen_movhi;
16889 }
16890 else if (TARGET_STRING && bytes > 1)
16891 { /* move up to 4 bytes at a time */
16892 move_bytes = (bytes > 4) ? 4 : bytes;
16893 gen_func.movmemsi = gen_movmemsi_1reg;
16894 }
16895 else /* move 1 byte at a time */
16896 {
16897 move_bytes = 1;
16898 mode = QImode;
16899 gen_func.mov = gen_movqi;
16900 }
16901
16902 src = adjust_address (orig_src, mode, offset);
16903 dest = adjust_address (orig_dest, mode, offset);
16904
16905 if (mode != BLKmode)
16906 {
16907 rtx tmp_reg = gen_reg_rtx (mode);
16908
16909 emit_insn ((*gen_func.mov) (tmp_reg, src));
16910 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16911 }
16912
16913 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16914 {
16915 int i;
16916 for (i = 0; i < num_reg; i++)
16917 emit_insn (stores[i]);
16918 num_reg = 0;
16919 }
16920
16921 if (mode == BLKmode)
16922 {
16923 /* Move the address into scratch registers. The movmemsi
16924 patterns require zero offset. */
16925 if (!REG_P (XEXP (src, 0)))
16926 {
16927 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16928 src = replace_equiv_address (src, src_reg);
16929 }
16930 set_mem_size (src, move_bytes);
16931
16932 if (!REG_P (XEXP (dest, 0)))
16933 {
16934 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16935 dest = replace_equiv_address (dest, dest_reg);
16936 }
16937 set_mem_size (dest, move_bytes);
16938
16939 emit_insn ((*gen_func.movmemsi) (dest, src,
16940 GEN_INT (move_bytes & 31),
16941 align_rtx));
16942 }
16943 }
16944
16945 return 1;
16946 }
16947
16948 \f
16949 /* Return a string to perform a load_multiple operation.
16950 operands[0] is the vector.
16951 operands[1] is the source address.
16952 operands[2] is the first destination register. */
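
/* A hypothetical example of the overlap handled below: with a 3-word
   vector, operands[1] = r11 and operands[2] = r9, the address register
   r11 is also the last destination register (r9,r10,r11), so the code
   emits

	lswi 9,11,8
	lwz 11,8(11)

   loading the overlapping word last so the address survives until it is
   no longer needed.  */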
16953
16954 const char *
16955 rs6000_output_load_multiple (rtx operands[3])
16956 {
16957 /* We have to handle the case where the pseudo used to contain the address
16958 is assigned to one of the output registers. */
16959 int i, j;
16960 int words = XVECLEN (operands[0], 0);
16961 rtx xop[10];
16962
16963 if (words == 1)
16964 return "lwz %2,0(%1)";
16965
16966 for (i = 0; i < words; i++)
16967 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
16968 {
16969 if (i == words-1)
16970 {
16971 xop[0] = GEN_INT (4 * (words-1));
16972 xop[1] = operands[1];
16973 xop[2] = operands[2];
16974 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16975 return "";
16976 }
16977 else if (i == 0)
16978 {
16979 xop[0] = GEN_INT (4 * (words-1));
16980 xop[1] = operands[1];
16981 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16982 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16983 return "";
16984 }
16985 else
16986 {
16987 for (j = 0; j < words; j++)
16988 if (j != i)
16989 {
16990 xop[0] = GEN_INT (j * 4);
16991 xop[1] = operands[1];
16992 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16993 output_asm_insn ("lwz %2,%0(%1)", xop);
16994 }
16995 xop[0] = GEN_INT (i * 4);
16996 xop[1] = operands[1];
16997 output_asm_insn ("lwz %1,%0(%1)", xop);
16998 return "";
16999 }
17000 }
17001
17002 return "lswi %2,%1,%N0";
17003 }
17004
17005 \f
17006 /* A validation routine: say whether CODE, a condition code, and MODE
17007 match. The other alternatives either don't make sense or should
17008 never be generated. */
17009
17010 void
17011 validate_condition_mode (enum rtx_code code, machine_mode mode)
17012 {
17013 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
17014 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
17015 && GET_MODE_CLASS (mode) == MODE_CC);
17016
17017 /* These don't make sense. */
17018 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
17019 || mode != CCUNSmode);
17020
17021 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
17022 || mode == CCUNSmode);
17023
17024 gcc_assert (mode == CCFPmode
17025 || (code != ORDERED && code != UNORDERED
17026 && code != UNEQ && code != LTGT
17027 && code != UNGT && code != UNLT
17028 && code != UNGE && code != UNLE));
17029
17030 /* These should never be generated except for
17031 flag_finite_math_only. */
17032 gcc_assert (mode != CCFPmode
17033 || flag_finite_math_only
17034 || (code != LE && code != GE
17035 && code != UNEQ && code != LTGT
17036 && code != UNGT && code != UNLT));
17037
17038 /* These are invalid; the information is not there. */
17039 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
17040 }
17041
17042 \f
17043 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
17044 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
17045 non-null, store there the bit offset (counted from the right) where
17046 the single stretch of 1 bits begins; and similarly for B, the bit
17047 offset where it ends. */
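
/* A worked example (illustrative, not from the original source): for
   MASK = 0x00000ff0 in SImode the single run of ones occupies bits 4..11
   counting from the right, so *E = 4 and *B = 11.  A mask with more than
   one run, e.g. 0x00000f0f, is rejected, while a wrap-around mask such
   as 0xf000000f (one run from bit 28 around to bit 3) is accepted with
   *E = 28 and *B = 3.  */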
17048
17049 bool
17050 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
17051 {
17052 unsigned HOST_WIDE_INT val = INTVAL (mask);
17053 unsigned HOST_WIDE_INT bit;
17054 int nb, ne;
17055 int n = GET_MODE_PRECISION (mode);
17056
17057 if (mode != DImode && mode != SImode)
17058 return false;
17059
17060 if (INTVAL (mask) >= 0)
17061 {
17062 bit = val & -val;
17063 ne = exact_log2 (bit);
17064 nb = exact_log2 (val + bit);
17065 }
17066 else if (val + 1 == 0)
17067 {
17068 nb = n;
17069 ne = 0;
17070 }
17071 else if (val & 1)
17072 {
17073 val = ~val;
17074 bit = val & -val;
17075 nb = exact_log2 (bit);
17076 ne = exact_log2 (val + bit);
17077 }
17078 else
17079 {
17080 bit = val & -val;
17081 ne = exact_log2 (bit);
17082 if (val + bit == 0)
17083 nb = n;
17084 else
17085 nb = 0;
17086 }
17087
17088 nb--;
17089
17090 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
17091 return false;
17092
17093 if (b)
17094 *b = nb;
17095 if (e)
17096 *e = ne;
17097
17098 return true;
17099 }
17100
17101 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
17102 or rldicr instruction, to implement an AND with it in mode MODE. */
17103
17104 bool
17105 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
17106 {
17107 int nb, ne;
17108
17109 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17110 return false;
17111
17112 /* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
17113 does not wrap. */
17114 if (mode == DImode)
17115 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
17116
17117 /* For SImode, rlwinm can do everything. */
17118 if (mode == SImode)
17119 return (nb < 32 && ne < 32);
17120
17121 return false;
17122 }
17123
17124 /* Return the instruction template for an AND with mask in mode MODE, with
17125 operands OPERANDS. If DOT is true, make it a record-form instruction. */
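
/* For example (hypothetical operands): an SImode AND with mask 0xfff0
   has ne = 4 and nb = 15, so the template becomes "rlwinm %0,%1,0,16,27":
   rotate by 0, keeping big-endian bits 16..27, i.e. little-endian bits
   4..15.  */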
17126
17127 const char *
17128 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
17129 {
17130 int nb, ne;
17131
17132 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
17133 gcc_unreachable ();
17134
17135 if (mode == DImode && ne == 0)
17136 {
17137 operands[3] = GEN_INT (63 - nb);
17138 if (dot)
17139 return "rldicl. %0,%1,0,%3";
17140 return "rldicl %0,%1,0,%3";
17141 }
17142
17143 if (mode == DImode && nb == 63)
17144 {
17145 operands[3] = GEN_INT (63 - ne);
17146 if (dot)
17147 return "rldicr. %0,%1,0,%3";
17148 return "rldicr %0,%1,0,%3";
17149 }
17150
17151 if (nb < 32 && ne < 32)
17152 {
17153 operands[3] = GEN_INT (31 - nb);
17154 operands[4] = GEN_INT (31 - ne);
17155 if (dot)
17156 return "rlwinm. %0,%1,0,%3,%4";
17157 return "rlwinm %0,%1,0,%3,%4";
17158 }
17159
17160 gcc_unreachable ();
17161 }
17162
17163 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
17164 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
17165 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
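
/* An illustrative case: (x << 4) & 0xfff0 in SImode has sh = 4, ne = 4
   and nb = 15; the ASHIFT test below passes (ne >= sh), so the pair can
   be done as one rlwinm %0,%1,4,16,27.  By contrast, (x << 8) & 0xfff0
   is rejected, since mask bits below the shift count (ne < sh) could
   never be set after the shift.  */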
17166
17167 bool
17168 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
17169 {
17170 int nb, ne;
17171
17172 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17173 return false;
17174
17175 int n = GET_MODE_PRECISION (mode);
17176 int sh = -1;
17177
17178 if (CONST_INT_P (XEXP (shift, 1)))
17179 {
17180 sh = INTVAL (XEXP (shift, 1));
17181 if (sh < 0 || sh >= n)
17182 return false;
17183 }
17184
17185 rtx_code code = GET_CODE (shift);
17186
17187 /* Convert any shift by 0 to a rotate, to simplify the code below. */
17188 if (sh == 0)
17189 code = ROTATE;
17190
17191 /* Convert rotate to simple shift if we can, to make analysis simpler. */
17192 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
17193 code = ASHIFT;
17194 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
17195 {
17196 code = LSHIFTRT;
17197 sh = n - sh;
17198 }
17199
17200 /* DImode rotates need rld*. */
17201 if (mode == DImode && code == ROTATE)
17202 return (nb == 63 || ne == 0 || ne == sh);
17203
17204 /* SImode rotates need rlw*. */
17205 if (mode == SImode && code == ROTATE)
17206 return (nb < 32 && ne < 32 && sh < 32);
17207
17208 /* Wrap-around masks are only okay for rotates. */
17209 if (ne > nb)
17210 return false;
17211
17212 /* Variable shifts are only okay for rotates. */
17213 if (sh < 0)
17214 return false;
17215
17216 /* Don't allow ASHIFT if the mask is wrong for that. */
17217 if (code == ASHIFT && ne < sh)
17218 return false;
17219
17220 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
17221 if the mask is wrong for that. */
17222 if (nb < 32 && ne < 32 && sh < 32
17223 && !(code == LSHIFTRT && nb >= 32 - sh))
17224 return true;
17225
17226 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
17227 if the mask is wrong for that. */
17228 if (code == LSHIFTRT)
17229 sh = 64 - sh;
17230 if (nb == 63 || ne == 0 || ne == sh)
17231 return !(code == LSHIFTRT && nb >= sh);
17232
17233 return false;
17234 }
17235
17236 /* Return the instruction template for a shift with mask in mode MODE, with
17237 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17238
17239 const char *
17240 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
17241 {
17242 int nb, ne;
17243
17244 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
17245 gcc_unreachable ();
17246
17247 if (mode == DImode && ne == 0)
17248 {
17249 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17250 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
17251 operands[3] = GEN_INT (63 - nb);
17252 if (dot)
17253 return "rld%I2cl. %0,%1,%2,%3";
17254 return "rld%I2cl %0,%1,%2,%3";
17255 }
17256
17257 if (mode == DImode && nb == 63)
17258 {
17259 operands[3] = GEN_INT (63 - ne);
17260 if (dot)
17261 return "rld%I2cr. %0,%1,%2,%3";
17262 return "rld%I2cr %0,%1,%2,%3";
17263 }
17264
17265 if (mode == DImode
17266 && GET_CODE (operands[4]) != LSHIFTRT
17267 && CONST_INT_P (operands[2])
17268 && ne == INTVAL (operands[2]))
17269 {
17270 operands[3] = GEN_INT (63 - nb);
17271 if (dot)
17272 return "rld%I2c. %0,%1,%2,%3";
17273 return "rld%I2c %0,%1,%2,%3";
17274 }
17275
17276 if (nb < 32 && ne < 32)
17277 {
17278 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17279 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
17280 operands[3] = GEN_INT (31 - nb);
17281 operands[4] = GEN_INT (31 - ne);
17282 if (dot)
17283 return "rlw%I2nm. %0,%1,%2,%3,%4";
17284 return "rlw%I2nm %0,%1,%2,%3,%4";
17285 }
17286
17287 gcc_unreachable ();
17288 }
17289
17290 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
17291 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
17292 ASHIFT, or LSHIFTRT) in mode MODE. */
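
/* The insert being matched computes, in effect,
   dst = (dst & ~MASK) | (rotate (src, SHIFT) & MASK).  A hypothetical
   SImode example: MASK = 0xff00 with an ASHIFT by 8 gives ne = 8 = sh,
   which rlwimi handles directly as "rlwimi %0,%1,8,16,23".  */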
17293
17294 bool
17295 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
17296 {
17297 int nb, ne;
17298
17299 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17300 return false;
17301
17302 int n = GET_MODE_PRECISION (mode);
17303
17304 int sh = INTVAL (XEXP (shift, 1));
17305 if (sh < 0 || sh >= n)
17306 return false;
17307
17308 rtx_code code = GET_CODE (shift);
17309
17310 /* Convert any shift by 0 to a rotate, to simplify the code below. */
17311 if (sh == 0)
17312 code = ROTATE;
17313
17314 /* Convert rotate to simple shift if we can, to make analysis simpler. */
17315 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
17316 code = ASHIFT;
17317 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
17318 {
17319 code = LSHIFTRT;
17320 sh = n - sh;
17321 }
17322
17323 /* DImode rotates need rldimi. */
17324 if (mode == DImode && code == ROTATE)
17325 return (ne == sh);
17326
17327 /* SImode rotates need rlwimi. */
17328 if (mode == SImode && code == ROTATE)
17329 return (nb < 32 && ne < 32 && sh < 32);
17330
17331 /* Wrap-around masks are only okay for rotates. */
17332 if (ne > nb)
17333 return false;
17334
17335 /* Don't allow ASHIFT if the mask is wrong for that. */
17336 if (code == ASHIFT && ne < sh)
17337 return false;
17338
17339 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
17340 if the mask is wrong for that. */
17341 if (nb < 32 && ne < 32 && sh < 32
17342 && !(code == LSHIFTRT && nb >= 32 - sh))
17343 return true;
17344
17345 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
17346 if the mask is wrong for that. */
17347 if (code == LSHIFTRT)
17348 sh = 64 - sh;
17349 if (ne == sh)
17350 return !(code == LSHIFTRT && nb >= sh);
17351
17352 return false;
17353 }
17354
17355 /* Return the instruction template for an insert with mask in mode MODE, with
17356 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17357
17358 const char *
17359 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
17360 {
17361 int nb, ne;
17362
17363 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
17364 gcc_unreachable ();
17365
17366 /* Prefer rldimi because rlwimi is cracked. */
17367 if (TARGET_POWERPC64
17368 && (!dot || mode == DImode)
17369 && GET_CODE (operands[4]) != LSHIFTRT
17370 && ne == INTVAL (operands[2]))
17371 {
17372 operands[3] = GEN_INT (63 - nb);
17373 if (dot)
17374 return "rldimi. %0,%1,%2,%3";
17375 return "rldimi %0,%1,%2,%3";
17376 }
17377
17378 if (nb < 32 && ne < 32)
17379 {
17380 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17381 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
17382 operands[3] = GEN_INT (31 - nb);
17383 operands[4] = GEN_INT (31 - ne);
17384 if (dot)
17385 return "rlwimi. %0,%1,%2,%3,%4";
17386 return "rlwimi %0,%1,%2,%3,%4";
17387 }
17388
17389 gcc_unreachable ();
17390 }
17391
17392 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
17393 using two machine instructions. */
17394
17395 bool
17396 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
17397 {
17398 /* There are two kinds of AND we can handle with two insns:
17399 1) those we can do with two rl* insn;
17400 2) ori[s];xori[s].
17401
17402 We do not handle that last case yet. */
17403
17404 /* If there is just one stretch of ones, we can do it. */
17405 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
17406 return true;
17407
17408 /* Otherwise, fill in the lowest "hole"; if we can do the result with
17409 one insn, we can do the whole thing with two. */
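  /* Worked example (illustrative constant): for c = 0x00ff00ff we get
     bit1 = 0x1 (lowest set bit), bit2 = 0x100 (lowest clear bit above
     the bottom run) and bit3 = 0x10000 (start of the next run), so
     val + bit3 - bit2 = 0x00ffffff: the lowest hole is filled and the
     single remaining run is a valid one-insn AND mask.  */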
17410 unsigned HOST_WIDE_INT val = INTVAL (c);
17411 unsigned HOST_WIDE_INT bit1 = val & -val;
17412 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
17413 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
17414 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
17415 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
17416 }
17417
17418 /* Emit a potentially record-form instruction, setting DST from SRC.
17419 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17420 signed comparison of DST with zero. If DOT is 1, the generated RTL
17421 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17422 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17423 a separate COMPARE. */
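
/* A sketch of the emitted RTL (hypothetical regs): with DOT == 2 and
   CCREG == cr0 this emits

	(parallel [(set (reg:CC 68) (compare:CC (src) (const_int 0)))
		   (set (dst) (src))])

   while DOT == 1 replaces the inner SET of dst with a CLOBBER.  */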
17424
17425 static void
17426 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
17427 {
17428 if (dot == 0)
17429 {
17430 emit_move_insn (dst, src);
17431 return;
17432 }
17433
17434 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17435 {
17436 emit_move_insn (dst, src);
17437 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
17438 return;
17439 }
17440
17441 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
17442 if (dot == 1)
17443 {
17444 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
17445 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
17446 }
17447 else
17448 {
17449 rtx set = gen_rtx_SET (dst, src);
17450 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
17451 }
17452 }
17453
17454 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
17455 If EXPAND is true, split rotate-and-mask instructions we generate to
17456 their constituent parts as well (this is used during expand); if DOT
17457 is 1, make the last insn a record-form instruction clobbering the
17458 destination GPR and setting the CC reg (from operands[3]); if 2, set
17459 that GPR as well as the CC reg. */
17460
17461 void
17462 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
17463 {
17464 gcc_assert (!(expand && dot));
17465
17466 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
17467
17468 /* If it is one stretch of ones, it is DImode; shift left, mask, then
17469 shift right. This generates better code than doing the masks without
17470 shifts, or shifting first right and then left. */
17471 int nb, ne;
17472 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
17473 {
17474 gcc_assert (mode == DImode);
17475
17476 int shift = 63 - nb;
17477 if (expand)
17478 {
17479 rtx tmp1 = gen_reg_rtx (DImode);
17480 rtx tmp2 = gen_reg_rtx (DImode);
17481 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
17482 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
17483 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
17484 }
17485 else
17486 {
17487 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
17488 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
17489 emit_move_insn (operands[0], tmp);
17490 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
17491 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17492 }
17493 return;
17494 }
17495
17496 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
17497 that does the rest. */
17498 unsigned HOST_WIDE_INT bit1 = val & -val;
17499 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
17500 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
17501 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
17502
17503 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
17504 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
17505
17506 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
17507
17508 /* Two "no-rotate"-and-mask instructions, for SImode. */
17509 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
17510 {
17511 gcc_assert (mode == SImode);
17512
17513 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
17514 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
17515 emit_move_insn (reg, tmp);
17516 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
17517 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17518 return;
17519 }
17520
17521 gcc_assert (mode == DImode);
17522
17523 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
17524 insns; we have to do the first in SImode, because it wraps. */
17525 if (mask2 <= 0xffffffff
17526 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
17527 {
17528 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
17529 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
17530 GEN_INT (mask1));
17531 rtx reg_low = gen_lowpart (SImode, reg);
17532 emit_move_insn (reg_low, tmp);
17533 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
17534 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17535 return;
17536 }
17537
17538 /* Two rld* insns: rotate, clear the hole in the middle (which now is
17539 at the top end), rotate back and clear the other hole. */
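  /* Worked example (hypothetical constant): for val = 0xff00ff0000000000
     we get bit2 = 2**48 and bit3 = 2**56, so mask2 = 0xffffff0000000000
     (an rldicr mask) and right = 56, left = 8; rotating left by 8 turns
     mask1 = 0xff00ffffffffffff into 0x00ffffffffffffff (an rldicl mask),
     giving the sequence rotldi, rldicl, rotldi, rldicr.  */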
17540 int right = exact_log2 (bit3);
17541 int left = 64 - right;
17542
17543 /* Rotate the mask too. */
17544 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
17545
17546 if (expand)
17547 {
17548 rtx tmp1 = gen_reg_rtx (DImode);
17549 rtx tmp2 = gen_reg_rtx (DImode);
17550 rtx tmp3 = gen_reg_rtx (DImode);
17551 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
17552 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
17553 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
17554 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
17555 }
17556 else
17557 {
17558 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
17559 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
17560 emit_move_insn (operands[0], tmp);
17561 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
17562 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
17563 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17564 }
17565 }
17566 \f
17567 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
17568 for lfq and stfq insns iff the registers are hard registers. */
17569
17570 int
17571 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
17572 {
17573 /* We might have been passed a SUBREG. */
17574 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
17575 return 0;
17576
17577 /* We might have been passed non-floating-point registers. */
17578 if (!FP_REGNO_P (REGNO (reg1))
17579 || !FP_REGNO_P (REGNO (reg2)))
17580 return 0;
17581
17582 return (REGNO (reg1) == REGNO (reg2) - 1);
17583 }
17584
17585 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
17586 addr1 and addr2 must be in consecutive memory locations
17587 (addr2 == addr1 + 8). */
17588
17589 int
17590 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
17591 {
17592 rtx addr1, addr2;
17593 unsigned int reg1, reg2;
17594 int offset1, offset2;
17595
17596 /* The mems cannot be volatile. */
17597 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
17598 return 0;
17599
17600 addr1 = XEXP (mem1, 0);
17601 addr2 = XEXP (mem2, 0);
17602
17603 /* Extract an offset (if used) from the first addr. */
17604 if (GET_CODE (addr1) == PLUS)
17605 {
17606 /* If not a REG, return zero. */
17607 if (GET_CODE (XEXP (addr1, 0)) != REG)
17608 return 0;
17609 else
17610 {
17611 reg1 = REGNO (XEXP (addr1, 0));
17612 /* The offset must be constant! */
17613 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
17614 return 0;
17615 offset1 = INTVAL (XEXP (addr1, 1));
17616 }
17617 }
17618 else if (GET_CODE (addr1) != REG)
17619 return 0;
17620 else
17621 {
17622 reg1 = REGNO (addr1);
17623 /* This was a simple (mem (reg)) expression. Offset is 0. */
17624 offset1 = 0;
17625 }
17626
17627 /* And now for the second addr. */
17628 if (GET_CODE (addr2) == PLUS)
17629 {
17630 /* If not a REG, return zero. */
17631 if (GET_CODE (XEXP (addr2, 0)) != REG)
17632 return 0;
17633 else
17634 {
17635 reg2 = REGNO (XEXP (addr2, 0));
17636 /* The offset must be constant. */
17637 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
17638 return 0;
17639 offset2 = INTVAL (XEXP (addr2, 1));
17640 }
17641 }
17642 else if (GET_CODE (addr2) != REG)
17643 return 0;
17644 else
17645 {
17646 reg2 = REGNO (addr2);
17647 /* This was a simple (mem (reg)) expression. Offset is 0. */
17648 offset2 = 0;
17649 }
17650
17651 /* Both of these must have the same base register. */
17652 if (reg1 != reg2)
17653 return 0;
17654
17655 /* The offset for the second addr must be 8 more than the first addr. */
17656 if (offset2 != offset1 + 8)
17657 return 0;
17658
17659 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
17660 instructions. */
17661 return 1;
17662 }
17663 \f
17664
17665 rtx
17666 rs6000_secondary_memory_needed_rtx (machine_mode mode)
17667 {
17668 static bool eliminated = false;
17669 rtx ret;
17670
17671 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
17672 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
17673 else
17674 {
17675 rtx mem = cfun->machine->sdmode_stack_slot;
17676 gcc_assert (mem != NULL_RTX);
17677
17678 if (!eliminated)
17679 {
17680 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
17681 cfun->machine->sdmode_stack_slot = mem;
17682 eliminated = true;
17683 }
17684 ret = mem;
17685 }
17686
17687 if (TARGET_DEBUG_ADDR)
17688 {
17689 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
17690 GET_MODE_NAME (mode));
17691 if (!ret)
17692 fprintf (stderr, "\tNULL_RTX\n");
17693 else
17694 debug_rtx (ret);
17695 }
17696
17697 return ret;
17698 }
17699
17700 /* Return the mode to be used for memory when a secondary memory
17701 location is needed. For SDmode values we need to use DDmode, in
17702 all other cases we can use the same mode. */
17703 machine_mode
17704 rs6000_secondary_memory_needed_mode (machine_mode mode)
17705 {
17706 if (lra_in_progress && mode == SDmode)
17707 return DDmode;
17708 return mode;
17709 }
17710
17711 static tree
17712 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
17713 {
17714 /* Don't walk into types. */
17715 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
17716 {
17717 *walk_subtrees = 0;
17718 return NULL_TREE;
17719 }
17720
17721 switch (TREE_CODE (*tp))
17722 {
17723 case VAR_DECL:
17724 case PARM_DECL:
17725 case FIELD_DECL:
17726 case RESULT_DECL:
17727 case SSA_NAME:
17728 case REAL_CST:
17729 case MEM_REF:
17730 case VIEW_CONVERT_EXPR:
17731 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
17732 return *tp;
17733 break;
17734 default:
17735 break;
17736 }
17737
17738 return NULL_TREE;
17739 }
17740
17741 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
17742 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
17743 only work on the traditional altivec registers, note if an altivec register
17744 was chosen. */
17745
17746 static enum rs6000_reg_type
17747 register_to_reg_type (rtx reg, bool *is_altivec)
17748 {
17749 HOST_WIDE_INT regno;
17750 enum reg_class rclass;
17751
17752 if (GET_CODE (reg) == SUBREG)
17753 reg = SUBREG_REG (reg);
17754
17755 if (!REG_P (reg))
17756 return NO_REG_TYPE;
17757
17758 regno = REGNO (reg);
17759 if (regno >= FIRST_PSEUDO_REGISTER)
17760 {
17761 if (!lra_in_progress && !reload_in_progress && !reload_completed)
17762 return PSEUDO_REG_TYPE;
17763
17764 regno = true_regnum (reg);
17765 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
17766 return PSEUDO_REG_TYPE;
17767 }
17768
17769 gcc_assert (regno >= 0);
17770
17771 if (is_altivec && ALTIVEC_REGNO_P (regno))
17772 *is_altivec = true;
17773
17774 rclass = rs6000_regno_regclass[regno];
17775 return reg_class_to_reg_type[(int)rclass];
17776 }
17777
17778 /* Helper function to return the cost of adding a TOC entry address. */
17779
17780 static inline int
17781 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
17782 {
17783 int ret;
17784
17785 if (TARGET_CMODEL != CMODEL_SMALL)
17786 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
17787
17788 else
17789 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
17790
17791 return ret;
17792 }
17793
17794 /* Helper function for rs6000_secondary_reload to determine whether the memory
17795 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
17796 needs reloading. Return negative if the memory is not handled by the memory
17797 helper functions and to try a different reload method, 0 if no additional
17798 instructions are needed, and positive to give the extra cost for the
17799 memory. */
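
/* For example (an illustrative sketch, not an exhaustive list): an
   Altivec-style address (and (reg) (const_int -16)) used with a GPR or
   FPR class costs one extra insn if the inner address is (reg) and two
   if it is (plus (reg) (reg)), while a (symbol_ref) address is costed
   through the TOC helper above.  */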
17800
17801 static int
17802 rs6000_secondary_reload_memory (rtx addr,
17803 enum reg_class rclass,
17804 machine_mode mode)
17805 {
17806 int extra_cost = 0;
17807 rtx reg, and_arg, plus_arg0, plus_arg1;
17808 addr_mask_type addr_mask;
17809 const char *type = NULL;
17810 const char *fail_msg = NULL;
17811
17812 if (GPR_REG_CLASS_P (rclass))
17813 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17814
17815 else if (rclass == FLOAT_REGS)
17816 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17817
17818 else if (rclass == ALTIVEC_REGS)
17819 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17820
17821 /* For the combined VSX_REGS, turn off Altivec AND -16. */
17822 else if (rclass == VSX_REGS)
17823 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
17824 & ~RELOAD_REG_AND_M16);
17825
17826 else
17827 {
17828 if (TARGET_DEBUG_ADDR)
17829 fprintf (stderr,
17830 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
17831 "class is not GPR, FPR, VMX\n",
17832 GET_MODE_NAME (mode), reg_class_names[rclass]);
17833
17834 return -1;
17835 }
17836
17837 /* If the register isn't valid in this register class, just return now. */
17838 if ((addr_mask & RELOAD_REG_VALID) == 0)
17839 {
17840 if (TARGET_DEBUG_ADDR)
17841 fprintf (stderr,
17842 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
17843 "not valid in class\n",
17844 GET_MODE_NAME (mode), reg_class_names[rclass]);
17845
17846 return -1;
17847 }
17848
17849 switch (GET_CODE (addr))
17850 {
17851 /* Does the register class support auto update forms for this mode? We
17852 don't need a scratch register, since the powerpc only supports
17853 PRE_INC, PRE_DEC, and PRE_MODIFY. */
17854 case PRE_INC:
17855 case PRE_DEC:
17856 reg = XEXP (addr, 0);
17857 if (!base_reg_operand (reg, GET_MODE (reg)))
17858 {
17859 fail_msg = "no base register #1";
17860 extra_cost = -1;
17861 }
17862
17863 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17864 {
17865 extra_cost = 1;
17866 type = "update";
17867 }
17868 break;
17869
17870 case PRE_MODIFY:
17871 reg = XEXP (addr, 0);
17872 plus_arg1 = XEXP (addr, 1);
17873 if (!base_reg_operand (reg, GET_MODE (reg))
17874 || GET_CODE (plus_arg1) != PLUS
17875 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
17876 {
17877 fail_msg = "bad PRE_MODIFY";
17878 extra_cost = -1;
17879 }
17880
17881 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17882 {
17883 extra_cost = 1;
17884 type = "update";
17885 }
17886 break;
17887
17888 /* Do we need to simulate AND -16 to clear the bottom address bits used
17889 in VMX load/stores? Only allow the AND for vector sizes. */
17890 case AND:
17891 and_arg = XEXP (addr, 0);
17892 if (GET_MODE_SIZE (mode) != 16
17893 || GET_CODE (XEXP (addr, 1)) != CONST_INT
17894 || INTVAL (XEXP (addr, 1)) != -16)
17895 {
17896 fail_msg = "bad Altivec AND #1";
17897 extra_cost = -1;
17898 }
17899
17900 if (rclass != ALTIVEC_REGS)
17901 {
17902 if (legitimate_indirect_address_p (and_arg, false))
17903 extra_cost = 1;
17904
17905 else if (legitimate_indexed_address_p (and_arg, false))
17906 extra_cost = 2;
17907
17908 else
17909 {
17910 fail_msg = "bad Altivec AND #2";
17911 extra_cost = -1;
17912 }
17913
17914 type = "and";
17915 }
17916 break;
17917
17918 /* If this is an indirect address, make sure it is a base register. */
17919 case REG:
17920 case SUBREG:
17921 if (!legitimate_indirect_address_p (addr, false))
17922 {
17923 extra_cost = 1;
17924 type = "move";
17925 }
17926 break;
17927
17928 /* If this is an indexed address, make sure the register class can handle
17929 indexed addresses for this mode. */
17930 case PLUS:
17931 plus_arg0 = XEXP (addr, 0);
17932 plus_arg1 = XEXP (addr, 1);
17933
17934 /* (plus (plus (reg) (constant)) (constant)) is generated during
17935 push_reload processing, so handle it now. */
17936 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
17937 {
17938 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17939 {
17940 extra_cost = 1;
17941 type = "offset";
17942 }
17943 }
17944
17945 /* (plus (plus (reg) (constant)) (reg)) is also generated during
17946 push_reload processing, so handle it now. */
17947 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
17948 {
17949 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17950 {
17951 extra_cost = 1;
17952 type = "indexed #2";
17953 }
17954 }
17955
17956 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
17957 {
17958 fail_msg = "no base register #2";
17959 extra_cost = -1;
17960 }
17961
17962 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
17963 {
17964 if ((addr_mask & RELOAD_REG_INDEXED) == 0
17965 || !legitimate_indexed_address_p (addr, false))
17966 {
17967 extra_cost = 1;
17968 type = "indexed";
17969 }
17970 }
17971
17972 /* Make sure the register class can handle offset addresses. */
17973 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17974 {
17975 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17976 {
17977 extra_cost = 1;
17978 type = "offset";
17979 }
17980 }
17981
17982 else
17983 {
17984 fail_msg = "bad PLUS";
17985 extra_cost = -1;
17986 }
17987
17988 break;
17989
17990 case LO_SUM:
17991 if (!legitimate_lo_sum_address_p (mode, addr, false))
17992 {
17993 fail_msg = "bad LO_SUM";
17994 extra_cost = -1;
17995 }
17996
17997 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17998 {
17999 extra_cost = 1;
18000 type = "lo_sum";
18001 }
18002 break;
18003
18004 /* Static addresses need to create a TOC entry. */
18005 case CONST:
18006 case SYMBOL_REF:
18007 case LABEL_REF:
18008 type = "address";
18009 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
18010 break;
18011
18012 /* TOC references look like offsettable memory. */
18013 case UNSPEC:
18014 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
18015 {
18016 fail_msg = "bad UNSPEC";
18017 extra_cost = -1;
18018 }
18019
18020 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18021 {
18022 extra_cost = 1;
18023 type = "toc reference";
18024 }
18025 break;
18026
18027 default:
18028 {
18029 fail_msg = "bad address";
18030 extra_cost = -1;
18031 }
18032 }
18033
18034 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
18035 {
18036 if (extra_cost < 0)
18037 fprintf (stderr,
18038 "rs6000_secondary_reload_memory error: mode = %s, "
18039 "class = %s, addr_mask = '%s', %s\n",
18040 GET_MODE_NAME (mode),
18041 reg_class_names[rclass],
18042 rs6000_debug_addr_mask (addr_mask, false),
18043 (fail_msg != NULL) ? fail_msg : "<bad address>");
18044
18045 else
18046 fprintf (stderr,
18047 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18048 "addr_mask = '%s', extra cost = %d, %s\n",
18049 GET_MODE_NAME (mode),
18050 reg_class_names[rclass],
18051 rs6000_debug_addr_mask (addr_mask, false),
18052 extra_cost,
18053 (type) ? type : "<none>");
18054
18055 debug_rtx (addr);
18056 }
18057
18058 return extra_cost;
18059 }
18060
18061 /* Helper function for rs6000_secondary_reload to return true if a move to a
18062 different register class is really a simple move. */
18063
18064 static bool
18065 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
18066 enum rs6000_reg_type from_type,
18067 machine_mode mode)
18068 {
18069 int size;
18070
18071 /* Add support for the various direct moves that are available. In this
18072 function, we only look at cases where we don't need any extra registers,
18073 and one or more simple move insns are issued. At present, 32-bit integers
18074 are not allowed in FPR/VSX registers. Single precision binary floating
18075 point is not a simple move; we need to convert to the single precision memory layout.
18076 The 4-byte SDmode can be moved. */
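  /* E.g., on a 64-bit power8, a DImode GPR <-> VSX copy is a single
     mtvsrd or mfvsrd (illustrative; the exact insn is chosen by the
     move patterns, not here).  */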
18077 size = GET_MODE_SIZE (mode);
18078 if (TARGET_DIRECT_MOVE
18079 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
18080 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18081 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
18082 return true;
18083
18084 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
18085 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
18086 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18087 return true;
18088
18089 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
18090 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
18091 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18092 return true;
18093
18094 return false;
18095 }
18096
18097 /* Power8 helper function for rs6000_secondary_reload; handle all of the
18098 special direct moves that involve allocating an extra register. Return
18099 true if there is such a helper function, recording its insn code and
18100 extra cost in SRI; return false if not. */
18101
18102 static bool
18103 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
18104 enum rs6000_reg_type from_type,
18105 machine_mode mode,
18106 secondary_reload_info *sri,
18107 bool altivec_p)
18108 {
18109 bool ret = false;
18110 enum insn_code icode = CODE_FOR_nothing;
18111 int cost = 0;
18112 int size = GET_MODE_SIZE (mode);
18113
18114 if (TARGET_POWERPC64)
18115 {
18116 if (size == 16)
18117 {
18118 /* Handle moving 128-bit values from GPRs to VSX registers on
18119 power8 when running in 64-bit mode using XXPERMDI to glue the two
18120 64-bit values back together. */
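	  /* The helper expands to something like (hypothetical regs)
		mtvsrd vs0,r3
		mtvsrd vs1,r4
		xxpermdi vs34,vs0,vs1,0
	     hence the cost of 3 recorded below.  */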
18121 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18122 {
18123 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18124 icode = reg_addr[mode].reload_vsx_gpr;
18125 }
18126
18127 /* Handle moving 128-bit values from VSX registers to GPRs on
18128 power8 when running in 64-bit mode using XXPERMDI to get access to the
18129 bottom 64-bit value. */
18130 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18131 {
18132 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18133 icode = reg_addr[mode].reload_gpr_vsx;
18134 }
18135 }
18136
18137 else if (mode == SFmode)
18138 {
18139 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18140 {
18141 cost = 3; /* xscvdpspn, mfvsrd, and. */
18142 icode = reg_addr[mode].reload_gpr_vsx;
18143 }
18144
18145 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18146 {
18147 cost = 2; /* mtvsrz, xscvspdpn. */
18148 icode = reg_addr[mode].reload_vsx_gpr;
18149 }
18150 }
18151 }
18152
18153 else if (size == 8)
18154 {
18155 /* Handle moving 64-bit values from GPRs to floating point registers on
18156 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
18157 values back together. Altivec register classes must be handled
18158 specially since a different instruction is used, and the secondary
18159 reload support requires a single instruction class in the scratch
18160 register constraint. However, right now TFmode is not allowed in
18161 Altivec registers, so the pattern will never match. */
18162 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
18163 {
18164 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
18165 icode = reg_addr[mode].reload_fpr_gpr;
18166 }
18167 }
18168
18169 if (icode != CODE_FOR_nothing)
18170 {
18171 ret = true;
18172 if (sri)
18173 {
18174 sri->icode = icode;
18175 sri->extra_cost = cost;
18176 }
18177 }
18178
18179 return ret;
18180 }
18181
18182 /* Return whether a move between two register classes can be done either
18183 directly (simple move) or via a pattern that uses a single extra temporary
18184 (using power8's direct move in this case). */
18185
18186 static bool
18187 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
18188 enum rs6000_reg_type from_type,
18189 machine_mode mode,
18190 secondary_reload_info *sri,
18191 bool altivec_p)
18192 {
18193 /* Fall back to load/store reloads if either type is not a register. */
18194 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
18195 return false;
18196
18197 /* If we haven't allocated registers yet, assume the move can be done for the
18198 standard register types. */
18199 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
18200 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
18201 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
18202 return true;
18203
18204 /* A move within the same set of registers is a simple move for non-specialized
18205 registers. */
18206 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
18207 return true;
18208
18209 /* Check whether a simple move can be done directly. */
18210 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
18211 {
18212 if (sri)
18213 {
18214 sri->icode = CODE_FOR_nothing;
18215 sri->extra_cost = 0;
18216 }
18217 return true;
18218 }
18219
18220 /* Now check if we can do it in a few steps. */
18221 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
18222 altivec_p);
18223 }
18224
18225 /* Inform reload about cases where moving X with a mode MODE to a register in
18226 RCLASS requires an extra scratch or immediate register. Return the class
18227 needed for the immediate register.
18228
18229 For VSX and Altivec, we may need a register to convert sp+offset into
18230 reg+sp.
18231
18232 For misaligned 64-bit gpr loads and stores we need a register to
18233 convert an offset address to indirect. */
18234
18235 static reg_class_t
18236 rs6000_secondary_reload (bool in_p,
18237 rtx x,
18238 reg_class_t rclass_i,
18239 machine_mode mode,
18240 secondary_reload_info *sri)
18241 {
18242 enum reg_class rclass = (enum reg_class) rclass_i;
18243 reg_class_t ret = ALL_REGS;
18244 enum insn_code icode;
18245 bool default_p = false;
18246 bool done_p = false;
18247
18248 /* Allow subreg of memory before/during reload. */
18249 bool memory_p = (MEM_P (x)
18250 || (!reload_completed && GET_CODE (x) == SUBREG
18251 && MEM_P (SUBREG_REG (x))));
18252
18253 sri->icode = CODE_FOR_nothing;
18254 sri->extra_cost = 0;
18255 icode = ((in_p)
18256 ? reg_addr[mode].reload_load
18257 : reg_addr[mode].reload_store);
18258
18259 if (REG_P (x) || register_operand (x, mode))
18260 {
18261 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
18262 bool altivec_p = (rclass == ALTIVEC_REGS);
18263 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
18264
18265 if (!in_p)
18266 {
18267 enum rs6000_reg_type exchange = to_type;
18268 to_type = from_type;
18269 from_type = exchange;
18270 }
18271
18272 /* Can we do a direct move of some sort? */
18273 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
18274 altivec_p))
18275 {
18276 icode = (enum insn_code)sri->icode;
18277 default_p = false;
18278 done_p = true;
18279 ret = NO_REGS;
18280 }
18281 }
18282
18283 /* Make sure 0.0 is not reloaded or forced into memory. */
18284 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
18285 {
18286 ret = NO_REGS;
18287 default_p = false;
18288 done_p = true;
18289 }
18290
18291 /* If this is a scalar floating point value and we want to load it into the
18292 traditional Altivec registers, do it via a move through a traditional
18293 floating point register. Also make sure that non-zero constants use an FPR. */
18294 if (!done_p && reg_addr[mode].scalar_in_vmx_p
18295 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
18296 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
18297 {
18298 ret = FLOAT_REGS;
18299 default_p = false;
18300 done_p = true;
18301 }
18302
18303 /* Handle reload of load/stores if we have reload helper functions. */
18304 if (!done_p && icode != CODE_FOR_nothing && memory_p)
18305 {
18306 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
18307 mode);
18308
18309 if (extra_cost >= 0)
18310 {
18311 done_p = true;
18312 ret = NO_REGS;
18313 if (extra_cost > 0)
18314 {
18315 sri->extra_cost = extra_cost;
18316 sri->icode = icode;
18317 }
18318 }
18319 }
18320
18321 /* Handle unaligned loads and stores of integer registers. */
18322 if (!done_p && TARGET_POWERPC64
18323 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
18324 && memory_p
18325 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
18326 {
18327 rtx addr = XEXP (x, 0);
18328 rtx off = address_offset (addr);
18329
18330 if (off != NULL_RTX)
18331 {
18332 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
18333 unsigned HOST_WIDE_INT offset = INTVAL (off);
18334
18335 /* We need a secondary reload when our legitimate_address_p
18336 says the address is good (as otherwise the entire address
18337 will be reloaded), and the offset is not a multiple of
18338 four or we have an address wrap. Address wrap will only
18339 occur for LO_SUMs since legitimate_offset_address_p
18340 rejects addresses for 16-byte mems that will wrap. */
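	  /* Concretely (a hypothetical case): a DImode access at
	     (plus (reg) (const_int 6)) passes legitimate_address_p, but
	     6 is not a multiple of four so the DS-form ld/std cannot
	     encode it; the test below then requests reload_di_load or
	     reload_di_store with a scratch register to rewrite the
	     address.  */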
18341 if (GET_CODE (addr) == LO_SUM
18342 ? (1 /* legitimate_address_p allows any offset for lo_sum */
18343 && ((offset & 3) != 0
18344 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
18345 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
18346 && (offset & 3) != 0))
18347 {
18348 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
18349 if (in_p)
18350 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
18351 : CODE_FOR_reload_di_load);
18352 else
18353 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
18354 : CODE_FOR_reload_di_store);
18355 sri->extra_cost = 2;
18356 ret = NO_REGS;
18357 done_p = true;
18358 }
18359 else
18360 default_p = true;
18361 }
18362 else
18363 default_p = true;
18364 }
18365
18366 if (!done_p && !TARGET_POWERPC64
18367 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
18368 && memory_p
18369 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
18370 {
18371 rtx addr = XEXP (x, 0);
18372 rtx off = address_offset (addr);
18373
18374 if (off != NULL_RTX)
18375 {
18376 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
18377 unsigned HOST_WIDE_INT offset = INTVAL (off);
18378
18379 /* We need a secondary reload when our legitimate_address_p
18380 says the address is good (as otherwise the entire address
18381 will be reloaded), and we have a wrap.
18382
18383 legitimate_lo_sum_address_p allows LO_SUM addresses to
18384 have any offset so test for wrap in the low 16 bits.
18385
18386 legitimate_offset_address_p checks for the range
18387 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
18388 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
18389 [0x7ff4,0x7fff] respectively, so test for the
18390 intersection of these ranges, [0x7ffc,0x7fff] and
18391 [0x7ff4,0x7ff7] respectively.
18392
18393 Note that the address we see here may have been
18394 manipulated by legitimize_reload_address. */
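	  /* Example of a wrap (hypothetical): a -m32 DImode access at
	     offset 0x7ffe has extra = 4, and 0x7ffe - (0x8000 - 4) = 2
	     is less than UNITS_PER_WORD, so the second lwz/stw of the
	     pair would need the out-of-range offset 0x8002; a scratch
	     register is used instead.  */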
18395 if (GET_CODE (addr) == LO_SUM
18396 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
18397 : offset - (0x8000 - extra) < UNITS_PER_WORD)
18398 {
18399 if (in_p)
18400 sri->icode = CODE_FOR_reload_si_load;
18401 else
18402 sri->icode = CODE_FOR_reload_si_store;
18403 sri->extra_cost = 2;
18404 ret = NO_REGS;
18405 done_p = true;
18406 }
18407 else
18408 default_p = true;
18409 }
18410 else
18411 default_p = true;
18412 }
18413
18414 if (!done_p)
18415 default_p = true;
18416
18417 if (default_p)
18418 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
18419
18420 gcc_assert (ret != ALL_REGS);
18421
18422 if (TARGET_DEBUG_ADDR)
18423 {
18424 fprintf (stderr,
18425 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
18426 "mode = %s",
18427 reg_class_names[ret],
18428 in_p ? "true" : "false",
18429 reg_class_names[rclass],
18430 GET_MODE_NAME (mode));
18431
18432 if (reload_completed)
18433 fputs (", after reload", stderr);
18434
18435 if (!done_p)
18436 fputs (", done_p not set", stderr);
18437
18438 if (default_p)
18439 fputs (", default secondary reload", stderr);
18440
18441 if (sri->icode != CODE_FOR_nothing)
18442 fprintf (stderr, ", reload func = %s, extra cost = %d",
18443 insn_data[sri->icode].name, sri->extra_cost);
18444
18445 fputs ("\n", stderr);
18446 debug_rtx (x);
18447 }
18448
18449 return ret;
18450 }
18451
18452 /* Better tracing for rs6000_secondary_reload_inner. */
18453
18454 static void
18455 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
18456 bool store_p)
18457 {
18458 rtx set, clobber;
18459
18460 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
18461
18462 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
18463 store_p ? "store" : "load");
18464
18465 if (store_p)
18466 set = gen_rtx_SET (mem, reg);
18467 else
18468 set = gen_rtx_SET (reg, mem);
18469
18470 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
18471 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
18472 }
18473
18474 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
18475 ATTRIBUTE_NORETURN;
18476
18477 static void
18478 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
18479 bool store_p)
18480 {
18481 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
18482 gcc_unreachable ();
18483 }
18484
18485 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
18486 reload helper functions. These were identified in
18487 rs6000_secondary_reload_memory, and if reload decided to use the secondary
18488 reload, it calls the insns:
18489 reload_<RELOAD:mode>_<P:mptrsize>_store
18490 reload_<RELOAD:mode>_<P:mptrsize>_load
18491
18492 which in turn calls this function, to do whatever is necessary to create
18493 valid addresses. */
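
/* For instance (an illustrative case): reloading a V4SImode value at
   (plus (reg r1) (const_int 64)) into an Altivec register: lvx and stvx
   accept only (reg) or (plus (reg) (reg)) addresses, so the PLUS case
   below copies the whole address into SCRATCH and the access is done as
   "lvx vN,0,scratch".  */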
18494
18495 void
18496 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
18497 {
18498 int regno = true_regnum (reg);
18499 machine_mode mode = GET_MODE (reg);
18500 addr_mask_type addr_mask;
18501 rtx addr;
18502 rtx new_addr;
18503 rtx op_reg, op0, op1;
18504 rtx and_op;
18505 rtx cc_clobber;
18506 rtvec rv;
18507
18508 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
18509 || !base_reg_operand (scratch, GET_MODE (scratch)))
18510 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18511
18512 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
18513 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18514
18515 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
18516 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18517
18518 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
18519 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18520
18521 else
18522 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18523
18524 /* Make sure the mode is valid in this register class. */
18525 if ((addr_mask & RELOAD_REG_VALID) == 0)
18526 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18527
18528 if (TARGET_DEBUG_ADDR)
18529 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
18530
18531 new_addr = addr = XEXP (mem, 0);
18532 switch (GET_CODE (addr))
18533 {
18534 /* Does the register class support auto update forms for this mode? If
18535 not, do the update now. We don't need a scratch register, since the
18536 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
18537 case PRE_INC:
18538 case PRE_DEC:
18539 op_reg = XEXP (addr, 0);
18540 if (!base_reg_operand (op_reg, Pmode))
18541 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18542
18543 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18544 {
18545 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
18546 new_addr = op_reg;
18547 }
18548 break;
18549
18550 case PRE_MODIFY:
18551 op0 = XEXP (addr, 0);
18552 op1 = XEXP (addr, 1);
18553 if (!base_reg_operand (op0, Pmode)
18554 || GET_CODE (op1) != PLUS
18555 || !rtx_equal_p (op0, XEXP (op1, 0)))
18556 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18557
18558 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18559 {
18560 emit_insn (gen_rtx_SET (op0, op1));
18561 new_addr = reg;
18562 }
18563 break;
18564
18565 /* Do we need to simulate AND -16 to clear the bottom address bits used
18566 in VMX load/stores? */
18567 case AND:
18568 op0 = XEXP (addr, 0);
18569 op1 = XEXP (addr, 1);
18570 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
18571 {
18572 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
18573 op_reg = op0;
18574
18575 else if (GET_CODE (op1) == PLUS)
18576 {
18577 emit_insn (gen_rtx_SET (scratch, op1));
18578 op_reg = scratch;
18579 }
18580
18581 else
18582 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18583
18584 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
18585 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
18586 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
18587 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
18588 new_addr = scratch;
18589 }
18590 break;
18591
18592 /* If this is an indirect address, make sure it is a base register. */
18593 case REG:
18594 case SUBREG:
18595 if (!base_reg_operand (addr, GET_MODE (addr)))
18596 {
18597 emit_insn (gen_rtx_SET (scratch, addr));
18598 new_addr = scratch;
18599 }
18600 break;
18601
18602 /* If this is an indexed address, make sure the register class can handle
18603 indexed addresses for this mode. */
18604 case PLUS:
18605 op0 = XEXP (addr, 0);
18606 op1 = XEXP (addr, 1);
18607 if (!base_reg_operand (op0, Pmode))
18608 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18609
18610 else if (int_reg_operand (op1, Pmode))
18611 {
18612 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18613 {
18614 emit_insn (gen_rtx_SET (scratch, addr));
18615 new_addr = scratch;
18616 }
18617 }
18618
18619 /* Make sure the register class can handle offset addresses. */
18620 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18621 {
18622 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18623 {
18624 emit_insn (gen_rtx_SET (scratch, addr));
18625 new_addr = scratch;
18626 }
18627 }
18628
18629 else
18630 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18631
18632 break;
18633
18634 case LO_SUM:
18635 op0 = XEXP (addr, 0);
18636 op1 = XEXP (addr, 1);
18637 if (!base_reg_operand (op0, Pmode))
18638 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18639
18640 else if (int_reg_operand (op1, Pmode))
18641 {
18642 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18643 {
18644 emit_insn (gen_rtx_SET (scratch, addr));
18645 new_addr = scratch;
18646 }
18647 }
18648
18649 /* Make sure the register class can handle offset addresses. */
18650 else if (legitimate_lo_sum_address_p (mode, addr, false))
18651 {
18652 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18653 {
18654 emit_insn (gen_rtx_SET (scratch, addr));
18655 new_addr = scratch;
18656 }
18657 }
18658
18659 else
18660 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18661
18662 break;
18663
18664 case SYMBOL_REF:
18665 case CONST:
18666 case LABEL_REF:
18667 rs6000_emit_move (scratch, addr, Pmode);
18668 new_addr = scratch;
18669 break;
18670
18671 default:
18672 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18673 }
18674
18675 /* Adjust the address if it changed. */
18676 if (addr != new_addr)
18677 {
18678 mem = replace_equiv_address_nv (mem, new_addr);
18679 if (TARGET_DEBUG_ADDR)
18680 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
18681 }
18682
18683 /* Now create the move. */
18684 if (store_p)
18685 emit_insn (gen_rtx_SET (mem, reg));
18686 else
18687 emit_insn (gen_rtx_SET (reg, mem));
18688
18689 return;
18690 }
18691
18692 /* Convert reloads involving 64-bit gprs and misaligned offset
18693 addressing, or multiple 32-bit gprs and offsets that are too large,
18694 to use indirect addressing. */
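
/* E.g. (hypothetical): a misaligned 64-bit ld from
   (plus (reg r3) (const_int 6)) becomes

	addi scratch,r3,6
	ld   rN,0(scratch)

   with the addi produced by the rs6000_emit_move below.  */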
18695
18696 void
18697 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
18698 {
18699 int regno = true_regnum (reg);
18700 enum reg_class rclass;
18701 rtx addr;
18702 rtx scratch_or_premodify = scratch;
18703
18704 if (TARGET_DEBUG_ADDR)
18705 {
18706 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
18707 store_p ? "store" : "load");
18708 fprintf (stderr, "reg:\n");
18709 debug_rtx (reg);
18710 fprintf (stderr, "mem:\n");
18711 debug_rtx (mem);
18712 fprintf (stderr, "scratch:\n");
18713 debug_rtx (scratch);
18714 }
18715
18716 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
18717 gcc_assert (GET_CODE (mem) == MEM);
18718 rclass = REGNO_REG_CLASS (regno);
18719 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
18720 addr = XEXP (mem, 0);
18721
18722 if (GET_CODE (addr) == PRE_MODIFY)
18723 {
18724 gcc_assert (REG_P (XEXP (addr, 0))
18725 && GET_CODE (XEXP (addr, 1)) == PLUS
18726 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
18727 scratch_or_premodify = XEXP (addr, 0);
18728 if (!HARD_REGISTER_P (scratch_or_premodify))
18729 /* If we have a pseudo here then reload will have arranged
18730 to have it replaced, but only in the original insn.
18731 Use the replacement here too. */
18732 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
18733
18734 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
18735 expressions from the original insn, without unsharing them.
18736 Any RTL that points into the original insn will of course
18737 have register replacements applied. That is why we don't
18738 need to look for replacements under the PLUS. */
18739 addr = XEXP (addr, 1);
18740 }
18741 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
18742
18743 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
18744
18745 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
18746
18747 /* Now create the move. */
18748 if (store_p)
18749 emit_insn (gen_rtx_SET (mem, reg));
18750 else
18751 emit_insn (gen_rtx_SET (reg, mem));
18752
18753 return;
18754 }
18755
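/* For example (illustrative only): a 32-bit load of a 64-bit value whose
   offset does not fit the 16-bit D field,

       (set (reg:DI 3) (mem:DI (plus:SI (reg:SI 1) (const_int 40000))))

   is converted by rs6000_secondary_reload_gpr into

       (set (reg:SI 11) (plus:SI (reg:SI 1) (const_int 40000)))
       (set (reg:DI 3) (mem:DI (reg:SI 11)))

   so that the eventual word-sized loads can use 0(r11) and 4(r11).  */
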
18756 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
18757 this function has any SDmode references. If we are on a power7 or later, we
18758 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
18759 can load/store the value. */
18760
18761 static void
18762 rs6000_alloc_sdmode_stack_slot (void)
18763 {
18764 tree t;
18765 basic_block bb;
18766 gimple_stmt_iterator gsi;
18767
18768 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
18769 /* We use a different approach for dealing with the secondary
18770 memory in LRA. */
18771 if (ira_use_lra_p)
18772 return;
18773
18774 if (TARGET_NO_SDMODE_STACK)
18775 return;
18776
18777 FOR_EACH_BB_FN (bb, cfun)
18778 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
18779 {
18780 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
18781 if (ret)
18782 {
18783 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
18784 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
18785 SDmode, 0);
18786 return;
18787 }
18788 }
18789
18790 /* Check for any SDmode parameters of the function. */
18791 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
18792 {
18793 if (TREE_TYPE (t) == error_mark_node)
18794 continue;
18795
18796 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
18797 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
18798 {
18799 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
18800 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
18801 SDmode, 0);
18802 return;
18803 }
18804 }
18805 }
18806
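/* Note: the slot is allocated in DDmode (8 bytes) rather than SDmode
   (4 bytes), presumably so that the full 64-bit lfd/stfd instructions can
   move the value through memory on pre-power7 hardware, with the SDmode
   value accessed through part of that slot.  */
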
18807 static void
18808 rs6000_instantiate_decls (void)
18809 {
18810 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
18811 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
18812 }
18813
18814 /* Given an rtx X being reloaded into a reg required to be
18815 in class CLASS, return the class of reg to actually use.
18816 In general this is just CLASS; but on some machines
18817 in some cases it is preferable to use a more restrictive class.
18818
18819 On the RS/6000, we have to return NO_REGS when we want to reload a
18820 floating-point CONST_DOUBLE to force it to be copied to memory.
18821
18822 We also don't want to reload integer values into floating-point
18823 registers if we can at all help it. In fact, this can
18824 cause reload to die, if it tries to generate a reload of CTR
18825 into a FP register and discovers it doesn't have the memory location
18826 required.
18827
18828 ??? Would it be a good idea to have reload do the converse, that is
18829 try to reload floating modes into FP registers if possible?
18830 */
18831
18832 static enum reg_class
18833 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
18834 {
18835 machine_mode mode = GET_MODE (x);
18836 bool is_constant = CONSTANT_P (x);
18837
18838 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
18839 the reloading of address expressions using PLUS into floating point
18840 registers. */
18841 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
18842 {
18843 if (is_constant)
18844 {
18845 /* Zero is always allowed in all VSX registers. */
18846 if (x == CONST0_RTX (mode))
18847 return rclass;
18848
18849 /* If this is a vector constant that can be formed with a few Altivec
18850 instructions, we want altivec registers. */
18851 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
18852 return ALTIVEC_REGS;
18853
18854 /* Force constant to memory. */
18855 return NO_REGS;
18856 }
18857
18858 /* If this is a scalar floating point value, prefer the traditional
18859 floating point registers so that we can use D-form (register+offset)
18860 addressing. */
18861 if (GET_MODE_SIZE (mode) < 16)
18862 return FLOAT_REGS;
18863
18864 /* Prefer the Altivec registers if Altivec is handling the vector
18865 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
18866 loads. */
18867 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
18868 || mode == V1TImode)
18869 return ALTIVEC_REGS;
18870
18871 return rclass;
18872 }
18873
18874 if (is_constant || GET_CODE (x) == PLUS)
18875 {
18876 if (reg_class_subset_p (GENERAL_REGS, rclass))
18877 return GENERAL_REGS;
18878 if (reg_class_subset_p (BASE_REGS, rclass))
18879 return BASE_REGS;
18880 return NO_REGS;
18881 }
18882
18883 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
18884 return GENERAL_REGS;
18885
18886 return rclass;
18887 }
18888
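/* Examples (illustrative): under VSX, reloading (const_double:DF 0.0) into
   VSX_REGS returns RCLASS unchanged (zero is directly loadable); a
   CONST_VECTOR accepted by easy_vector_constant returns ALTIVEC_REGS; any
   other constant returns NO_REGS, forcing it to memory; and a DFmode value
   prefers FLOAT_REGS so that D-form addressing can be used.  */
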
18889 /* Debug version of rs6000_preferred_reload_class. */
18890 static enum reg_class
18891 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
18892 {
18893 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
18894
18895 fprintf (stderr,
18896 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
18897 "mode = %s, x:\n",
18898 reg_class_names[ret], reg_class_names[rclass],
18899 GET_MODE_NAME (GET_MODE (x)));
18900 debug_rtx (x);
18901
18902 return ret;
18903 }
18904
18905 /* If we are copying between FP or AltiVec registers and anything else, we need
18906 a memory location. The exception is when we are targeting ppc64 and the
18907 direct move instructions between the FPRs and GPRs are available. Also, under VSX, you
18908 can copy vector registers from the FP register set to the Altivec register
18909 set and vice versa. */
18910
18911 static bool
18912 rs6000_secondary_memory_needed (enum reg_class from_class,
18913 enum reg_class to_class,
18914 machine_mode mode)
18915 {
18916 enum rs6000_reg_type from_type, to_type;
18917 bool altivec_p = ((from_class == ALTIVEC_REGS)
18918 || (to_class == ALTIVEC_REGS));
18919
18920 /* If a simple/direct move is available, we don't need secondary memory. */
18921 from_type = reg_class_to_reg_type[(int)from_class];
18922 to_type = reg_class_to_reg_type[(int)to_class];
18923
18924 if (rs6000_secondary_reload_move (to_type, from_type, mode,
18925 (secondary_reload_info *)0, altivec_p))
18926 return false;
18927
18928 /* If we have a floating point or vector register class, we need to use
18929 memory to transfer the data. */
18930 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
18931 return true;
18932
18933 return false;
18934 }
18935
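/* For example: copying DImode between GENERAL_REGS and FLOAT_REGS needs a
   stack slot on older processors, but with -mcpu=power8 the direct move
   instructions (e.g. mtvsrd/mfvsrd) let rs6000_secondary_reload_move
   succeed, so no secondary memory is required.  */
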
18936 /* Debug version of rs6000_secondary_memory_needed. */
18937 static bool
18938 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
18939 enum reg_class to_class,
18940 machine_mode mode)
18941 {
18942 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
18943
18944 fprintf (stderr,
18945 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
18946 "to_class = %s, mode = %s\n",
18947 ret ? "true" : "false",
18948 reg_class_names[from_class],
18949 reg_class_names[to_class],
18950 GET_MODE_NAME (mode));
18951
18952 return ret;
18953 }
18954
18955 /* Return the register class of a scratch register needed to copy IN into
18956 or out of a register in RCLASS in MODE. If it can be done directly,
18957 NO_REGS is returned. */
18958
18959 static enum reg_class
18960 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
18961 rtx in)
18962 {
18963 int regno;
18964
18965 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
18966 #if TARGET_MACHO
18967 && MACHOPIC_INDIRECT
18968 #endif
18969 ))
18970 {
18971 /* We cannot copy a symbolic operand directly into anything
18972 other than BASE_REGS for TARGET_ELF. So indicate that a
18973 register from BASE_REGS is needed as an intermediate
18974 register.
18975
18976 On Darwin, pic addresses require a load from memory, which
18977 needs a base register. */
18978 if (rclass != BASE_REGS
18979 && (GET_CODE (in) == SYMBOL_REF
18980 || GET_CODE (in) == HIGH
18981 || GET_CODE (in) == LABEL_REF
18982 || GET_CODE (in) == CONST))
18983 return BASE_REGS;
18984 }
18985
18986 if (GET_CODE (in) == REG)
18987 {
18988 regno = REGNO (in);
18989 if (regno >= FIRST_PSEUDO_REGISTER)
18990 {
18991 regno = true_regnum (in);
18992 if (regno >= FIRST_PSEUDO_REGISTER)
18993 regno = -1;
18994 }
18995 }
18996 else if (GET_CODE (in) == SUBREG)
18997 {
18998 regno = true_regnum (in);
18999 if (regno >= FIRST_PSEUDO_REGISTER)
19000 regno = -1;
19001 }
19002 else
19003 regno = -1;
19004
19005 /* If we have VSX register moves, prefer moving scalar values between
19006 Altivec registers and GPR by going via an FPR (and then via memory)
19007 instead of reloading the secondary memory address for Altivec moves. */
19008 if (TARGET_VSX
19009 && GET_MODE_SIZE (mode) < 16
19010 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19011 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19012 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19013 && (regno >= 0 && INT_REGNO_P (regno)))))
19014 return FLOAT_REGS;
19015
19016 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
19017 into anything. */
19018 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19019 || (regno >= 0 && INT_REGNO_P (regno)))
19020 return NO_REGS;
19021
19022 /* Constants, memory, and VSX registers can go into VSX registers (both the
19023 traditional floating point and the altivec registers). */
19024 if (rclass == VSX_REGS
19025 && (regno == -1 || VSX_REGNO_P (regno)))
19026 return NO_REGS;
19027
19028 /* Constants, memory, and FP registers can go into FP registers. */
19029 if ((regno == -1 || FP_REGNO_P (regno))
19030 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
19031 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
19032
19033 /* Memory and AltiVec registers can go into AltiVec registers. */
19034 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
19035 && rclass == ALTIVEC_REGS)
19036 return NO_REGS;
19037
19038 /* We can copy among the CR registers. */
19039 if ((rclass == CR_REGS || rclass == CR0_REGS)
19040 && regno >= 0 && CR_REGNO_P (regno))
19041 return NO_REGS;
19042
19043 /* Otherwise, we need GENERAL_REGS. */
19044 return GENERAL_REGS;
19045 }
19046
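/* For example: copying a DFmode value that lives in an Altivec register
   into GENERAL_REGS under VSX returns FLOAT_REGS above, so the value
   travels Altivec -> FPR and then via memory to the GPR, instead of
   forcing a secondary memory reload for the Altivec register itself.  */
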
19047 /* Debug version of rs6000_secondary_reload_class. */
19048 static enum reg_class
19049 rs6000_debug_secondary_reload_class (enum reg_class rclass,
19050 machine_mode mode, rtx in)
19051 {
19052 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
19053 fprintf (stderr,
19054 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
19055 "mode = %s, input rtx:\n",
19056 reg_class_names[ret], reg_class_names[rclass],
19057 GET_MODE_NAME (mode));
19058 debug_rtx (in);
19059
19060 return ret;
19061 }
19062
19063 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
19064
19065 static bool
19066 rs6000_cannot_change_mode_class (machine_mode from,
19067 machine_mode to,
19068 enum reg_class rclass)
19069 {
19070 unsigned from_size = GET_MODE_SIZE (from);
19071 unsigned to_size = GET_MODE_SIZE (to);
19072
19073 if (from_size != to_size)
19074 {
19075 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
19076
19077 if (reg_classes_intersect_p (xclass, rclass))
19078 {
19079 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
19080 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
19081 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
19082 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
19083
19084 /* Don't allow 64-bit types to overlap with 128-bit types that take a
19085 single register under VSX because the scalar part of the register
19086 is in the upper 64-bits, and not the lower 64-bits. Types like
19087 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
19088 IEEE floating point can't overlap, and neither can small
19089 values. */
19090
19091 if (to_float128_vector_p && from_float128_vector_p)
19092 return false;
19093
19094 else if (to_float128_vector_p || from_float128_vector_p)
19095 return true;
19096
19097 /* TDmode in floating-mode registers must always go into a register
19098 pair with the most significant word in the even-numbered register
19099 to match ISA requirements. In little-endian mode, this does not
19100 match subreg numbering, so we cannot allow subregs. */
19101 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
19102 return true;
19103
19104 if (from_size < 8 || to_size < 8)
19105 return true;
19106
19107 if (from_size == 8 && (8 * to_nregs) != to_size)
19108 return true;
19109
19110 if (to_size == 8 && (8 * from_nregs) != from_size)
19111 return true;
19112
19113 return false;
19114 }
19115 else
19116 return false;
19117 }
19118
19119 if (TARGET_E500_DOUBLE
19120 && ((((to) == DFmode) + ((from) == DFmode)) == 1
19121 || (((to) == TFmode) + ((from) == TFmode)) == 1
19122 || (((to) == IFmode) + ((from) == IFmode)) == 1
19123 || (((to) == KFmode) + ((from) == KFmode)) == 1
19124 || (((to) == DDmode) + ((from) == DDmode)) == 1
19125 || (((to) == TDmode) + ((from) == TDmode)) == 1
19126 || (((to) == DImode) + ((from) == DImode)) == 1))
19127 return true;
19128
19129 /* Since the VSX register set includes traditional floating point registers
19130 and altivec registers, just check for the size being different instead of
19131 trying to check whether the modes are vector modes. Otherwise it won't
19132 allow say DF and DI to change classes. For types like TFmode and TDmode
19133 that take 2 64-bit registers, rather than a single 128-bit register, don't
19134 allow subregs of those types to other 128 bit types. */
19135 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
19136 {
19137 unsigned num_regs = (from_size + 15) / 16;
19138 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
19139 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
19140 return true;
19141
19142 return (from_size != 8 && from_size != 16);
19143 }
19144
19145 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
19146 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
19147 return true;
19148
19149 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
19150 && reg_classes_intersect_p (GENERAL_REGS, rclass))
19151 return true;
19152
19153 return false;
19154 }
19155
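/* For example: (subreg:DI (reg:KF 32) 0) is rejected for the VSX classes
   because the 64-bit scalar lives in the upper half of the 128-bit
   register, while DF <-> DI subregs of the same FPR are allowed, since
   both modes occupy a single 8-byte register.  */
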
19156 /* Debug version of rs6000_cannot_change_mode_class. */
19157 static bool
19158 rs6000_debug_cannot_change_mode_class (machine_mode from,
19159 machine_mode to,
19160 enum reg_class rclass)
19161 {
19162 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
19163
19164 fprintf (stderr,
19165 "rs6000_cannot_change_mode_class, return %s, from = %s, "
19166 "to = %s, rclass = %s\n",
19167 ret ? "true" : "false",
19168 GET_MODE_NAME (from), GET_MODE_NAME (to),
19169 reg_class_names[rclass]);
19170
19171 return ret;
19172 }
19173 \f
19174 /* Return a string to do a move operation of 128 bits of data. */
19175
19176 const char *
19177 rs6000_output_move_128bit (rtx operands[])
19178 {
19179 rtx dest = operands[0];
19180 rtx src = operands[1];
19181 machine_mode mode = GET_MODE (dest);
19182 int dest_regno;
19183 int src_regno;
19184 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
19185 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
19186
19187 if (REG_P (dest))
19188 {
19189 dest_regno = REGNO (dest);
19190 dest_gpr_p = INT_REGNO_P (dest_regno);
19191 dest_fp_p = FP_REGNO_P (dest_regno);
19192 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
19193 dest_vsx_p = dest_fp_p | dest_vmx_p;
19194 }
19195 else
19196 {
19197 dest_regno = -1;
19198 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
19199 }
19200
19201 if (REG_P (src))
19202 {
19203 src_regno = REGNO (src);
19204 src_gpr_p = INT_REGNO_P (src_regno);
19205 src_fp_p = FP_REGNO_P (src_regno);
19206 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
19207 src_vsx_p = src_fp_p | src_vmx_p;
19208 }
19209 else
19210 {
19211 src_regno = -1;
19212 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
19213 }
19214
19215 /* Register moves. */
19216 if (dest_regno >= 0 && src_regno >= 0)
19217 {
19218 if (dest_gpr_p)
19219 {
19220 if (src_gpr_p)
19221 return "#";
19222
19223 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
19224 return "#";
19225 }
19226
19227 else if (TARGET_VSX && dest_vsx_p)
19228 {
19229 if (src_vsx_p)
19230 return "xxlor %x0,%x1,%x1";
19231
19232 else if (TARGET_DIRECT_MOVE && src_gpr_p)
19233 return "#";
19234 }
19235
19236 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
19237 return "vor %0,%1,%1";
19238
19239 else if (dest_fp_p && src_fp_p)
19240 return "#";
19241 }
19242
19243 /* Loads. */
19244 else if (dest_regno >= 0 && MEM_P (src))
19245 {
19246 if (dest_gpr_p)
19247 {
19248 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19249 return "lq %0,%1";
19250 else
19251 return "#";
19252 }
19253
19254 else if (TARGET_ALTIVEC && dest_vmx_p
19255 && altivec_indexed_or_indirect_operand (src, mode))
19256 return "lvx %0,%y1";
19257
19258 else if (TARGET_VSX && dest_vsx_p)
19259 {
19260 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19261 return "lxvw4x %x0,%y1";
19262 else
19263 return "lxvd2x %x0,%y1";
19264 }
19265
19266 else if (TARGET_ALTIVEC && dest_vmx_p)
19267 return "lvx %0,%y1";
19268
19269 else if (dest_fp_p)
19270 return "#";
19271 }
19272
19273 /* Stores. */
19274 else if (src_regno >= 0 && MEM_P (dest))
19275 {
19276 if (src_gpr_p)
19277 {
19278 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19279 return "stq %1,%0";
19280 else
19281 return "#";
19282 }
19283
19284 else if (TARGET_ALTIVEC && src_vmx_p
19285 && altivec_indexed_or_indirect_operand (dest, mode))
19286 return "stvx %1,%y0";
19287
19288 else if (TARGET_VSX && src_vsx_p)
19289 {
19290 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19291 return "stxvw4x %x1,%y0";
19292 else
19293 return "stxvd2x %x1,%y0";
19294 }
19295
19296 else if (TARGET_ALTIVEC && src_vmx_p)
19297 return "stvx %1,%y0";
19298
19299 else if (src_fp_p)
19300 return "#";
19301 }
19302
19303 /* Constants. */
19304 else if (dest_regno >= 0
19305 && (GET_CODE (src) == CONST_INT
19306 || GET_CODE (src) == CONST_WIDE_INT
19307 || GET_CODE (src) == CONST_DOUBLE
19308 || GET_CODE (src) == CONST_VECTOR))
19309 {
19310 if (dest_gpr_p)
19311 return "#";
19312
19313 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
19314 return "xxlxor %x0,%x0,%x0";
19315
19316 else if (TARGET_ALTIVEC && dest_vmx_p)
19317 return output_vec_const_move (operands);
19318 }
19319
19320 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
19321 }
19322
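/* Sample outputs (illustrative): a VSX register-to-register V2DF copy
   produces "xxlor %x0,%x1,%x1"; an Altivec copy produces "vor %0,%1,%1";
   a GPR quad load/store with -mquad-memory produces "lq %0,%1" or
   "stq %1,%0"; and "#" directs the output to come from splitting the
   insn into word-sized moves later.  */
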
19323 /* Validate a 128-bit move. */
19324 bool
19325 rs6000_move_128bit_ok_p (rtx operands[])
19326 {
19327 machine_mode mode = GET_MODE (operands[0]);
19328 return (gpc_reg_operand (operands[0], mode)
19329 || gpc_reg_operand (operands[1], mode));
19330 }
19331
19332 /* Return true if a 128-bit move needs to be split. */
19333 bool
19334 rs6000_split_128bit_ok_p (rtx operands[])
19335 {
19336 if (!reload_completed)
19337 return false;
19338
19339 if (!gpr_or_gpr_p (operands[0], operands[1]))
19340 return false;
19341
19342 if (quad_load_store_p (operands[0], operands[1]))
19343 return false;
19344
19345 return true;
19346 }
19347
19348 \f
19349 /* Given a comparison operation, return the bit number in CCR to test. We
19350 know this is a valid comparison.
19351
19352 SCC_P is 1 if this is for an scc. That means that %D will have been
19353 used instead of %C, so the bits will be in different places.
19354
19355 Return -1 if OP isn't a valid comparison for some reason. */
19356
19357 int
19358 ccr_bit (rtx op, int scc_p)
19359 {
19360 enum rtx_code code = GET_CODE (op);
19361 machine_mode cc_mode;
19362 int cc_regnum;
19363 int base_bit;
19364 rtx reg;
19365
19366 if (!COMPARISON_P (op))
19367 return -1;
19368
19369 reg = XEXP (op, 0);
19370
19371 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
19372
19373 cc_mode = GET_MODE (reg);
19374 cc_regnum = REGNO (reg);
19375 base_bit = 4 * (cc_regnum - CR0_REGNO);
19376
19377 validate_condition_mode (code, cc_mode);
19378
19379 /* When generating a sCOND operation, only positive conditions are
19380 allowed. */
19381 gcc_assert (!scc_p
19382 || code == EQ || code == GT || code == LT || code == UNORDERED
19383 || code == GTU || code == LTU);
19384
19385 switch (code)
19386 {
19387 case NE:
19388 return scc_p ? base_bit + 3 : base_bit + 2;
19389 case EQ:
19390 return base_bit + 2;
19391 case GT: case GTU: case UNLE:
19392 return base_bit + 1;
19393 case LT: case LTU: case UNGE:
19394 return base_bit;
19395 case ORDERED: case UNORDERED:
19396 return base_bit + 3;
19397
19398 case GE: case GEU:
19399 /* If scc, we will have done a cror to put the bit in the
19400 unordered position. So test that bit. For integer, this is ! LT
19401 unless this is an scc insn. */
19402 return scc_p ? base_bit + 3 : base_bit;
19403
19404 case LE: case LEU:
19405 return scc_p ? base_bit + 3 : base_bit + 1;
19406
19407 default:
19408 gcc_unreachable ();
19409 }
19410 }
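
/* Worked example: for (gt (reg CR0_REGNO+2) (const_int 0)) with scc_p == 0,
   base_bit is 4 * 2 = 8 and GT selects base_bit + 1, so ccr_bit returns
   9 -- the GT bit of the third CR field within the 32-bit CCR.  */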
19411 \f
19412 /* Return the GOT register. */
19413
19414 rtx
19415 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
19416 {
19417 /* The second flow pass currently (June 1999) can't update
19418 regs_ever_live without disturbing other parts of the compiler, so
19419 update it here to make the prolog/epilogue code happy. */
19420 if (!can_create_pseudo_p ()
19421 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
19422 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
19423
19424 crtl->uses_pic_offset_table = 1;
19425
19426 return pic_offset_table_rtx;
19427 }
19428 \f
19429 static rs6000_stack_t stack_info;
19430
19431 /* Function to init struct machine_function.
19432 This will be called, via a pointer variable,
19433 from push_function_context. */
19434
19435 static struct machine_function *
19436 rs6000_init_machine_status (void)
19437 {
19438 stack_info.reload_completed = 0;
19439 return ggc_cleared_alloc<machine_function> ();
19440 }
19441 \f
19442 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
19443
19444 /* Write out a function code label. */
19445
19446 void
19447 rs6000_output_function_entry (FILE *file, const char *fname)
19448 {
19449 if (fname[0] != '.')
19450 {
19451 switch (DEFAULT_ABI)
19452 {
19453 default:
19454 gcc_unreachable ();
19455
19456 case ABI_AIX:
19457 if (DOT_SYMBOLS)
19458 putc ('.', file);
19459 else
19460 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
19461 break;
19462
19463 case ABI_ELFv2:
19464 case ABI_V4:
19465 case ABI_DARWIN:
19466 break;
19467 }
19468 }
19469
19470 RS6000_OUTPUT_BASENAME (file, fname);
19471 }
19472
19473 /* Print an operand. Recognize special options, documented below. */
19474
19475 #if TARGET_ELF
19476 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
19477 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
19478 #else
19479 #define SMALL_DATA_RELOC "sda21"
19480 #define SMALL_DATA_REG 0
19481 #endif
19482
19483 void
19484 print_operand (FILE *file, rtx x, int code)
19485 {
19486 int i;
19487 unsigned HOST_WIDE_INT uval;
19488
19489 switch (code)
19490 {
19491 /* %a is output_address. */
19492
19493 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
19494 output_operand. */
19495
19496 case 'D':
19497 /* Like 'J' but get to the GT bit only. */
19498 gcc_assert (REG_P (x));
19499
19500 /* Bit 1 is GT bit. */
19501 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
19502
19503 /* Add one for shift count in rlinm for scc. */
19504 fprintf (file, "%d", i + 1);
19505 return;
19506
19507 case 'e':
19508 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
19509 if (! INT_P (x))
19510 {
19511 output_operand_lossage ("invalid %%e value");
19512 return;
19513 }
19514
19515 uval = INTVAL (x);
19516 if ((uval & 0xffff) == 0 && uval != 0)
19517 putc ('s', file);
19518 return;
19519
19520 case 'E':
19521 /* X is a CR register. Print the number of the EQ bit of the CR. */
19522 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19523 output_operand_lossage ("invalid %%E value");
19524 else
19525 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
19526 return;
19527
19528 case 'f':
19529 /* X is a CR register. Print the shift count needed to move it
19530 to the high-order four bits. */
19531 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19532 output_operand_lossage ("invalid %%f value");
19533 else
19534 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
19535 return;
19536
19537 case 'F':
19538 /* Similar, but print the count for the rotate in the opposite
19539 direction. */
19540 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19541 output_operand_lossage ("invalid %%F value");
19542 else
19543 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
19544 return;
19545
19546 case 'G':
19547 /* X is a constant integer. If it is negative, print "m",
19548 otherwise print "z". This is to make an aze or ame insn. */
19549 if (GET_CODE (x) != CONST_INT)
19550 output_operand_lossage ("invalid %%G value");
19551 else if (INTVAL (x) >= 0)
19552 putc ('z', file);
19553 else
19554 putc ('m', file);
19555 return;
19556
19557 case 'h':
19558 /* If constant, output low-order five bits. Otherwise, write
19559 normally. */
19560 if (INT_P (x))
19561 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
19562 else
19563 print_operand (file, x, 0);
19564 return;
19565
19566 case 'H':
19567 /* If constant, output low-order six bits. Otherwise, write
19568 normally. */
19569 if (INT_P (x))
19570 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
19571 else
19572 print_operand (file, x, 0);
19573 return;
19574
19575 case 'I':
19576 /* Print `i' if this is a constant, else nothing. */
19577 if (INT_P (x))
19578 putc ('i', file);
19579 return;
19580
19581 case 'j':
19582 /* Write the bit number in CCR for jump. */
19583 i = ccr_bit (x, 0);
19584 if (i == -1)
19585 output_operand_lossage ("invalid %%j code");
19586 else
19587 fprintf (file, "%d", i);
19588 return;
19589
19590 case 'J':
19591 /* Similar, but add one for shift count in rlinm for scc and pass
19592 scc flag to `ccr_bit'. */
19593 i = ccr_bit (x, 1);
19594 if (i == -1)
19595 output_operand_lossage ("invalid %%J code");
19596 else
19597 /* If we want bit 31, write a shift count of zero, not 32. */
19598 fprintf (file, "%d", i == 31 ? 0 : i + 1);
19599 return;
19600
19601 case 'k':
19602 /* X must be a constant. Write the 1's complement of the
19603 constant. */
19604 if (! INT_P (x))
19605 output_operand_lossage ("invalid %%k value");
19606 else
19607 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
19608 return;
19609
19610 case 'K':
19611 /* X must be a symbolic constant on ELF. Write an
19612 expression suitable for an 'addi' that adds in the low 16
19613 bits of the MEM. */
19614 if (GET_CODE (x) == CONST)
19615 {
19616 if (GET_CODE (XEXP (x, 0)) != PLUS
19617 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
19618 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
19619 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
19620 output_operand_lossage ("invalid %%K value");
19621 }
19622 print_operand_address (file, x);
19623 fputs ("@l", file);
19624 return;
19625
19626 /* %l is output_asm_label. */
19627
19628 case 'L':
19629 /* Write second word of DImode or DFmode reference. Works on register
19630 or non-indexed memory only. */
19631 if (REG_P (x))
19632 fputs (reg_names[REGNO (x) + 1], file);
19633 else if (MEM_P (x))
19634 {
19635 machine_mode mode = GET_MODE (x);
19636 /* Handle possible auto-increment. Since it is pre-increment and
19637 we have already done it, we can just use an offset of word. */
19638 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19639 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19640 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
19641 UNITS_PER_WORD));
19642 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19643 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
19644 UNITS_PER_WORD));
19645 else
19646 output_address (mode, XEXP (adjust_address_nv (x, SImode,
19647 UNITS_PER_WORD),
19648 0));
19649
19650 if (small_data_operand (x, GET_MODE (x)))
19651 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19652 reg_names[SMALL_DATA_REG]);
19653 }
19654 return;
19655
19656 case 'N':
19657 /* Write the number of elements in the vector times 4. */
19658 if (GET_CODE (x) != PARALLEL)
19659 output_operand_lossage ("invalid %%N value");
19660 else
19661 fprintf (file, "%d", XVECLEN (x, 0) * 4);
19662 return;
19663
19664 case 'O':
19665 /* Similar, but subtract 1 first. */
19666 if (GET_CODE (x) != PARALLEL)
19667 output_operand_lossage ("invalid %%O value");
19668 else
19669 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
19670 return;
19671
19672 case 'p':
19673 /* X is a CONST_INT that is a power of two. Output the logarithm. */
19674 if (! INT_P (x)
19675 || INTVAL (x) < 0
19676 || (i = exact_log2 (INTVAL (x))) < 0)
19677 output_operand_lossage ("invalid %%p value");
19678 else
19679 fprintf (file, "%d", i);
19680 return;
19681
19682 case 'P':
19683 /* The operand must be an indirect memory reference. The result
19684 is the register name. */
19685 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
19686 || REGNO (XEXP (x, 0)) >= 32)
19687 output_operand_lossage ("invalid %%P value");
19688 else
19689 fputs (reg_names[REGNO (XEXP (x, 0))], file);
19690 return;
19691
19692 case 'q':
19693 /* This outputs the logical code corresponding to a boolean
19694 expression. The expression may have one or both operands
19695 negated (if one, only the first one). For condition register
19696 logical operations, it will also treat the negated
19697 CR codes as NOTs, but not handle NOTs of them. */
19698 {
19699 const char *const *t = 0;
19700 const char *s;
19701 enum rtx_code code = GET_CODE (x);
19702 static const char * const tbl[3][3] = {
19703 { "and", "andc", "nor" },
19704 { "or", "orc", "nand" },
19705 { "xor", "eqv", "xor" } };
19706
19707 if (code == AND)
19708 t = tbl[0];
19709 else if (code == IOR)
19710 t = tbl[1];
19711 else if (code == XOR)
19712 t = tbl[2];
19713 else
19714 output_operand_lossage ("invalid %%q value");
19715
19716 if (GET_CODE (XEXP (x, 0)) != NOT)
19717 s = t[0];
19718 else
19719 {
19720 if (GET_CODE (XEXP (x, 1)) == NOT)
19721 s = t[2];
19722 else
19723 s = t[1];
19724 }
19725
19726 fputs (s, file);
19727 }
19728 return;
19729
19730 case 'Q':
19731 if (! TARGET_MFCRF)
19732 return;
19733 fputc (',', file);
19734 /* FALLTHRU */
19735
19736 case 'R':
19737 /* X is a CR register. Print the mask for `mtcrf'. */
19738 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19739 output_operand_lossage ("invalid %%R value");
19740 else
19741 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
19742 return;
19743
19744 case 't':
19745 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
19746 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
19747
19748 /* Bit 3 is OV bit. */
19749 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
19750
19751 /* If we want bit 31, write a shift count of zero, not 32. */
19752 fprintf (file, "%d", i == 31 ? 0 : i + 1);
19753 return;
19754
19755 case 'T':
19756 /* Print the symbolic name of a branch target register. */
19757 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
19758 && REGNO (x) != CTR_REGNO))
19759 output_operand_lossage ("invalid %%T value");
19760 else if (REGNO (x) == LR_REGNO)
19761 fputs ("lr", file);
19762 else
19763 fputs ("ctr", file);
19764 return;
19765
19766 case 'u':
19767 /* High-order or low-order 16 bits of constant, whichever is non-zero,
19768 for use in unsigned operand. */
19769 if (! INT_P (x))
19770 {
19771 output_operand_lossage ("invalid %%u value");
19772 return;
19773 }
19774
19775 uval = INTVAL (x);
19776 if ((uval & 0xffff) == 0)
19777 uval >>= 16;
19778
19779 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
19780 return;
19781
19782 case 'v':
19783 /* High-order 16 bits of constant for use in signed operand. */
19784 if (! INT_P (x))
19785 output_operand_lossage ("invalid %%v value");
19786 else
19787 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
19788 (INTVAL (x) >> 16) & 0xffff);
19789 return;
19790
19791 case 'U':
19792 /* Print `u' if this has an auto-increment or auto-decrement. */
19793 if (MEM_P (x)
19794 && (GET_CODE (XEXP (x, 0)) == PRE_INC
19795 || GET_CODE (XEXP (x, 0)) == PRE_DEC
19796 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
19797 putc ('u', file);
19798 return;
19799
19800 case 'V':
19801 /* Print the trap code for this operand. */
19802 switch (GET_CODE (x))
19803 {
19804 case EQ:
19805 fputs ("eq", file); /* 4 */
19806 break;
19807 case NE:
19808 fputs ("ne", file); /* 24 */
19809 break;
19810 case LT:
19811 fputs ("lt", file); /* 16 */
19812 break;
19813 case LE:
19814 fputs ("le", file); /* 20 */
19815 break;
19816 case GT:
19817 fputs ("gt", file); /* 8 */
19818 break;
19819 case GE:
19820 fputs ("ge", file); /* 12 */
19821 break;
19822 case LTU:
19823 fputs ("llt", file); /* 2 */
19824 break;
19825 case LEU:
19826 fputs ("lle", file); /* 6 */
19827 break;
19828 case GTU:
19829 fputs ("lgt", file); /* 1 */
19830 break;
19831 case GEU:
19832 fputs ("lge", file); /* 5 */
19833 break;
19834 default:
19835 gcc_unreachable ();
19836 }
19837 break;
19838
19839 case 'w':
19840 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
19841 normally. */
19842 if (INT_P (x))
19843 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
19844 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
19845 else
19846 print_operand (file, x, 0);
19847 return;
19848
19849 case 'x':
19850 /* X is a FPR or Altivec register used in a VSX context. */
19851 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
19852 output_operand_lossage ("invalid %%x value");
19853 else
19854 {
19855 int reg = REGNO (x);
19856 int vsx_reg = (FP_REGNO_P (reg)
19857 ? reg - 32
19858 : reg - FIRST_ALTIVEC_REGNO + 32);
19859
19860 #ifdef TARGET_REGNAMES
19861 if (TARGET_REGNAMES)
19862 fprintf (file, "%%vs%d", vsx_reg);
19863 else
19864 #endif
19865 fprintf (file, "%d", vsx_reg);
19866 }
19867 return;
19868
19869 case 'X':
19870 if (MEM_P (x)
19871 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
19872 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
19873 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
19874 putc ('x', file);
19875 return;
19876
19877 case 'Y':
19878 /* Like 'L', for third word of TImode/PTImode. */
19879 if (REG_P (x))
19880 fputs (reg_names[REGNO (x) + 2], file);
19881 else if (MEM_P (x))
19882 {
19883 machine_mode mode = GET_MODE (x);
19884 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19885 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19886 output_address (mode, plus_constant (Pmode,
19887 XEXP (XEXP (x, 0), 0), 8));
19888 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19889 output_address (mode, plus_constant (Pmode,
19890 XEXP (XEXP (x, 0), 0), 8));
19891 else
19892 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
19893 if (small_data_operand (x, GET_MODE (x)))
19894 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19895 reg_names[SMALL_DATA_REG]);
19896 }
19897 return;
19898
19899 case 'z':
19900 /* X is a SYMBOL_REF. Write out the name preceded by a
19901 period and without any trailing data in brackets. Used for function
19902 names. If we are configured for System V (or the embedded ABI) on
19903 the PowerPC, do not emit the period, since those systems do not use
19904 TOCs and the like. */
19905 gcc_assert (GET_CODE (x) == SYMBOL_REF);
19906
19907 /* For macho, check to see if we need a stub. */
19908 if (TARGET_MACHO)
19909 {
19910 const char *name = XSTR (x, 0);
19911 #if TARGET_MACHO
19912 if (darwin_emit_branch_islands
19913 && MACHOPIC_INDIRECT
19914 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
19915 name = machopic_indirection_name (x, /*stub_p=*/true);
19916 #endif
19917 assemble_name (file, name);
19918 }
19919 else if (!DOT_SYMBOLS)
19920 assemble_name (file, XSTR (x, 0));
19921 else
19922 rs6000_output_function_entry (file, XSTR (x, 0));
19923 return;
19924
19925 case 'Z':
19926 /* Like 'L', for last word of TImode/PTImode. */
19927 if (REG_P (x))
19928 fputs (reg_names[REGNO (x) + 3], file);
19929 else if (MEM_P (x))
19930 {
19931 machine_mode mode = GET_MODE (x);
19932 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19933 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19934 output_address (mode, plus_constant (Pmode,
19935 XEXP (XEXP (x, 0), 0), 12));
19936 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19937 output_address (mode, plus_constant (Pmode,
19938 XEXP (XEXP (x, 0), 0), 12));
19939 else
19940 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
19941 if (small_data_operand (x, GET_MODE (x)))
19942 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19943 reg_names[SMALL_DATA_REG]);
19944 }
19945 return;
19946
19947 /* Print AltiVec or SPE memory operand. */
19948 case 'y':
19949 {
19950 rtx tmp;
19951
19952 gcc_assert (MEM_P (x));
19953
19954 tmp = XEXP (x, 0);
19955
19956 /* Ugly hack because %y is overloaded. */
19957 if ((TARGET_SPE || TARGET_E500_DOUBLE)
19958 && (GET_MODE_SIZE (GET_MODE (x)) == 8
19959 || FLOAT128_2REG_P (GET_MODE (x))
19960 || GET_MODE (x) == TImode
19961 || GET_MODE (x) == PTImode))
19962 {
19963 /* Handle [reg]. */
19964 if (REG_P (tmp))
19965 {
19966 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
19967 break;
19968 }
19969 /* Handle [reg+UIMM]. */
19970 else if (GET_CODE (tmp) == PLUS
19971 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
19972 {
19973 int offset;
19974
19975 gcc_assert (REG_P (XEXP (tmp, 0)));
19976
19977 offset = INTVAL (XEXP (tmp, 1));
19978 fprintf (file, "%d(%s)", offset, reg_names[REGNO (XEXP (tmp, 0))]);
19979 break;
19980 }
19981
19982 /* Fall through. Must be [reg+reg]. */
19983 }
19984 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
19985 && GET_CODE (tmp) == AND
19986 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
19987 && INTVAL (XEXP (tmp, 1)) == -16)
19988 tmp = XEXP (tmp, 0);
19989 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
19990 && GET_CODE (tmp) == PRE_MODIFY)
19991 tmp = XEXP (tmp, 1);
19992 if (REG_P (tmp))
19993 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
19994 else
19995 {
19996 if (GET_CODE (tmp) != PLUS
19997 || !REG_P (XEXP (tmp, 0))
19998 || !REG_P (XEXP (tmp, 1)))
19999 {
20000 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
20001 break;
20002 }
20003
20004 if (REGNO (XEXP (tmp, 0)) == 0)
20005 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20006 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20007 else
20008 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20009 reg_names[ REGNO (XEXP (tmp, 1)) ]);
20010 }
20011 break;
20012 }
20013
20014 case 0:
20015 if (REG_P (x))
20016 fprintf (file, "%s", reg_names[REGNO (x)]);
20017 else if (MEM_P (x))
20018 {
20019 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20020 know the width from the mode. */
20021 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20022 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20023 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20024 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20025 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20026 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20027 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20028 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20029 else
20030 output_address (GET_MODE (x), XEXP (x, 0));
20031 }
20032 else
20033 {
20034 if (toc_relative_expr_p (x, false))
20035 /* This hack along with a corresponding hack in
20036 rs6000_output_addr_const_extra arranges to output addends
20037 where the assembler expects to find them. eg.
20038 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
20039 without this hack would be output as "x@toc+4". We
20040 want "x+4@toc". */
20041 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
20042 else
20043 output_addr_const (file, x);
20044 }
20045 return;
20046
20047 case '&':
20048 if (const char *name = get_some_local_dynamic_name ())
20049 assemble_name (file, name);
20050 else
20051 output_operand_lossage ("'%%&' used without any "
20052 "local dynamic TLS references");
20053 return;
20054
20055 default:
20056 output_operand_lossage ("invalid %%xn code");
20057 }
20058 }
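
/* Usage examples (illustrative): with operand 0 = (reg:DI 5), "%0" prints
   the register name ("5" with the default register names) and "%L0"
   prints the second word's register, "6".  With operand 1 =
   (const_int 0x12340000), "%u1" prints the non-zero halfword "0x1234".  */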
20059 \f
20060 /* Print the address of an operand. */
20061
20062 void
20063 print_operand_address (FILE *file, rtx x)
20064 {
20065 if (REG_P (x))
20066 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
20067 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
20068 || GET_CODE (x) == LABEL_REF)
20069 {
20070 output_addr_const (file, x);
20071 if (small_data_operand (x, GET_MODE (x)))
20072 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20073 reg_names[SMALL_DATA_REG]);
20074 else
20075 gcc_assert (!TARGET_TOC);
20076 }
20077 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20078 && REG_P (XEXP (x, 1)))
20079 {
20080 if (REGNO (XEXP (x, 0)) == 0)
20081 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
20082 reg_names[ REGNO (XEXP (x, 0)) ]);
20083 else
20084 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
20085 reg_names[ REGNO (XEXP (x, 1)) ]);
20086 }
20087 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20088 && GET_CODE (XEXP (x, 1)) == CONST_INT)
20089 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
20090 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
20091 #if TARGET_MACHO
20092 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20093 && CONSTANT_P (XEXP (x, 1)))
20094 {
20095 fprintf (file, "lo16(");
20096 output_addr_const (file, XEXP (x, 1));
20097 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20098 }
20099 #endif
20100 #if TARGET_ELF
20101 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20102 && CONSTANT_P (XEXP (x, 1)))
20103 {
20104 output_addr_const (file, XEXP (x, 1));
20105 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20106 }
20107 #endif
20108 else if (toc_relative_expr_p (x, false))
20109 {
20110 /* This hack along with a corresponding hack in
20111 rs6000_output_addr_const_extra arranges to output addends
20112 where the assembler expects to find them. eg.
20113 (lo_sum (reg 9)
20114 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
20115 without this hack would be output as "x@toc+8@l(9)". We
20116 want "x+8@toc@l(9)". */
20117 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
20118 if (GET_CODE (x) == LO_SUM)
20119 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
20120 else
20121 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
20122 }
20123 else
20124 gcc_unreachable ();
20125 }
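
/* Address forms produced above (illustrative): (reg 9) -> "0(9)";
   (plus (reg 9) (const_int 16)) -> "16(9)"; (plus (reg 9) (reg 10)) ->
   "9,10"; and a small-data symbol gains an "@sda21(0)" style suffix.  */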
20126 \f
20127 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
20128
20129 static bool
20130 rs6000_output_addr_const_extra (FILE *file, rtx x)
20131 {
20132 if (GET_CODE (x) == UNSPEC)
20133 switch (XINT (x, 1))
20134 {
20135 case UNSPEC_TOCREL:
20136 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
20137 && REG_P (XVECEXP (x, 0, 1))
20138 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
20139 output_addr_const (file, XVECEXP (x, 0, 0));
20140 if (x == tocrel_base && tocrel_offset != const0_rtx)
20141 {
20142 if (INTVAL (tocrel_offset) >= 0)
20143 fprintf (file, "+");
20144 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
20145 }
20146 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
20147 {
20148 putc ('-', file);
20149 assemble_name (file, toc_label_name);
20150 }
20151 else if (TARGET_ELF)
20152 fputs ("@toc", file);
20153 return true;
20154
20155 #if TARGET_MACHO
20156 case UNSPEC_MACHOPIC_OFFSET:
20157 output_addr_const (file, XVECEXP (x, 0, 0));
20158 putc ('-', file);
20159 machopic_output_function_base_name (file);
20160 return true;
20161 #endif
20162 }
20163 return false;
20164 }
20165 \f
20166 /* Target hook for assembling integer objects. The PowerPC version has
20167 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
20168 is defined. It also needs to handle DI-mode objects on 64-bit
20169 targets. */
20170
20171 static bool
20172 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
20173 {
20174 #ifdef RELOCATABLE_NEEDS_FIXUP
20175 /* Special handling for SI values. */
20176 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
20177 {
20178 static int recurse = 0;
20179
20180 /* For -mrelocatable, we mark all addresses that need to be fixed up in
20181 the .fixup section. Since the TOC section is already relocated, we
20182 don't need to mark it here. We used to skip the text section, but it
20183 should never be valid for relocated addresses to be placed in the text
20184 section. */
20185 if (TARGET_RELOCATABLE
20186 && in_section != toc_section
20187 && !recurse
20188 && !CONST_SCALAR_INT_P (x)
20189 && CONSTANT_P (x))
20190 {
20191 char buf[256];
20192
20193 recurse = 1;
20194 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
20195 fixuplabelno++;
20196 ASM_OUTPUT_LABEL (asm_out_file, buf);
20197 fprintf (asm_out_file, "\t.long\t(");
20198 output_addr_const (asm_out_file, x);
20199 fprintf (asm_out_file, ")@fixup\n");
20200 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
20201 ASM_OUTPUT_ALIGN (asm_out_file, 2);
20202 fprintf (asm_out_file, "\t.long\t");
20203 assemble_name (asm_out_file, buf);
20204 fprintf (asm_out_file, "\n\t.previous\n");
20205 recurse = 0;
20206 return true;
20207 }
20208 /* Remove initial .'s to turn a -mcall-aixdesc function
20209 address into the address of the descriptor, not the function
20210 itself. */
20211 else if (GET_CODE (x) == SYMBOL_REF
20212 && XSTR (x, 0)[0] == '.'
20213 && DEFAULT_ABI == ABI_AIX)
20214 {
20215 const char *name = XSTR (x, 0);
20216 while (*name == '.')
20217 name++;
20218
20219 fprintf (asm_out_file, "\t.long\t%s\n", name);
20220 return true;
20221 }
20222 }
20223 #endif /* RELOCATABLE_NEEDS_FIXUP */
20224 return default_assemble_integer (x, size, aligned_p);
20225 }
20226
20227 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
20228 /* Emit an assembler directive to set symbol visibility for DECL to
20229 VISIBILITY_TYPE. */
20230
20231 static void
20232 rs6000_assemble_visibility (tree decl, int vis)
20233 {
20234 if (TARGET_XCOFF)
20235 return;
20236
20237 /* Functions need to have their entry point symbol visibility set as
20238 well as their descriptor symbol visibility. */
20239 if (DEFAULT_ABI == ABI_AIX
20240 && DOT_SYMBOLS
20241 && TREE_CODE (decl) == FUNCTION_DECL)
20242 {
20243 static const char * const visibility_types[] = {
20244 NULL, "internal", "hidden", "protected"
20245 };
20246
20247 const char *name, *type;
20248
20249 name = ((* targetm.strip_name_encoding)
20250 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
20251 type = visibility_types[vis];
20252
20253 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
20254 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
20255 }
20256 else
20257 default_assemble_visibility (decl, vis);
20258 }
20259 #endif
20260 \f
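/* Reverse the condition CODE of a comparison made in MODE, using the
   maybe-unordered form for floating-point compares whenever NaNs could
   make the plain reversal wrong.  */
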
20261 enum rtx_code
20262 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
20263 {
20264 /* Reversal of FP compares takes care -- an ordered compare
20265 becomes an unordered compare and vice versa. */
20266 if (mode == CCFPmode
20267 && (!flag_finite_math_only
20268 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
20269 || code == UNEQ || code == LTGT))
20270 return reverse_condition_maybe_unordered (code);
20271 else
20272 return reverse_condition (code);
20273 }
20274
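/* For example: reversing LT in CCFPmode without -ffinite-math-only yields
   UNGE, keeping the unordered (NaN) case on the correct side, whereas in
   CCmode plain reverse_condition maps LT to GE.  */
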
20275 /* Generate a compare for CODE. Return a brand-new rtx that
20276 represents the result of the compare. */
20277
20278 static rtx
20279 rs6000_generate_compare (rtx cmp, machine_mode mode)
20280 {
20281 machine_mode comp_mode;
20282 rtx compare_result;
20283 enum rtx_code code = GET_CODE (cmp);
20284 rtx op0 = XEXP (cmp, 0);
20285 rtx op1 = XEXP (cmp, 1);
20286
20287 if (FLOAT_MODE_P (mode))
20288 comp_mode = CCFPmode;
20289 else if (code == GTU || code == LTU
20290 || code == GEU || code == LEU)
20291 comp_mode = CCUNSmode;
20292 else if ((code == EQ || code == NE)
20293 && unsigned_reg_p (op0)
20294 && (unsigned_reg_p (op1)
20295 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
20296 /* These are unsigned values, perhaps there will be a later
20297 ordering compare that can be shared with this one. */
20298 comp_mode = CCUNSmode;
20299 else
20300 comp_mode = CCmode;
20301
20302 /* If we have an unsigned compare, make sure we don't have a signed value as
20303 an immediate. */
20304 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
20305 && INTVAL (op1) < 0)
20306 {
20307 op0 = copy_rtx_if_shared (op0);
20308 op1 = force_reg (GET_MODE (op0), op1);
20309 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
20310 }
20311
20312 /* First, the compare. */
20313 compare_result = gen_reg_rtx (comp_mode);
20314
20315 /* E500 FP compare instructions on the GPRs. Yuck! */
20316 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
20317 && FLOAT_MODE_P (mode))
20318 {
20319 rtx cmp, or_result, compare_result2;
20320 machine_mode op_mode = GET_MODE (op0);
20321 bool reverse_p;
20322
20323 if (op_mode == VOIDmode)
20324 op_mode = GET_MODE (op1);
20325
20326 /* First reverse the condition codes that aren't directly supported. */
20327 switch (code)
20328 {
20329 case NE:
20330 case UNLT:
20331 case UNLE:
20332 case UNGT:
20333 case UNGE:
20334 code = reverse_condition_maybe_unordered (code);
20335 reverse_p = true;
20336 break;
20337
20338 case EQ:
20339 case LT:
20340 case LE:
20341 case GT:
20342 case GE:
20343 reverse_p = false;
20344 break;
20345
20346 default:
20347 gcc_unreachable ();
20348 }
20349
20350 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
20351 This explains the following mess. */
20352
20353 switch (code)
20354 {
20355 case EQ:
20356 switch (op_mode)
20357 {
20358 case SFmode:
20359 cmp = (flag_finite_math_only && !flag_trapping_math)
20360 ? gen_tstsfeq_gpr (compare_result, op0, op1)
20361 : gen_cmpsfeq_gpr (compare_result, op0, op1);
20362 break;
20363
20364 case DFmode:
20365 cmp = (flag_finite_math_only && !flag_trapping_math)
20366 ? gen_tstdfeq_gpr (compare_result, op0, op1)
20367 : gen_cmpdfeq_gpr (compare_result, op0, op1);
20368 break;
20369
20370 case TFmode:
20371 case IFmode:
20372 case KFmode:
20373 cmp = (flag_finite_math_only && !flag_trapping_math)
20374 ? gen_tsttfeq_gpr (compare_result, op0, op1)
20375 : gen_cmptfeq_gpr (compare_result, op0, op1);
20376 break;
20377
20378 default:
20379 gcc_unreachable ();
20380 }
20381 break;
20382
20383 case GT:
20384 case GE:
20385 switch (op_mode)
20386 {
20387 case SFmode:
20388 cmp = (flag_finite_math_only && !flag_trapping_math)
20389 ? gen_tstsfgt_gpr (compare_result, op0, op1)
20390 : gen_cmpsfgt_gpr (compare_result, op0, op1);
20391 break;
20392
20393 case DFmode:
20394 cmp = (flag_finite_math_only && !flag_trapping_math)
20395 ? gen_tstdfgt_gpr (compare_result, op0, op1)
20396 : gen_cmpdfgt_gpr (compare_result, op0, op1);
20397 break;
20398
20399 case TFmode:
20400 case IFmode:
20401 case KFmode:
20402 cmp = (flag_finite_math_only && !flag_trapping_math)
20403 ? gen_tsttfgt_gpr (compare_result, op0, op1)
20404 : gen_cmptfgt_gpr (compare_result, op0, op1);
20405 break;
20406
20407 default:
20408 gcc_unreachable ();
20409 }
20410 break;
20411
20412 case LT:
20413 case LE:
20414 switch (op_mode)
20415 {
20416 case SFmode:
20417 cmp = (flag_finite_math_only && !flag_trapping_math)
20418 ? gen_tstsflt_gpr (compare_result, op0, op1)
20419 : gen_cmpsflt_gpr (compare_result, op0, op1);
20420 break;
20421
20422 case DFmode:
20423 cmp = (flag_finite_math_only && !flag_trapping_math)
20424 ? gen_tstdflt_gpr (compare_result, op0, op1)
20425 : gen_cmpdflt_gpr (compare_result, op0, op1);
20426 break;
20427
20428 case TFmode:
20429 case IFmode:
20430 case KFmode:
20431 cmp = (flag_finite_math_only && !flag_trapping_math)
20432 ? gen_tsttflt_gpr (compare_result, op0, op1)
20433 : gen_cmptflt_gpr (compare_result, op0, op1);
20434 break;
20435
20436 default:
20437 gcc_unreachable ();
20438 }
20439 break;
20440
20441 default:
20442 gcc_unreachable ();
20443 }
20444
20445 /* Synthesize LE and GE from LT/GT || EQ. */
20446 if (code == LE || code == GE)
20447 {
20448 emit_insn (cmp);
20449
20450 compare_result2 = gen_reg_rtx (CCFPmode);
20451
20452 /* Do the EQ. */
20453 switch (op_mode)
20454 {
20455 case SFmode:
20456 cmp = (flag_finite_math_only && !flag_trapping_math)
20457 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
20458 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
20459 break;
20460
20461 case DFmode:
20462 cmp = (flag_finite_math_only && !flag_trapping_math)
20463 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
20464 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
20465 break;
20466
20467 case TFmode:
20468 case IFmode:
20469 case KFmode:
20470 cmp = (flag_finite_math_only && !flag_trapping_math)
20471 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
20472 : gen_cmptfeq_gpr (compare_result2, op0, op1);
20473 break;
20474
20475 default:
20476 gcc_unreachable ();
20477 }
20478
20479 emit_insn (cmp);
20480
20481 /* OR them together. */
20482 or_result = gen_reg_rtx (CCFPmode);
20483 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
20484 compare_result2);
20485 compare_result = or_result;
20486 }
20487
20488 code = reverse_p ? NE : EQ;
20489
20490 emit_insn (cmp);
20491 }
20492
20493 /* IEEE 128-bit support in VSX registers. The comparison functions
20494 (__cmpokf2 and __cmpukf2) return a value 0..15 that is laid out the same way as
20495 the PowerPC CR register would for a normal floating point comparison from
20496 the fcmpo and fcmpu instructions. */
20497 else if (FLOAT128_IEEE_P (mode))
20498 {
20499 rtx and_reg = gen_reg_rtx (SImode);
20500 rtx dest = gen_reg_rtx (SImode);
20501 rtx libfunc = optab_libfunc (ucmp_optab, mode);
20502 HOST_WIDE_INT mask_value = 0;
20503
20504 /* Values that __cmpokf2/__cmpukf2 returns. */
20505 #define PPC_CMP_UNORDERED 0x1 /* isnan (a) || isnan (b). */
20506 #define PPC_CMP_EQUAL 0x2 /* a == b. */
20507 #define PPC_CMP_GREATER_THEN 0x4 /* a > b. */
20508 #define PPC_CMP_LESS_THEN 0x8 /* a < b. */
20509
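/* Worked example: for GE, mask_value below becomes PPC_CMP_GREATER_THEN
   | PPC_CMP_EQUAL == 0x6 with code NE, so the final compare tests
   (__cmpukf2 (a, b) & 0x6) != 0, which is true exactly when a > b or
   a == b.  */
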
20510 switch (code)
20511 {
20512 case EQ:
20513 mask_value = PPC_CMP_EQUAL;
20514 code = NE;
20515 break;
20516
20517 case NE:
20518 mask_value = PPC_CMP_EQUAL;
20519 code = EQ;
20520 break;
20521
20522 case GT:
20523 mask_value = PPC_CMP_GREATER_THEN;
20524 code = NE;
20525 break;
20526
20527 case GE:
20528 mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
20529 code = NE;
20530 break;
20531
20532 case LT:
20533 mask_value = PPC_CMP_LESS_THEN;
20534 code = NE;
20535 break;
20536
20537 case LE:
20538 mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
20539 code = NE;
20540 break;
20541
20542 case UNLE:
20543 mask_value = PPC_CMP_GREATER_THEN;
20544 code = EQ;
20545 break;
20546
20547 case UNLT:
20548 mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
20549 code = EQ;
20550 break;
20551
20552 case UNGE:
20553 mask_value = PPC_CMP_LESS_THEN;
20554 code = EQ;
20555 break;
20556
20557 case UNGT:
20558 mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
20559 code = EQ;
20560 break;
20561
20562 case UNEQ:
20563 mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
20564 code = NE;
break;
20565
20566 case LTGT:
20567 mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
20568 code = EQ;
20569 break;
20570
20571 case UNORDERED:
20572 mask_value = PPC_CMP_UNORDERED;
20573 code = NE;
20574 break;
20575
20576 case ORDERED:
20577 mask_value = PPC_CMP_UNORDERED;
20578 code = EQ;
20579 break;
20580
20581 default:
20582 gcc_unreachable ();
20583 }
20584
20585 gcc_assert (mask_value != 0);
20586 and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2,
20587 op0, mode, op1, mode);
20588
20589 emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value)));
20590 compare_result = gen_reg_rtx (CCmode);
20591 comp_mode = CCmode;
20592
20593 emit_insn (gen_rtx_SET (compare_result,
20594 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
20595 }
20596
20597 else
20598 {
20599 /* Generate an XLC-compatible TFmode compare as a PARALLEL with extra
20600 CLOBBERs to match the cmptf_internal2 pattern. */
20601 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
20602 && FLOAT128_IBM_P (GET_MODE (op0))
20603 && TARGET_HARD_FLOAT && TARGET_FPRS)
20604 emit_insn (gen_rtx_PARALLEL (VOIDmode,
20605 gen_rtvec (10,
20606 gen_rtx_SET (compare_result,
20607 gen_rtx_COMPARE (comp_mode, op0, op1)),
20608 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20609 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20610 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20611 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20612 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20613 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20614 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20615 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20616 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
20617 else if (GET_CODE (op1) == UNSPEC
20618 && XINT (op1, 1) == UNSPEC_SP_TEST)
20619 {
20620 rtx op1b = XVECEXP (op1, 0, 0);
20621 comp_mode = CCEQmode;
20622 compare_result = gen_reg_rtx (CCEQmode);
20623 if (TARGET_64BIT)
20624 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
20625 else
20626 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
20627 }
20628 else
20629 emit_insn (gen_rtx_SET (compare_result,
20630 gen_rtx_COMPARE (comp_mode, op0, op1)));
20631 }
20632
20633 /* Some kinds of FP comparisons need an OR operation;
20634 under flag_finite_math_only we don't bother. */
20635 if (FLOAT_MODE_P (mode)
20636 && !FLOAT128_IEEE_P (mode)
20637 && !flag_finite_math_only
20638 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
20639 && (code == LE || code == GE
20640 || code == UNEQ || code == LTGT
20641 || code == UNGT || code == UNLT))
20642 {
20643 enum rtx_code or1, or2;
20644 rtx or1_rtx, or2_rtx, compare2_rtx;
20645 rtx or_result = gen_reg_rtx (CCEQmode);
20646
20647 switch (code)
20648 {
20649 case LE: or1 = LT; or2 = EQ; break;
20650 case GE: or1 = GT; or2 = EQ; break;
20651 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
20652 case LTGT: or1 = LT; or2 = GT; break;
20653 case UNGT: or1 = UNORDERED; or2 = GT; break;
20654 case UNLT: or1 = UNORDERED; or2 = LT; break;
20655 default: gcc_unreachable ();
20656 }
20657 validate_condition_mode (or1, comp_mode);
20658 validate_condition_mode (or2, comp_mode);
20659 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
20660 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
20661 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
20662 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
20663 const_true_rtx);
20664 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
20665
20666 compare_result = or_result;
20667 code = EQ;
20668 }
20669
20670 validate_condition_mode (code, GET_MODE (compare_result));
20671
20672 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
20673 }
20674
20675 \f
20676 /* Return the diagnostic message string if the binary operation OP is
20677 not permitted on TYPE1 and TYPE2, NULL otherwise. */
20678
20679 static const char*
20680 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
20681 const_tree type1,
20682 const_tree type2)
20683 {
20684 machine_mode mode1 = TYPE_MODE (type1);
20685 machine_mode mode2 = TYPE_MODE (type2);
20686
20687 /* For complex modes, use the inner type. */
20688 if (COMPLEX_MODE_P (mode1))
20689 mode1 = GET_MODE_INNER (mode1);
20690
20691 if (COMPLEX_MODE_P (mode2))
20692 mode2 = GET_MODE_INNER (mode2);
20693
20694 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
20695 double to intermix. */
20696 if (mode1 == mode2)
20697 return NULL;
20698
20699 if ((mode1 == KFmode && mode2 == IFmode)
20700 || (mode1 == IFmode && mode2 == KFmode))
20701 return N_("__float128 and __ibm128 cannot be used in the same expression");
20702
20703 if (TARGET_IEEEQUAD
20704 && ((mode1 == IFmode && mode2 == TFmode)
20705 || (mode1 == TFmode && mode2 == IFmode)))
20706 return N_("__ibm128 and long double cannot be used in the same expression");
20707
20708 if (!TARGET_IEEEQUAD
20709 && ((mode1 == KFmode && mode2 == TFmode)
20710 || (mode1 == TFmode && mode2 == KFmode)))
20711 return N_("__float128 and long double cannot be used in the same "
20712 "expression");
20713
20714 return NULL;
20715 }
20716
20717 \f
20718 /* Expand floating point conversion to/from __float128 and __ibm128. */
20719
20720 void
20721 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
20722 {
20723 machine_mode dest_mode = GET_MODE (dest);
20724 machine_mode src_mode = GET_MODE (src);
20725 convert_optab cvt = unknown_optab;
20726 bool do_move = false;
20727 rtx libfunc = NULL_RTX;
20728 rtx dest2;
20729
20730 if (dest_mode == src_mode)
20731 gcc_unreachable ();
20732
20733 /* Eliminate memory operations. */
20734 if (MEM_P (src))
20735 src = force_reg (src_mode, src);
20736
20737 if (MEM_P (dest))
20738 {
20739 rtx tmp = gen_reg_rtx (dest_mode);
20740 rs6000_expand_float128_convert (tmp, src, unsigned_p);
20741 rs6000_emit_move (dest, tmp, dest_mode);
20742 return;
20743 }
20744
20745 /* Convert to IEEE 128-bit floating point. */
20746 if (FLOAT128_IEEE_P (dest_mode))
20747 {
20748 switch (src_mode)
20749 {
20750 case DFmode:
20751 cvt = sext_optab;
20752 break;
20753
20754 case SFmode:
20755 cvt = sext_optab;
20756 break;
20757
20758 case KFmode:
20759 case IFmode:
20760 case TFmode:
20761 if (FLOAT128_IBM_P (src_mode))
20762 cvt = sext_optab;
20763 else
20764 do_move = true;
20765 break;
20766
20767 case SImode:
20768 case DImode:
20769 cvt = (unsigned_p) ? ufloat_optab : sfloat_optab;
20770 break;
20771
20772 default:
20773 gcc_unreachable ();
20774 }
20775 }
20776
20777 /* Convert from IEEE 128-bit floating point. */
20778 else if (FLOAT128_IEEE_P (src_mode))
20779 {
20780 switch (dest_mode)
20781 {
20782 case DFmode:
20783 cvt = trunc_optab;
20784 break;
20785
20786 case SFmode:
20787 cvt = trunc_optab;
20788 break;
20789
20790 case KFmode:
20791 case IFmode:
20792 case TFmode:
20793 if (FLOAT128_IBM_P (dest_mode))
20794 cvt = trunc_optab;
20795 else
20796 do_move = true;
20797 break;
20798
20799 case SImode:
20800 case DImode:
20801 cvt = (unsigned_p) ? ufix_optab : sfix_optab;
20802 break;
20803
20804 default:
20805 gcc_unreachable ();
20806 }
20807 }
20808
20809 /* Both IBM format. */
20810 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
20811 do_move = true;
20812
20813 else
20814 gcc_unreachable ();
20815
20816 /* Handle conversion between TFmode/KFmode. */
20817 if (do_move)
20818 emit_move_insn (dest, gen_lowpart (dest_mode, src));
20819
20820 /* Call an external function to do the conversion. */
20821 else if (cvt != unknown_optab)
20822 {
20823 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
20824 gcc_assert (libfunc != NULL_RTX);
20825
20826 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
20827 src_mode);
20828
20829 gcc_assert (dest2 != NULL_RTX);
20830 if (!rtx_equal_p (dest, dest2))
20831 emit_move_insn (dest, dest2);
20832 }
20833
20834 else
20835 gcc_unreachable ();
20836
20837 return;
20838 }
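/* Hedged note: the libfuncs resolved via convert_optab_libfunc above are the
   usual soft-float entry points; e.g. DFmode -> KFmode typically resolves to
   __extenddfkf2 and the reverse to __trunckfdf2, assuming the default rs6000
   libfunc names.  */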
20839
20840 \f
20841 /* Emit the RTL for an sISEL pattern. */
20842
20843 void
20844 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
20845 {
20846 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
20847 }
20848
20849 /* Emit RTL that sets a register to zero exactly when OP1 and OP2 are
20850 equal. SCRATCH may be used as that register. Return the register used. */
20851
20852 rtx
20853 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
20854 {
20855 if (op2 == const0_rtx)
20856 return op1;
20857
20858 if (GET_CODE (scratch) == SCRATCH)
20859 scratch = gen_reg_rtx (mode);
20860
20861 if (logical_operand (op2, mode))
20862 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
20863 else
20864 emit_insn (gen_rtx_SET (scratch,
20865 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
20866
20867 return scratch;
20868 }
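/* Example (a sketch): for OP1 = r3 and OP2 = 5, which is a logical_operand,
   the code above emits scratch = r3 ^ 5, which is zero exactly when
   r3 == 5; constants unusable with XOR take the subtraction form instead,
   with the same zero-iff-equal property.  */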
20869
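/* Emit RTL for a set-on-comparison (sCOND) pattern: store in OPERANDS[0]
   the truth value of the comparison OPERANDS[1].  */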
20870 void
20871 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
20872 {
20873 rtx condition_rtx;
20874 machine_mode op_mode;
20875 enum rtx_code cond_code;
20876 rtx result = operands[0];
20877
20878 condition_rtx = rs6000_generate_compare (operands[1], mode);
20879 cond_code = GET_CODE (condition_rtx);
20880
20881 if (FLOAT_MODE_P (mode)
20882 && !TARGET_FPRS && TARGET_HARD_FLOAT)
20883 {
20884 rtx t;
20885
20886 PUT_MODE (condition_rtx, SImode);
20887 t = XEXP (condition_rtx, 0);
20888
20889 gcc_assert (cond_code == NE || cond_code == EQ);
20890
20891 if (cond_code == NE)
20892 emit_insn (gen_e500_flip_gt_bit (t, t));
20893
20894 emit_insn (gen_move_from_CR_gt_bit (result, t));
20895 return;
20896 }
20897
20898 if (cond_code == NE
20899 || cond_code == GE || cond_code == LE
20900 || cond_code == GEU || cond_code == LEU
20901 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
20902 {
20903 rtx not_result = gen_reg_rtx (CCEQmode);
20904 rtx not_op, rev_cond_rtx;
20905 machine_mode cc_mode;
20906
20907 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
20908
20909 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
20910 SImode, XEXP (condition_rtx, 0), const0_rtx);
20911 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
20912 emit_insn (gen_rtx_SET (not_result, not_op));
20913 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
20914 }
20915
20916 op_mode = GET_MODE (XEXP (operands[1], 0));
20917 if (op_mode == VOIDmode)
20918 op_mode = GET_MODE (XEXP (operands[1], 1));
20919
20920 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
20921 {
20922 PUT_MODE (condition_rtx, DImode);
20923 convert_move (result, condition_rtx, 0);
20924 }
20925 else
20926 {
20927 PUT_MODE (condition_rtx, SImode);
20928 emit_insn (gen_rtx_SET (result, condition_rtx));
20929 }
20930 }
20931
20932 /* Emit a conditional branch to OPERANDS[3], testing the comparison in OPERANDS[0]. */
20933
20934 void
20935 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
20936 {
20937 rtx condition_rtx, loc_ref;
20938
20939 condition_rtx = rs6000_generate_compare (operands[0], mode);
20940 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
20941 emit_jump_insn (gen_rtx_SET (pc_rtx,
20942 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
20943 loc_ref, pc_rtx)));
20944 }
20945
20946 /* Return the string to output a conditional branch to LABEL, which is
20947 the operand template of the label, or NULL if the branch is really a
20948 conditional return.
20949
20950 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
20951 condition code register and its mode specifies what kind of
20952 comparison we made.
20953
20954 REVERSED is nonzero if we should reverse the sense of the comparison.
20955
20956 INSN is the insn. */
20957
20958 char *
20959 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
20960 {
20961 static char string[64];
20962 enum rtx_code code = GET_CODE (op);
20963 rtx cc_reg = XEXP (op, 0);
20964 machine_mode mode = GET_MODE (cc_reg);
20965 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
20966 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
20967 int really_reversed = reversed ^ need_longbranch;
20968 char *s = string;
20969 const char *ccode;
20970 const char *pred;
20971 rtx note;
20972
20973 validate_condition_mode (code, mode);
20974
20975 /* Work out which way this really branches. We could always use
20976 reverse_condition_maybe_unordered here, but distinguishing the FP and
20977 non-FP cases makes the resulting assembler clearer. */
20978 if (really_reversed)
20979 {
20980 /* Reversal of FP compares takes care -- an ordered compare
20981 becomes an unordered compare and vice versa. */
20982 if (mode == CCFPmode)
20983 code = reverse_condition_maybe_unordered (code);
20984 else
20985 code = reverse_condition (code);
20986 }
20987
20988 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
20989 {
20990 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
20991 to the GT bit. */
20992 switch (code)
20993 {
20994 case EQ:
20995 /* Opposite of GT. */
20996 code = GT;
20997 break;
20998
20999 case NE:
21000 code = UNLE;
21001 break;
21002
21003 default:
21004 gcc_unreachable ();
21005 }
21006 }
21007
21008 switch (code)
21009 {
21010 /* Not all of these are actually distinct opcodes, but
21011 we distinguish them for clarity of the resulting assembler. */
21012 case NE: case LTGT:
21013 ccode = "ne"; break;
21014 case EQ: case UNEQ:
21015 ccode = "eq"; break;
21016 case GE: case GEU:
21017 ccode = "ge"; break;
21018 case GT: case GTU: case UNGT:
21019 ccode = "gt"; break;
21020 case LE: case LEU:
21021 ccode = "le"; break;
21022 case LT: case LTU: case UNLT:
21023 ccode = "lt"; break;
21024 case UNORDERED: ccode = "un"; break;
21025 case ORDERED: ccode = "nu"; break;
21026 case UNGE: ccode = "nl"; break;
21027 case UNLE: ccode = "ng"; break;
21028 default:
21029 gcc_unreachable ();
21030 }
21031
21032 /* Maybe we have a guess as to how likely the branch is. */
21033 pred = "";
21034 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
21035 if (note != NULL_RTX)
21036 {
21037 /* PROB is the difference from 50%. */
21038 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
21039
21040 /* Only hint for highly probable/improbable branches on newer
21041 cpus as static prediction overrides processor dynamic
21042 prediction. For older cpus we may as well always hint, but
21043 assume not taken for branches that are very close to 50% as a
21044 mispredicted taken branch is more expensive than a
21045 mispredicted not-taken branch. */
21046 if (rs6000_always_hint
21047 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
21048 && br_prob_note_reliable_p (note)))
21049 {
21050 if (abs (prob) > REG_BR_PROB_BASE / 20
21051 && ((prob > 0) ^ need_longbranch))
21052 pred = "+";
21053 else
21054 pred = "-";
21055 }
21056 }
21057
21058 if (label == NULL)
21059 s += sprintf (s, "b%slr%s ", ccode, pred);
21060 else
21061 s += sprintf (s, "b%s%s ", ccode, pred);
21062
21063 /* We need to escape any '%' characters in the reg_names string.
21064 Assume they'd only be the first character.... */
21065 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
21066 *s++ = '%';
21067 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
21068
21069 if (label != NULL)
21070 {
21071 /* If the branch distance was too far, we may have to use an
21072 unconditional branch to go the distance. */
21073 if (need_longbranch)
21074 s += sprintf (s, ",$+8\n\tb %s", label);
21075 else
21076 s += sprintf (s, ",%s", label);
21077 }
21078
21079 return string;
21080 }
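/* Example output (a sketch; exact register naming is target-dependent): an
   EQ test on CR0 predicted taken might produce "beq+ 0,.L5", while an
   out-of-range target inverts the sense and emits the long form
   "bne 0,$+8" followed by "b .L5".  */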
21081
21082 /* Return the string to flip the GT bit on a CR. */
21083 char *
21084 output_e500_flip_gt_bit (rtx dst, rtx src)
21085 {
21086 static char string[64];
21087 int a, b;
21088
21089 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
21090 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
21091
21092 /* GT bit. */
21093 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
21094 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
21095
21096 sprintf (string, "crnot %d,%d", a, b);
21097 return string;
21098 }
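/* Worked example: each CR field holds four bits (LT, GT, EQ, SO), so the GT
   bit of field n is 4*n + 1; flipping CR2's GT bit into CR3 emits
   "crnot 13,9".  */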
21099
21100 /* Emit a VSX or Altivec compare and return the mask register, or NULL_RTX if CODE has no direct machine instruction. */
21101
21102 static rtx
21103 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
21104 {
21105 rtx mask;
21106 machine_mode mode = GET_MODE (op0);
21107
21108 switch (code)
21109 {
21110 default:
21111 break;
21112
21113 case GE:
21114 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21115 return NULL_RTX;
21116
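/* ... fall through: floating-point GE has a direct vector compare insn.  */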
21117 case EQ:
21118 case GT:
21119 case GTU:
21120 case ORDERED:
21121 case UNORDERED:
21122 case UNEQ:
21123 case LTGT:
21124 mask = gen_reg_rtx (mode);
21125 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
21126 return mask;
21127 }
21128
21129 return NULL_RTX;
21130 }
21131
21132 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
21133 DMODE is the expected destination mode. This is a recursive function. */
21134
21135 static rtx
21136 rs6000_emit_vector_compare (enum rtx_code rcode,
21137 rtx op0, rtx op1,
21138 machine_mode dmode)
21139 {
21140 rtx mask;
21141 bool swap_operands = false;
21142 bool try_again = false;
21143
21144 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
21145 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
21146
21147 /* See if the comparison works as is. */
21148 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21149 if (mask)
21150 return mask;
21151
21152 switch (rcode)
21153 {
21154 case LT:
21155 rcode = GT;
21156 swap_operands = true;
21157 try_again = true;
21158 break;
21159 case LTU:
21160 rcode = GTU;
21161 swap_operands = true;
21162 try_again = true;
21163 break;
21164 case NE:
21165 case UNLE:
21166 case UNLT:
21167 case UNGE:
21168 case UNGT:
21169 /* Invert condition and try again.
21170 e.g., A != B becomes ~(A==B). */
21171 {
21172 enum rtx_code rev_code;
21173 enum insn_code nor_code;
21174 rtx mask2;
21175
21176 rev_code = reverse_condition_maybe_unordered (rcode);
21177 if (rev_code == UNKNOWN)
21178 return NULL_RTX;
21179
21180 nor_code = optab_handler (one_cmpl_optab, dmode);
21181 if (nor_code == CODE_FOR_nothing)
21182 return NULL_RTX;
21183
21184 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
21185 if (!mask2)
21186 return NULL_RTX;
21187
21188 mask = gen_reg_rtx (dmode);
21189 emit_insn (GEN_FCN (nor_code) (mask, mask2));
21190 return mask;
21191 }
21192 break;
21193 case GE:
21194 case GEU:
21195 case LE:
21196 case LEU:
21197 /* Try GT/GTU/LT/LTU OR EQ */
21198 {
21199 rtx c_rtx, eq_rtx;
21200 enum insn_code ior_code;
21201 enum rtx_code new_code;
21202
21203 switch (rcode)
21204 {
21205 case GE:
21206 new_code = GT;
21207 break;
21208
21209 case GEU:
21210 new_code = GTU;
21211 break;
21212
21213 case LE:
21214 new_code = LT;
21215 break;
21216
21217 case LEU:
21218 new_code = LTU;
21219 break;
21220
21221 default:
21222 gcc_unreachable ();
21223 }
21224
21225 ior_code = optab_handler (ior_optab, dmode);
21226 if (ior_code == CODE_FOR_nothing)
21227 return NULL_RTX;
21228
21229 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
21230 if (!c_rtx)
21231 return NULL_RTX;
21232
21233 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
21234 if (!eq_rtx)
21235 return NULL_RTX;
21236
21237 mask = gen_reg_rtx (dmode);
21238 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
21239 return mask;
21240 }
21241 break;
21242 default:
21243 return NULL_RTX;
21244 }
21245
21246 if (try_again)
21247 {
21248 if (swap_operands)
21249 std::swap (op0, op1);
21250
21251 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21252 if (mask)
21253 return mask;
21254 }
21255
21256 /* You only get two chances. */
21257 return NULL_RTX;
21258 }
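/* Example (sketch): a signed V4SI "a <= b" has no direct instruction, so
   the recursion above emits GT with the operands swapped (vcmpgtsw giving
   b > a) plus vcmpequw for equality, and ORs the two masks together.  */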
21259
21260 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
21261 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
21262 operands for the relation operation COND. */
21263
21264 int
21265 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
21266 rtx cond, rtx cc_op0, rtx cc_op1)
21267 {
21268 machine_mode dest_mode = GET_MODE (dest);
21269 machine_mode mask_mode = GET_MODE (cc_op0);
21270 enum rtx_code rcode = GET_CODE (cond);
21271 machine_mode cc_mode = CCmode;
21272 rtx mask;
21273 rtx cond2;
21275 bool invert_move = false;
21276
21277 if (VECTOR_UNIT_NONE_P (dest_mode))
21278 return 0;
21279
21280 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
21281 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
21282
21283 switch (rcode)
21284 {
21285 /* Handle conditions with no direct compare by inverting the condition
21286 and swapping the two move arms. */
21287 case NE:
21288 case UNLE:
21289 case UNLT:
21290 case UNGE:
21291 case UNGT:
21292 /* Invert condition and try again.
21293 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
21294 invert_move = true;
21295 rcode = reverse_condition_maybe_unordered (rcode);
21296 if (rcode == UNKNOWN)
21297 return 0;
21298 break;
21299
21300 /* Mark unsigned tests with CCUNSmode. */
21301 case GTU:
21302 case GEU:
21303 case LTU:
21304 case LEU:
21305 cc_mode = CCUNSmode;
21306 break;
21307
21308 default:
21309 break;
21310 }
21311
21312 /* Get the vector mask for the given relational operations. */
21313 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
21314
21315 if (!mask)
21316 return 0;
21317
21318 if (invert_move)
21320 std::swap (op_true, op_false);
21324
21325 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
21326 CONST0_RTX (dest_mode));
21327 emit_insn (gen_rtx_SET (dest,
21328 gen_rtx_IF_THEN_ELSE (dest_mode,
21329 cond2,
21330 op_true,
21331 op_false)));
21332 return 1;
21333 }
21334
21335 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
21336 operands of the last comparison is nonzero/true, FALSE_COND if it
21337 is zero/false. Return 0 if the hardware has no such operation. */
21338
21339 int
21340 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
21341 {
21342 enum rtx_code code = GET_CODE (op);
21343 rtx op0 = XEXP (op, 0);
21344 rtx op1 = XEXP (op, 1);
21345 machine_mode compare_mode = GET_MODE (op0);
21346 machine_mode result_mode = GET_MODE (dest);
21347 rtx temp;
21348 bool is_against_zero;
21349
21350 /* These modes should always match. */
21351 if (GET_MODE (op1) != compare_mode
21352 /* In the isel case however, we can use a compare immediate, so
21353 op1 may be a small constant. */
21354 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
21355 return 0;
21356 if (GET_MODE (true_cond) != result_mode)
21357 return 0;
21358 if (GET_MODE (false_cond) != result_mode)
21359 return 0;
21360
21361 /* Don't allow using floating point comparisons for integer results for
21362 now. */
21363 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
21364 return 0;
21365
21366 /* First, work out if the hardware can do this at all, or
21367 if it's too slow.... */
21368 if (!FLOAT_MODE_P (compare_mode))
21369 {
21370 if (TARGET_ISEL)
21371 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
21372 return 0;
21373 }
21374 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
21375 && SCALAR_FLOAT_MODE_P (compare_mode))
21376 return 0;
21377
21378 is_against_zero = op1 == CONST0_RTX (compare_mode);
21379
21380 /* A floating-point subtract might overflow, underflow, or produce
21381 an inexact result, thus changing the floating-point flags, so it
21382 can't be generated if we care about that. It's safe if one side
21383 of the construct is zero, since then no subtract will be
21384 generated. */
21385 if (SCALAR_FLOAT_MODE_P (compare_mode)
21386 && flag_trapping_math && ! is_against_zero)
21387 return 0;
21388
21389 /* Eliminate half of the comparisons by switching operands; this
21390 makes the remaining code simpler. */
21391 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
21392 || code == LTGT || code == LT || code == UNLE)
21393 {
21394 code = reverse_condition_maybe_unordered (code);
21395 std::swap (true_cond, false_cond);
21398 }
21399
21400 /* UNEQ and LTGT take four instructions for a comparison with zero;
21401 it'll probably be faster to use a branch here too. */
21402 if (code == UNEQ && HONOR_NANS (compare_mode))
21403 return 0;
21404
21405 /* We're going to try to implement comparisons by performing
21406 a subtract, then comparing against zero. Unfortunately,
21407 Inf - Inf is NaN which is not zero, and so if we don't
21408 know that the operand is finite and the comparison
21409 would treat EQ different to UNORDERED, we can't do it. */
21410 if (HONOR_INFINITIES (compare_mode)
21411 && code != GT && code != UNGE
21412 && (GET_CODE (op1) != CONST_DOUBLE
21413 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
21414 /* Constructs of the form (a OP b ? a : b) are safe. */
21415 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
21416 || (! rtx_equal_p (op0, true_cond)
21417 && ! rtx_equal_p (op1, true_cond))))
21418 return 0;
21419
21420 /* At this point we know we can use fsel. */
21421
21422 /* Reduce the comparison to a comparison against zero. */
21423 if (! is_against_zero)
21424 {
21425 temp = gen_reg_rtx (compare_mode);
21426 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
21427 op0 = temp;
21428 op1 = CONST0_RTX (compare_mode);
21429 }
21430
21431 /* If we don't care about NaNs we can reduce some of the comparisons
21432 down to faster ones. */
21433 if (! HONOR_NANS (compare_mode))
21434 switch (code)
21435 {
21436 case GT:
21437 code = LE;
21438 std::swap (true_cond, false_cond);
21441 break;
21442 case UNGE:
21443 code = GE;
21444 break;
21445 case UNEQ:
21446 code = EQ;
21447 break;
21448 default:
21449 break;
21450 }
21451
21452 /* Now, reduce everything down to a GE. */
21453 switch (code)
21454 {
21455 case GE:
21456 break;
21457
21458 case LE:
21459 temp = gen_reg_rtx (compare_mode);
21460 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
21461 op0 = temp;
21462 break;
21463
21464 case ORDERED:
21465 temp = gen_reg_rtx (compare_mode);
21466 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
21467 op0 = temp;
21468 break;
21469
21470 case EQ:
21471 temp = gen_reg_rtx (compare_mode);
21472 emit_insn (gen_rtx_SET (temp,
21473 gen_rtx_NEG (compare_mode,
21474 gen_rtx_ABS (compare_mode, op0))));
21475 op0 = temp;
21476 break;
21477
21478 case UNGE:
21479 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
21480 temp = gen_reg_rtx (result_mode);
21481 emit_insn (gen_rtx_SET (temp,
21482 gen_rtx_IF_THEN_ELSE (result_mode,
21483 gen_rtx_GE (VOIDmode,
21484 op0, op1),
21485 true_cond, false_cond)));
21486 false_cond = true_cond;
21487 true_cond = temp;
21488
21489 temp = gen_reg_rtx (compare_mode);
21490 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
21491 op0 = temp;
21492 break;
21493
21494 case GT:
21495 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
21496 temp = gen_reg_rtx (result_mode);
21497 emit_insn (gen_rtx_SET (temp,
21498 gen_rtx_IF_THEN_ELSE (result_mode,
21499 gen_rtx_GE (VOIDmode,
21500 op0, op1),
21501 true_cond, false_cond)));
21502 true_cond = false_cond;
21503 false_cond = temp;
21504
21505 temp = gen_reg_rtx (compare_mode);
21506 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
21507 op0 = temp;
21508 break;
21509
21510 default:
21511 gcc_unreachable ();
21512 }
21513
21514 emit_insn (gen_rtx_SET (dest,
21515 gen_rtx_IF_THEN_ELSE (result_mode,
21516 gen_rtx_GE (VOIDmode,
21517 op0, op1),
21518 true_cond, false_cond)));
21519 return 1;
21520 }
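/* Example (sketch): for dest = (x >= 0.0) ? y : z in DFmode, the final SET
   above maps onto a single fsel instruction, roughly "fsel dest,x,y,z",
   which selects y when x >= 0.0 and z otherwise.  */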
21521
21522 /* Same as above, but for ints (isel). */
21523
21524 static int
21525 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
21526 {
21527 rtx condition_rtx, cr;
21528 machine_mode mode = GET_MODE (dest);
21529 enum rtx_code cond_code;
21530 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
21531 bool signedp;
21532
21533 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
21534 return 0;
21535
21536 /* We still have to do the compare, because isel doesn't do a
21537 compare; it just looks at the CRx bits set by a previous compare
21538 instruction. */
21539 condition_rtx = rs6000_generate_compare (op, mode);
21540 cond_code = GET_CODE (condition_rtx);
21541 cr = XEXP (condition_rtx, 0);
21542 signedp = GET_MODE (cr) == CCmode;
21543
21544 isel_func = (mode == SImode
21545 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
21546 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
21547
21548 switch (cond_code)
21549 {
21550 case LT: case GT: case LTU: case GTU: case EQ:
21551 /* isel handles these directly. */
21552 break;
21553
21554 default:
21555 /* We need to swap the sense of the comparison. */
21556 {
21557 std::swap (false_cond, true_cond);
21558 PUT_CODE (condition_rtx, reverse_condition (cond_code));
21559 }
21560 break;
21561 }
21562
21563 false_cond = force_reg (mode, false_cond);
21564 if (true_cond != const0_rtx)
21565 true_cond = force_reg (mode, true_cond);
21566
21567 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
21568
21569 return 1;
21570 }
21571
21572 const char *
21573 output_isel (rtx *operands)
21574 {
21575 enum rtx_code code;
21576
21577 code = GET_CODE (operands[1]);
21578
21579 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
21580 {
21581 gcc_assert (GET_CODE (operands[2]) == REG
21582 && GET_CODE (operands[3]) == REG);
21583 PUT_CODE (operands[1], reverse_condition (code));
21584 return "isel %0,%3,%2,%j1";
21585 }
21586
21587 return "isel %0,%2,%3,%j1";
21588 }
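/* Note: isel can only test the LT, GT and EQ bits directly, so the code
   above prints e.g. a GE condition in its reversed LT form with the two
   source operands exchanged.  */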
21589
21590 void
21591 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
21592 {
21593 machine_mode mode = GET_MODE (op0);
21594 enum rtx_code c;
21595 rtx target;
21596
21597 /* VSX/altivec have direct min/max insns. */
21598 if ((code == SMAX || code == SMIN)
21599 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
21600 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
21601 {
21602 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
21603 return;
21604 }
21605
21606 if (code == SMAX || code == SMIN)
21607 c = GE;
21608 else
21609 c = GEU;
21610
21611 if (code == SMAX || code == UMAX)
21612 target = emit_conditional_move (dest, c, op0, op1, mode,
21613 op0, op1, mode, 0);
21614 else
21615 target = emit_conditional_move (dest, c, op0, op1, mode,
21616 op1, op0, mode, 0);
21617 gcc_assert (target);
21618 if (target != dest)
21619 emit_move_insn (dest, target);
21620 }
21621
21622 /* A subroutine of the atomic operation splitters. Jump to LABEL if
21623 COND is true. Mark the jump as unlikely to be taken. */
21624
21625 static void
21626 emit_unlikely_jump (rtx cond, rtx label)
21627 {
21628 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
21629 rtx x;
21630
21631 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
21632 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
21633 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
21634 }
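/* Note: REG_BR_PROB_BASE / 100 - 1 encodes a taken probability of just
   under 1%, which downstream passes treat as very unlikely.  */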
21635
21636 /* A subroutine of the atomic operation splitters. Emit a load-locked
21637 instruction in MODE. For QI/HImode, possibly use a pattern that includes
21638 the zero_extend operation. */
21639
21640 static void
21641 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
21642 {
21643 rtx (*fn) (rtx, rtx) = NULL;
21644
21645 switch (mode)
21646 {
21647 case QImode:
21648 fn = gen_load_lockedqi;
21649 break;
21650 case HImode:
21651 fn = gen_load_lockedhi;
21652 break;
21653 case SImode:
21654 if (GET_MODE (mem) == QImode)
21655 fn = gen_load_lockedqi_si;
21656 else if (GET_MODE (mem) == HImode)
21657 fn = gen_load_lockedhi_si;
21658 else
21659 fn = gen_load_lockedsi;
21660 break;
21661 case DImode:
21662 fn = gen_load_lockeddi;
21663 break;
21664 case TImode:
21665 fn = gen_load_lockedti;
21666 break;
21667 default:
21668 gcc_unreachable ();
21669 }
21670 emit_insn (fn (reg, mem));
21671 }
21672
21673 /* A subroutine of the atomic operation splitters. Emit a store-conditional
21674 instruction in MODE. */
21675
21676 static void
21677 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
21678 {
21679 rtx (*fn) (rtx, rtx, rtx) = NULL;
21680
21681 switch (mode)
21682 {
21683 case QImode:
21684 fn = gen_store_conditionalqi;
21685 break;
21686 case HImode:
21687 fn = gen_store_conditionalhi;
21688 break;
21689 case SImode:
21690 fn = gen_store_conditionalsi;
21691 break;
21692 case DImode:
21693 fn = gen_store_conditionaldi;
21694 break;
21695 case TImode:
21696 fn = gen_store_conditionalti;
21697 break;
21698 default:
21699 gcc_unreachable ();
21700 }
21701
21702 /* Emit sync before stwcx. to address PPC405 Erratum. */
21703 if (PPC405_ERRATUM77)
21704 emit_insn (gen_hwsync ());
21705
21706 emit_insn (fn (res, mem, val));
21707 }
21708
21709 /* Expand barriers before and after a load_locked/store_cond sequence. */
21710
21711 static rtx
21712 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
21713 {
21714 rtx addr = XEXP (mem, 0);
21715 int strict_p = (reload_in_progress || reload_completed);
21716
21717 if (!legitimate_indirect_address_p (addr, strict_p)
21718 && !legitimate_indexed_address_p (addr, strict_p))
21719 {
21720 addr = force_reg (Pmode, addr);
21721 mem = replace_equiv_address_nv (mem, addr);
21722 }
21723
21724 switch (model)
21725 {
21726 case MEMMODEL_RELAXED:
21727 case MEMMODEL_CONSUME:
21728 case MEMMODEL_ACQUIRE:
21729 break;
21730 case MEMMODEL_RELEASE:
21731 case MEMMODEL_ACQ_REL:
21732 emit_insn (gen_lwsync ());
21733 break;
21734 case MEMMODEL_SEQ_CST:
21735 emit_insn (gen_hwsync ());
21736 break;
21737 default:
21738 gcc_unreachable ();
21739 }
21740 return mem;
21741 }
21742
21743 static void
21744 rs6000_post_atomic_barrier (enum memmodel model)
21745 {
21746 switch (model)
21747 {
21748 case MEMMODEL_RELAXED:
21749 case MEMMODEL_CONSUME:
21750 case MEMMODEL_RELEASE:
21751 break;
21752 case MEMMODEL_ACQUIRE:
21753 case MEMMODEL_ACQ_REL:
21754 case MEMMODEL_SEQ_CST:
21755 emit_insn (gen_isync ());
21756 break;
21757 default:
21758 gcc_unreachable ();
21759 }
21760 }
21761
21762 /* A subroutine of the various atomic expanders. For sub-word operations,
21763 we must adjust things to operate on SImode. Given the original MEM,
21764 return a new aligned memory. Also build and return the quantities by
21765 which to shift and mask. */
21766
21767 static rtx
21768 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
21769 {
21770 rtx addr, align, shift, mask, mem;
21771 HOST_WIDE_INT shift_mask;
21772 machine_mode mode = GET_MODE (orig_mem);
21773
21774 /* For smaller modes, we have to implement this via SImode. */
21775 shift_mask = (mode == QImode ? 0x18 : 0x10);
21776
21777 addr = XEXP (orig_mem, 0);
21778 addr = force_reg (GET_MODE (addr), addr);
21779
21780 /* Aligned memory containing subword. Generate a new memory. We
21781 do not want any of the existing MEM_ATTR data, as we're now
21782 accessing memory outside the original object. */
21783 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
21784 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21785 mem = gen_rtx_MEM (SImode, align);
21786 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
21787 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
21788 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
21789
21790 /* Shift amount for subword relative to aligned word. */
21791 shift = gen_reg_rtx (SImode);
21792 addr = gen_lowpart (SImode, addr);
21793 rtx tmp = gen_reg_rtx (SImode);
21794 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
21795 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
21796 if (BYTES_BIG_ENDIAN)
21797 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
21798 shift, 1, OPTAB_LIB_WIDEN);
21799 *pshift = shift;
21800
21801 /* Mask for insertion. */
21802 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
21803 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
21804 *pmask = mask;
21805
21806 return mem;
21807 }
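/* Worked example (sketch, little-endian): for a QImode access at address A,
   the aligned word is at A & -4, the shift is (A * 8) & 0x18 bits, and the
   insertion mask is 0xff << shift; big-endian additionally XORs the shift
   with the shift mask so byte 0 lands at the high end of the word.  */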
21808
21809 /* A subroutine of the various atomic expanders. For sub-word operands,
21810 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
21811
21812 static rtx
21813 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
21814 {
21815 rtx x;
21816
21817 x = gen_reg_rtx (SImode);
21818 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
21819 gen_rtx_NOT (SImode, mask),
21820 oldval)));
21821
21822 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
21823
21824 return x;
21825 }
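/* In effect: result = (OLDVAL & ~MASK) | NEWVAL, assuming the caller has
   already shifted and masked NEWVAL into position.  */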
21826
21827 /* A subroutine of the various atomic expanders. For sub-word operands,
21828 extract WIDE to NARROW via SHIFT. */
21829
21830 static void
21831 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
21832 {
21833 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
21834 wide, 1, OPTAB_LIB_WIDEN);
21835 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
21836 }
21837
21838 /* Expand an atomic compare and swap operation. */
21839
21840 void
21841 rs6000_expand_atomic_compare_and_swap (rtx operands[])
21842 {
21843 rtx boolval, retval, mem, oldval, newval, cond;
21844 rtx label1, label2, x, mask, shift;
21845 machine_mode mode, orig_mode;
21846 enum memmodel mod_s, mod_f;
21847 bool is_weak;
21848
21849 boolval = operands[0];
21850 retval = operands[1];
21851 mem = operands[2];
21852 oldval = operands[3];
21853 newval = operands[4];
21854 is_weak = (INTVAL (operands[5]) != 0);
21855 mod_s = memmodel_base (INTVAL (operands[6]));
21856 mod_f = memmodel_base (INTVAL (operands[7]));
21857 orig_mode = mode = GET_MODE (mem);
21858
21859 mask = shift = NULL_RTX;
21860 if (mode == QImode || mode == HImode)
21861 {
21862 /* Before power8, we didn't have access to lbarx/lharx, so generate a
21863 lwarx and shift/mask operations. With power8, we need to do the
21864 comparison in SImode, but the store is still done in QI/HImode. */
21865 oldval = convert_modes (SImode, mode, oldval, 1);
21866
21867 if (!TARGET_SYNC_HI_QI)
21868 {
21869 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
21870
21871 /* Shift and mask OLDVAL into position within the word. */
21872 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
21873 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21874
21875 /* Shift and mask NEWVAL into position within the word. */
21876 newval = convert_modes (SImode, mode, newval, 1);
21877 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
21878 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21879 }
21880
21881 /* Prepare to adjust the return value. */
21882 retval = gen_reg_rtx (SImode);
21883 mode = SImode;
21884 }
21885 else if (reg_overlap_mentioned_p (retval, oldval))
21886 oldval = copy_to_reg (oldval);
21887
21888 mem = rs6000_pre_atomic_barrier (mem, mod_s);
21889
21890 label1 = NULL_RTX;
21891 if (!is_weak)
21892 {
21893 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
21894 emit_label (XEXP (label1, 0));
21895 }
21896 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
21897
21898 emit_load_locked (mode, retval, mem);
21899
21900 x = retval;
21901 if (mask)
21902 {
21903 x = expand_simple_binop (SImode, AND, retval, mask,
21904 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21905 }
21906
21907 cond = gen_reg_rtx (CCmode);
21908 /* If we have TImode, synthesize a comparison. */
21909 if (mode != TImode)
21910 x = gen_rtx_COMPARE (CCmode, x, oldval);
21911 else
21912 {
21913 rtx xor1_result = gen_reg_rtx (DImode);
21914 rtx xor2_result = gen_reg_rtx (DImode);
21915 rtx or_result = gen_reg_rtx (DImode);
21916 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
21917 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
21918 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
21919 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
21920
21921 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
21922 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
21923 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
21924 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
21925 }
21926
21927 emit_insn (gen_rtx_SET (cond, x));
21928
21929 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
21930 emit_unlikely_jump (x, label2);
21931
21932 x = newval;
21933 if (mask)
21934 x = rs6000_mask_atomic_subword (retval, newval, mask);
21935
21936 emit_store_conditional (orig_mode, cond, mem, x);
21937
21938 if (!is_weak)
21939 {
21940 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
21941 emit_unlikely_jump (x, label1);
21942 }
21943
21944 if (!is_mm_relaxed (mod_f))
21945 emit_label (XEXP (label2, 0));
21946
21947 rs6000_post_atomic_barrier (mod_s);
21948
21949 if (is_mm_relaxed (mod_f))
21950 emit_label (XEXP (label2, 0));
21951
21952 if (shift)
21953 rs6000_finish_atomic_subword (operands[1], retval, shift);
21954 else if (mode != GET_MODE (operands[1]))
21955 convert_move (operands[1], retval, 1);
21956
21957 /* In all cases, CR0 contains EQ on success, and NE on failure. */
21958 x = gen_rtx_EQ (SImode, cond, const0_rtx);
21959 emit_insn (gen_rtx_SET (boolval, x));
21960 }
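/* The emitted sequence is roughly (a sketch for word-sized SImode):
       .L1: lwarx   ret,0,mem      # load-locked
            cmpw    cr0,ret,old
            bne-    cr0,.L2        # values differ: fail
            stwcx.  new,0,mem      # store-conditional, sets CR0
            bne-    cr0,.L1        # lost reservation: retry (strong CAS)
       .L2:                        # CR0.EQ is the success flag  */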
21961
21962 /* Expand an atomic exchange operation. */
21963
21964 void
21965 rs6000_expand_atomic_exchange (rtx operands[])
21966 {
21967 rtx retval, mem, val, cond;
21968 machine_mode mode;
21969 enum memmodel model;
21970 rtx label, x, mask, shift;
21971
21972 retval = operands[0];
21973 mem = operands[1];
21974 val = operands[2];
21975 model = memmodel_base (INTVAL (operands[3]));
21976 mode = GET_MODE (mem);
21977
21978 mask = shift = NULL_RTX;
21979 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
21980 {
21981 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
21982
21983 /* Shift and mask VAL into position within the word. */
21984 val = convert_modes (SImode, mode, val, 1);
21985 val = expand_simple_binop (SImode, ASHIFT, val, shift,
21986 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21987
21988 /* Prepare to adjust the return value. */
21989 retval = gen_reg_rtx (SImode);
21990 mode = SImode;
21991 }
21992
21993 mem = rs6000_pre_atomic_barrier (mem, model);
21994
21995 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
21996 emit_label (XEXP (label, 0));
21997
21998 emit_load_locked (mode, retval, mem);
21999
22000 x = val;
22001 if (mask)
22002 x = rs6000_mask_atomic_subword (retval, val, mask);
22003
22004 cond = gen_reg_rtx (CCmode);
22005 emit_store_conditional (mode, cond, mem, x);
22006
22007 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22008 emit_unlikely_jump (x, label);
22009
22010 rs6000_post_atomic_barrier (model);
22011
22012 if (shift)
22013 rs6000_finish_atomic_subword (operands[0], retval, shift);
22014 }
22015
22016 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
22017 to perform. MEM is the memory on which to operate. VAL is the second
22018 operand of the binary operator. BEFORE and AFTER are optional locations to
22019 return the value of MEM either before or after the operation. MODEL_RTX
22020 is a CONST_INT containing the memory model to use. */
22021
22022 void
22023 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
22024 rtx orig_before, rtx orig_after, rtx model_rtx)
22025 {
22026 enum memmodel model = memmodel_base (INTVAL (model_rtx));
22027 machine_mode mode = GET_MODE (mem);
22028 machine_mode store_mode = mode;
22029 rtx label, x, cond, mask, shift;
22030 rtx before = orig_before, after = orig_after;
22031
22032 mask = shift = NULL_RTX;
22033 /* On power8, we want to use SImode for the operation. On previous systems,
22034 do the operation on the containing aligned word and shift/mask to get the
22035 proper byte or halfword. */
22036 if (mode == QImode || mode == HImode)
22037 {
22038 if (TARGET_SYNC_HI_QI)
22039 {
22040 val = convert_modes (SImode, mode, val, 1);
22041
22042 /* Prepare to adjust the return value. */
22043 before = gen_reg_rtx (SImode);
22044 if (after)
22045 after = gen_reg_rtx (SImode);
22046 mode = SImode;
22047 }
22048 else
22049 {
22050 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22051
22052 /* Shift and mask VAL into position within the word. */
22053 val = convert_modes (SImode, mode, val, 1);
22054 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22055 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22056
22057 switch (code)
22058 {
22059 case IOR:
22060 case XOR:
22061 /* We've already zero-extended VAL. That is sufficient to
22062 make certain that it does not affect other bits. */
22063 mask = NULL;
22064 break;
22065
22066 case AND:
22067 /* If we make certain that all of the other bits in VAL are
22068 set, that will be sufficient to not affect other bits. */
22069 x = gen_rtx_NOT (SImode, mask);
22070 x = gen_rtx_IOR (SImode, x, val);
22071 emit_insn (gen_rtx_SET (val, x));
22072 mask = NULL;
22073 break;
22074
22075 case NOT:
22076 case PLUS:
22077 case MINUS:
22078 /* These will all affect bits outside the field and need
22079 adjustment via MASK within the loop. */
22080 break;
22081
22082 default:
22083 gcc_unreachable ();
22084 }
22085
22086 /* Prepare to adjust the return value. */
22087 before = gen_reg_rtx (SImode);
22088 if (after)
22089 after = gen_reg_rtx (SImode);
22090 store_mode = mode = SImode;
22091 }
22092 }
22093
22094 mem = rs6000_pre_atomic_barrier (mem, model);
22095
22096 label = gen_label_rtx ();
22097 emit_label (label);
22098 label = gen_rtx_LABEL_REF (VOIDmode, label);
22099
22100 if (before == NULL_RTX)
22101 before = gen_reg_rtx (mode);
22102
22103 emit_load_locked (mode, before, mem);
22104
22105 if (code == NOT)
22106 {
22107 x = expand_simple_binop (mode, AND, before, val,
22108 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22109 after = expand_simple_unop (mode, NOT, x, after, 1);
22110 }
22111 else
22112 {
22113 after = expand_simple_binop (mode, code, before, val,
22114 after, 1, OPTAB_LIB_WIDEN);
22115 }
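/* The NOT case above computes ~(BEFORE & VAL), i.e. the fetch-NAND
   semantics of __atomic_fetch_nand.  */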
22116
22117 x = after;
22118 if (mask)
22119 {
22120 x = expand_simple_binop (SImode, AND, after, mask,
22121 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22122 x = rs6000_mask_atomic_subword (before, x, mask);
22123 }
22124 else if (store_mode != mode)
22125 x = convert_modes (store_mode, mode, x, 1);
22126
22127 cond = gen_reg_rtx (CCmode);
22128 emit_store_conditional (store_mode, cond, mem, x);
22129
22130 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22131 emit_unlikely_jump (x, label);
22132
22133 rs6000_post_atomic_barrier (model);
22134
22135 if (shift)
22136 {
22137 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
22138 then do the calculations in an SImode register. */
22139 if (orig_before)
22140 rs6000_finish_atomic_subword (orig_before, before, shift);
22141 if (orig_after)
22142 rs6000_finish_atomic_subword (orig_after, after, shift);
22143 }
22144 else if (store_mode != mode)
22145 {
22146 /* QImode/HImode on machines with lbarx/lharx where we do the native
22147 operation and then do the calculations in an SImode register. */
22148 if (orig_before)
22149 convert_move (orig_before, before, 1);
22150 if (orig_after)
22151 convert_move (orig_after, after, 1);
22152 }
22153 else if (orig_after && after != orig_after)
22154 emit_move_insn (orig_after, after);
22155 }
22156
22157 /* Emit instructions to move SRC to DST. Called by splitters for
22158 multi-register moves. It will emit at most one instruction for
22159 each register that is accessed; that is, it won't emit li/lis pairs
22160 (or equivalent for 64-bit code). One of SRC or DST must be a hard
22161 register. */
22162
22163 void
22164 rs6000_split_multireg_move (rtx dst, rtx src)
22165 {
22166 /* The register number of the first register being moved. */
22167 int reg;
22168 /* The mode that is to be moved. */
22169 machine_mode mode;
22170 /* The mode that the move is being done in, and its size. */
22171 machine_mode reg_mode;
22172 int reg_mode_size;
22173 /* The number of registers that will be moved. */
22174 int nregs;
22175
22176 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
22177 mode = GET_MODE (dst);
22178 nregs = hard_regno_nregs[reg][mode];
22179 if (FP_REGNO_P (reg))
22180 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
22181 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
22182 else if (ALTIVEC_REGNO_P (reg))
22183 reg_mode = V16QImode;
22184 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
22185 reg_mode = DFmode;
22186 else
22187 reg_mode = word_mode;
22188 reg_mode_size = GET_MODE_SIZE (reg_mode);
22189
22190 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
22191
22192 /* TDmode residing in FP registers is special, since the ISA requires that
22193 the lower-numbered word of a register pair is always the most significant
22194 word, even in little-endian mode. This does not match the usual subreg
22195 semantics, so we cannot use simplify_gen_subreg in those cases. Access
22196 the appropriate constituent registers "by hand" in little-endian mode.
22197
22198 Note we do not need to check for destructive overlap here since TDmode
22199 can only reside in even/odd register pairs. */
22200 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
22201 {
22202 rtx p_src, p_dst;
22203 int i;
22204
22205 for (i = 0; i < nregs; i++)
22206 {
22207 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
22208 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
22209 else
22210 p_src = simplify_gen_subreg (reg_mode, src, mode,
22211 i * reg_mode_size);
22212
22213 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
22214 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
22215 else
22216 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
22217 i * reg_mode_size);
22218
22219 emit_insn (gen_rtx_SET (p_dst, p_src));
22220 }
22221
22222 return;
22223 }
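/* Example: a little-endian TDmode value in f10/f11 still keeps its most
   significant word in f10, so iteration I above accesses register
   REGNO + nregs - 1 - I rather than REGNO + I.  */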
22224
22225 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
22226 {
22227 /* Move register range backwards, if we might have destructive
22228 overlap. */
22229 int i;
22230 for (i = nregs - 1; i >= 0; i--)
22231 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
22232 i * reg_mode_size),
22233 simplify_gen_subreg (reg_mode, src, mode,
22234 i * reg_mode_size)));
22235 }
22236 else
22237 {
22238 int i;
22239 int j = -1;
22240 bool used_update = false;
22241 rtx restore_basereg = NULL_RTX;
22242
22243 if (MEM_P (src) && INT_REGNO_P (reg))
22244 {
22245 rtx breg;
22246
22247 if (GET_CODE (XEXP (src, 0)) == PRE_INC
22248 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
22249 {
22250 rtx delta_rtx;
22251 breg = XEXP (XEXP (src, 0), 0);
22252 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
22253 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
22254 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
22255 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
22256 src = replace_equiv_address (src, breg);
22257 }
22258 else if (! rs6000_offsettable_memref_p (src, reg_mode))
22259 {
22260 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
22261 {
22262 rtx basereg = XEXP (XEXP (src, 0), 0);
22263 if (TARGET_UPDATE)
22264 {
22265 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
22266 emit_insn (gen_rtx_SET (ndst,
22267 gen_rtx_MEM (reg_mode,
22268 XEXP (src, 0))));
22269 used_update = true;
22270 }
22271 else
22272 emit_insn (gen_rtx_SET (basereg,
22273 XEXP (XEXP (src, 0), 1)));
22274 src = replace_equiv_address (src, basereg);
22275 }
22276 else
22277 {
22278 rtx basereg = gen_rtx_REG (Pmode, reg);
22279 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
22280 src = replace_equiv_address (src, basereg);
22281 }
22282 }
22283
22284 breg = XEXP (src, 0);
22285 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
22286 breg = XEXP (breg, 0);
22287
22288 /* If the base register we are using to address memory is
22289 also a destination reg, then change that register last. */
22290 if (REG_P (breg)
22291 && REGNO (breg) >= REGNO (dst)
22292 && REGNO (breg) < REGNO (dst) + nregs)
22293 j = REGNO (breg) - REGNO (dst);
22294 }
22295 else if (MEM_P (dst) && INT_REGNO_P (reg))
22296 {
22297 rtx breg;
22298
22299 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
22300 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
22301 {
22302 rtx delta_rtx;
22303 breg = XEXP (XEXP (dst, 0), 0);
22304 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
22305 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
22306 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
22307
22308 /* We have to update the breg before doing the store.
22309 Use store with update, if available. */
22310
22311 if (TARGET_UPDATE)
22312 {
22313 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
22314 emit_insn (TARGET_32BIT
22315 ? (TARGET_POWERPC64
22316 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
22317 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
22318 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
22319 used_update = true;
22320 }
22321 else
22322 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
22323 dst = replace_equiv_address (dst, breg);
22324 }
22325 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
22326 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
22327 {
22328 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
22329 {
22330 rtx basereg = XEXP (XEXP (dst, 0), 0);
22331 if (TARGET_UPDATE)
22332 {
22333 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
22334 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
22335 XEXP (dst, 0)),
22336 nsrc));
22337 used_update = true;
22338 }
22339 else
22340 emit_insn (gen_rtx_SET (basereg,
22341 XEXP (XEXP (dst, 0), 1)));
22342 dst = replace_equiv_address (dst, basereg);
22343 }
22344 else
22345 {
22346 rtx basereg = XEXP (XEXP (dst, 0), 0);
22347 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
22348 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
22349 && REG_P (basereg)
22350 && REG_P (offsetreg)
22351 && REGNO (basereg) != REGNO (offsetreg));
22352 if (REGNO (basereg) == 0)
22353 std::swap (basereg, offsetreg);
22358 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
22359 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
22360 dst = replace_equiv_address (dst, basereg);
22361 }
22362 }
22363 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
22364 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
22365 }
22366
22367 for (i = 0; i < nregs; i++)
22368 {
22369 /* Calculate index to next subword. */
22370 ++j;
22371 if (j == nregs)
22372 j = 0;
22373
22374 /* If compiler already emitted move of first word by
22375 store with update, no need to do anything. */
22376 if (j == 0 && used_update)
22377 continue;
22378
22379 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
22380 j * reg_mode_size),
22381 simplify_gen_subreg (reg_mode, src, mode,
22382 j * reg_mode_size)));
22383 }
22384 if (restore_basereg != NULL_RTX)
22385 emit_insn (restore_basereg);
22386 }
22387 }
22388
22389 \f
22390 /* This page contains routines that are used to determine what the
22391 function prologue and epilogue code will do and write them out. */
22392
22393 static inline bool
22394 save_reg_p (int r)
22395 {
22396 return !call_used_regs[r] && df_regs_ever_live_p (r);
22397 }
22398
22399 /* Determine whether GP register REG is really used. */
22400
22401 static bool
22402 rs6000_reg_live_or_pic_offset_p (int reg)
22403 {
22404 /* We need to mark the PIC offset register live for the same conditions
22405 as it is set up, or otherwise it won't be saved before we clobber it. */
22406
22407 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
22408 {
22409 if (TARGET_TOC && TARGET_MINIMAL_TOC
22410 && (crtl->calls_eh_return
22411 || df_regs_ever_live_p (reg)
22412 || get_pool_size ()))
22413 return true;
22414
22415 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
22416 && flag_pic)
22417 return true;
22418 }
22419
22420 /* If the function calls eh_return, claim as used all the registers
22421 that would otherwise be checked for liveness. */
22422
22423 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
22424 && !call_used_regs[reg]);
22425 }
22426
22427 /* Return the first fixed-point register that is required to be
22428 saved. 32 if none. */
22429
22430 int
22431 first_reg_to_save (void)
22432 {
22433 int first_reg;
22434
22435 /* Find lowest numbered live register. */
22436 for (first_reg = 13; first_reg <= 31; first_reg++)
22437 if (save_reg_p (first_reg))
22438 break;
22439
22440 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
22441 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
22442 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
22443 || (TARGET_TOC && TARGET_MINIMAL_TOC))
22444 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
22445 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
22446
22447 #if TARGET_MACHO
22448 if (flag_pic
22449 && crtl->uses_pic_offset_table
22450 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
22451 return RS6000_PIC_OFFSET_TABLE_REGNUM;
22452 #endif
22453
22454 return first_reg;
22455 }
22456
22457 /* Similar, for FP regs. */
22458
22459 int
22460 first_fp_reg_to_save (void)
22461 {
22462 int first_reg;
22463
22464 /* Find lowest numbered live register. */
22465 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
22466 if (save_reg_p (first_reg))
22467 break;
22468
22469 return first_reg;
22470 }
22471
22472 /* Similar, for AltiVec regs. */
22473
22474 static int
22475 first_altivec_reg_to_save (void)
22476 {
22477 int i;
22478
22479 /* Stack frame remains as is unless we are in AltiVec ABI. */
22480 if (! TARGET_ALTIVEC_ABI)
22481 return LAST_ALTIVEC_REGNO + 1;
22482
22483 /* On Darwin, the unwind routines are compiled without
22484 TARGET_ALTIVEC, and use save_world to save/restore the
22485 altivec registers when necessary. */
22486 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
22487 && ! TARGET_ALTIVEC)
22488 return FIRST_ALTIVEC_REGNO + 20;
22489
22490 /* Find lowest numbered live register. */
22491 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
22492 if (save_reg_p (i))
22493 break;
22494
22495 return i;
22496 }
22497
22498 /* Return a 32-bit mask of the AltiVec registers we need to set in
22499 VRSAVE. Bit n of the return value is 1 if Vn is live, where bit 0
22500 is the most-significant bit of the 32-bit word. */
22501
22502 static unsigned int
22503 compute_vrsave_mask (void)
22504 {
22505 unsigned int i, mask = 0;
22506
22507 /* On Darwin, the unwind routines are compiled without
22508 TARGET_ALTIVEC, and use save_world to save/restore the
22509 call-saved altivec registers when necessary. */
22510 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
22511 && ! TARGET_ALTIVEC)
22512 mask |= 0xFFF;
22513
22514 /* First, find out if we use _any_ altivec registers. */
22515 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22516 if (df_regs_ever_live_p (i))
22517 mask |= ALTIVEC_REG_BIT (i);
22518
22519 if (mask == 0)
22520 return mask;
22521
22522 /* Next, remove the argument registers from the set. These must
22523 be in the VRSAVE mask set by the caller, so we don't need to add
22524 them in again. More importantly, the mask we compute here is
22525 used to generate CLOBBERs in the set_vrsave insn, and we do not
22526 wish the argument registers to die. */
22527 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
22528 mask &= ~ALTIVEC_REG_BIT (i);
22529
22530 /* Similarly, remove the return value from the set. */
22531 {
22532 bool yes = false;
22533 diddle_return_value (is_altivec_return_reg, &yes);
22534 if (yes)
22535 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
22536 }
22537
22538 return mask;
22539 }
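
/* Illustrative example (this assumes the usual rs6000.h definition
   ALTIVEC_REG_BIT (R) == 0x80000000 >> ((R) - FIRST_ALTIVEC_REGNO)):
   if only V20 and V31 are ever live, the mask computed above comes out
   as (0x80000000 >> 20) | (0x80000000 >> 31) == 0x00000801, matching
   the "bit 0 is the MSB" convention documented before the function.  */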
22540
22541 /* For a very restricted set of circumstances, we can cut down the
22542 size of prologues/epilogues by calling our own save/restore-the-world
22543 routines. */
22544
22545 static void
22546 compute_save_world_info (rs6000_stack_t *info_ptr)
22547 {
22548 info_ptr->world_save_p = 1;
22549 info_ptr->world_save_p
22550 = (WORLD_SAVE_P (info_ptr)
22551 && DEFAULT_ABI == ABI_DARWIN
22552 && !cfun->has_nonlocal_label
22553 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
22554 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
22555 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
22556 && info_ptr->cr_save_p);
22557
22558 /* This will not work in conjunction with sibcalls. Make sure there
22559 are none. (This check is expensive, but seldom executed.) */
22560 if (WORLD_SAVE_P (info_ptr))
22561 {
22562 rtx_insn *insn;
22563 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
22564 if (CALL_P (insn) && SIBLING_CALL_P (insn))
22565 {
22566 info_ptr->world_save_p = 0;
22567 break;
22568 }
22569 }
22570
22571 if (WORLD_SAVE_P (info_ptr))
22572 {
22573 /* Even if we're not touching VRsave, make sure there's room on the
22574 stack for it, if it looks like we're calling SAVE_WORLD, which
22575 will attempt to save it. */
22576 info_ptr->vrsave_size = 4;
22577
22578 /* If we are going to save the world, we need to save the link register too. */
22579 info_ptr->lr_save_p = 1;
22580
22581 /* "Save" the VRsave register too if we're saving the world. */
22582 if (info_ptr->vrsave_mask == 0)
22583 info_ptr->vrsave_mask = compute_vrsave_mask ();
22584
22585 /* Because the Darwin register save/restore routines only handle
22586 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
22587 check. */
22588 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
22589 && (info_ptr->first_altivec_reg_save
22590 >= FIRST_SAVED_ALTIVEC_REGNO));
22591 }
22592 return;
22593 }
22594
22595
22596 static void
22597 is_altivec_return_reg (rtx reg, void *xyes)
22598 {
22599 bool *yes = (bool *) xyes;
22600 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
22601 *yes = true;
22602 }
22603
22604 \f
22605 /* Look for user-defined global regs in the range FIRST to LAST-1.
22606 We should not restore these, and so cannot use lmw or out-of-line
22607 restore functions if there are any. We also can't save them
22608 (well, emit frame notes for them), because frame unwinding during
22609 exception handling will restore saved registers. */
22610
22611 static bool
22612 global_regs_p (unsigned first, unsigned last)
22613 {
22614 while (first < last)
22615 if (global_regs[first++])
22616 return true;
22617 return false;
22618 }
22619
22620 /* Determine the strategy for saving/restoring registers. */
22621
22622 enum {
22623 SAVRES_MULTIPLE = 0x1,
22624 SAVE_INLINE_FPRS = 0x2,
22625 SAVE_INLINE_GPRS = 0x4,
22626 REST_INLINE_FPRS = 0x8,
22627 REST_INLINE_GPRS = 0x10,
22628 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
22629 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
22630 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
22631 SAVE_INLINE_VRS = 0x100,
22632 REST_INLINE_VRS = 0x200
22633 };
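
/* A minimal sketch of how these flag bits are meant to be read
   (hypothetical STRATEGY value).  The save and restore bits are
   independent, so "FPRs saved inline but restored out of line" is
   precisely the combination rs6000_savres_strategy tests for below:

       if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS))
	   == SAVE_INLINE_FPRS)
	 ... then every FPR from first_fp_reg_save on must have been
	     saved, or the out-of-line restore would load garbage ...  */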
22634
22635 static int
22636 rs6000_savres_strategy (rs6000_stack_t *info,
22637 bool using_static_chain_p)
22638 {
22639 int strategy = 0;
22640 bool lr_save_p;
22641
22642 if (TARGET_MULTIPLE
22643 && !TARGET_POWERPC64
22644 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
22645 && info->first_gp_reg_save < 31
22646 && !global_regs_p (info->first_gp_reg_save, 32))
22647 strategy |= SAVRES_MULTIPLE;
22648
22649 if (crtl->calls_eh_return
22650 || cfun->machine->ra_need_lr)
22651 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
22652 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
22653 | SAVE_INLINE_VRS | REST_INLINE_VRS);
22654
22655 if (info->first_fp_reg_save == 64
22656 /* The out-of-line FP routines use double-precision stores;
22657 we can't use those routines if we don't have such stores. */
22658 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
22659 || global_regs_p (info->first_fp_reg_save, 64))
22660 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22661
22662 if (info->first_gp_reg_save == 32
22663 || (!(strategy & SAVRES_MULTIPLE)
22664 && global_regs_p (info->first_gp_reg_save, 32)))
22665 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22666
22667 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
22668 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
22669 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22670
22671 /* Define cutoff for using out-of-line functions to save registers. */
22672 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
22673 {
22674 if (!optimize_size)
22675 {
22676 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22677 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22678 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22679 }
22680 else
22681 {
22682 /* Prefer out-of-line restore if it will exit. */
22683 if (info->first_fp_reg_save > 61)
22684 strategy |= SAVE_INLINE_FPRS;
22685 if (info->first_gp_reg_save > 29)
22686 {
22687 if (info->first_fp_reg_save == 64)
22688 strategy |= SAVE_INLINE_GPRS;
22689 else
22690 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22691 }
22692 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
22693 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22694 }
22695 }
22696 else if (DEFAULT_ABI == ABI_DARWIN)
22697 {
22698 if (info->first_fp_reg_save > 60)
22699 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22700 if (info->first_gp_reg_save > 29)
22701 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22702 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22703 }
22704 else
22705 {
22706 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22707 if (info->first_fp_reg_save > 61)
22708 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22709 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22710 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22711 }
22712
22713 /* Don't bother to try to save things out-of-line if r11 is occupied
22714 by the static chain. It would require too much fiddling and the
22715 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
22716 pointer on Darwin, and AIX uses r1 or r12. */
22717 if (using_static_chain_p
22718 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
22719 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
22720 | SAVE_INLINE_GPRS
22721 | SAVE_INLINE_VRS | REST_INLINE_VRS);
22722
22723 /* We can only use the out-of-line routines to restore if we've
22724 saved all the registers from first_fp_reg_save in the prologue.
22725 Otherwise, we risk loading garbage. */
22726 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
22727 {
22728 int i;
22729
22730 for (i = info->first_fp_reg_save; i < 64; i++)
22731 if (!save_reg_p (i))
22732 {
22733 strategy |= REST_INLINE_FPRS;
22734 break;
22735 }
22736 }
22737
22738 /* If we are going to use store multiple, then don't even bother
22739 with the out-of-line routines, since the store-multiple
22740 instruction will always be smaller. */
22741 if ((strategy & SAVRES_MULTIPLE))
22742 strategy |= SAVE_INLINE_GPRS;
22743
22744 /* info->lr_save_p isn't yet set if the only reason lr needs to be
22745 saved is an out-of-line save or restore. Set up the value for
22746 the next test (excluding out-of-line gpr restore). */
22747 lr_save_p = (info->lr_save_p
22748 || !(strategy & SAVE_INLINE_GPRS)
22749 || !(strategy & SAVE_INLINE_FPRS)
22750 || !(strategy & SAVE_INLINE_VRS)
22751 || !(strategy & REST_INLINE_FPRS)
22752 || !(strategy & REST_INLINE_VRS));
22753
22754 /* The situation is more complicated with load multiple. We'd
22755 prefer to use the out-of-line routines for restores, since the
22756 "exit" out-of-line routines can handle the restore of LR and the
22757 frame teardown. However, it doesn't make sense to use the
22758 out-of-line routine if that is the only reason we'd need to save
22759 LR, and we can't use the "exit" out-of-line gpr restore if we
22760 have saved some fprs; in those cases it is advantageous to use
22761 load multiple when available. */
22762 if ((strategy & SAVRES_MULTIPLE)
22763 && (!lr_save_p
22764 || info->first_fp_reg_save != 64))
22765 strategy |= REST_INLINE_GPRS;
22766
22767 /* Saving CR interferes with the exit routines used on the SPE, so
22768 just punt here. */
22769 if (TARGET_SPE_ABI
22770 && info->spe_64bit_regs_used
22771 && info->cr_save_p)
22772 strategy |= REST_INLINE_GPRS;
22773
22774 /* We can only use load multiple or the out-of-line routines to
22775 restore if we've used store multiple or out-of-line routines
22776 in the prologue, i.e. if we've saved all the registers from
22777 first_gp_reg_save. Otherwise, we risk loading garbage. */
22778 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
22779 == SAVE_INLINE_GPRS)
22780 {
22781 int i;
22782
22783 for (i = info->first_gp_reg_save; i < 32; i++)
22784 if (!save_reg_p (i))
22785 {
22786 strategy |= REST_INLINE_GPRS;
22787 break;
22788 }
22789 }
22790
22791 if (TARGET_ELF && TARGET_64BIT)
22792 {
22793 if (!(strategy & SAVE_INLINE_FPRS))
22794 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
22795 else if (!(strategy & SAVE_INLINE_GPRS)
22796 && info->first_fp_reg_save == 64)
22797 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
22798 }
22799 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
22800 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
22801
22802 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
22803 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
22804
22805 return strategy;
22806 }
22807
22808 /* Calculate the stack information for the current function. This is
22809 complicated by having two separate calling sequences, the AIX calling
22810 sequence and the V.4 calling sequence.
22811
22812 AIX (and Darwin/Mac OS X) stack frames look like:
22813 32-bit 64-bit
22814 SP----> +---------------------------------------+
22815 | back chain to caller | 0 0
22816 +---------------------------------------+
22817 | saved CR | 4 8 (8-11)
22818 +---------------------------------------+
22819 | saved LR | 8 16
22820 +---------------------------------------+
22821 | reserved for compilers | 12 24
22822 +---------------------------------------+
22823 | reserved for binders | 16 32
22824 +---------------------------------------+
22825 | saved TOC pointer | 20 40
22826 +---------------------------------------+
22827 | Parameter save area (P) | 24 48
22828 +---------------------------------------+
22829 | Alloca space (A) | 24+P etc.
22830 +---------------------------------------+
22831 | Local variable space (L) | 24+P+A
22832 +---------------------------------------+
22833 | Float/int conversion temporary (X) | 24+P+A+L
22834 +---------------------------------------+
22835 | Save area for AltiVec registers (W) | 24+P+A+L+X
22836 +---------------------------------------+
22837 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
22838 +---------------------------------------+
22839 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
22840 +---------------------------------------+
22841 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
22842 +---------------------------------------+
22843 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
22844 +---------------------------------------+
22845 old SP->| back chain to caller's caller |
22846 +---------------------------------------+
22847
22848 The required alignment for AIX configurations is two words (i.e., 8
22849 or 16 bytes).
22850
22851 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
22852
22853 SP----> +---------------------------------------+
22854 | Back chain to caller | 0
22855 +---------------------------------------+
22856 | Save area for CR | 8
22857 +---------------------------------------+
22858 | Saved LR | 16
22859 +---------------------------------------+
22860 | Saved TOC pointer | 24
22861 +---------------------------------------+
22862 | Parameter save area (P) | 32
22863 +---------------------------------------+
22864 | Alloca space (A) | 32+P
22865 +---------------------------------------+
22866 | Local variable space (L) | 32+P+A
22867 +---------------------------------------+
22868 | Save area for AltiVec registers (W) | 32+P+A+L
22869 +---------------------------------------+
22870 | AltiVec alignment padding (Y) | 32+P+A+L+W
22871 +---------------------------------------+
22872 | Save area for GP registers (G) | 32+P+A+L+W+Y
22873 +---------------------------------------+
22874 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
22875 +---------------------------------------+
22876 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
22877 +---------------------------------------+
22878
22879
22880 V.4 stack frames look like:
22881
22882 SP----> +---------------------------------------+
22883 | back chain to caller | 0
22884 +---------------------------------------+
22885 | caller's saved LR | 4
22886 +---------------------------------------+
22887 | Parameter save area (P) | 8
22888 +---------------------------------------+
22889 | Alloca space (A) | 8+P
22890 +---------------------------------------+
22891 | Varargs save area (V) | 8+P+A
22892 +---------------------------------------+
22893 | Local variable space (L) | 8+P+A+V
22894 +---------------------------------------+
22895 | Float/int conversion temporary (X) | 8+P+A+V+L
22896 +---------------------------------------+
22897 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
22898 +---------------------------------------+
22899 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
22900 +---------------------------------------+
22901 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
22902 +---------------------------------------+
22903 | SPE: area for 64-bit GP registers |
22904 +---------------------------------------+
22905 | SPE alignment padding |
22906 +---------------------------------------+
22907 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
22908 +---------------------------------------+
22909 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
22910 +---------------------------------------+
22911 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
22912 +---------------------------------------+
22913 old SP->| back chain to caller's caller |
22914 +---------------------------------------+
22915
22916 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
22917 given. (But note below and in sysv4.h that we require only 8 and
22918 may round up the size of our stack frame anyway. The historical
22919 reason is early versions of powerpc-linux which didn't properly
22920 align the stack at program startup. A happy side-effect is that
22921 -mno-eabi libraries can be used with -meabi programs.)
22922
22923 The EABI configuration defaults to the V.4 layout. However,
22924 the stack alignment requirements may differ. If -mno-eabi is not
22925 given, the required stack alignment is 8 bytes; if -mno-eabi is
22926 given, the required alignment is 16 bytes. (But see V.4 comment
22927 above.) */
22928
22929 #ifndef ABI_STACK_BOUNDARY
22930 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
22931 #endif
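
/* Worked example of the alignment arithmetic used throughout this
   function (assuming the usual rs6000.h definition
   RS6000_ALIGN (n, a) == (((n) + (a) - 1) & ~((a) - 1)) with A a
   power of two): RS6000_ALIGN (220, 16) == 224, while an
   already-aligned size such as RS6000_ALIGN (224, 16) is unchanged.  */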
22932
22933 static rs6000_stack_t *
22934 rs6000_stack_info (void)
22935 {
22936 /* We should never be called for thunks, we are not set up for that. */
22937 gcc_assert (!cfun->is_thunk);
22938
22939 rs6000_stack_t *info_ptr = &stack_info;
22940 int reg_size = TARGET_32BIT ? 4 : 8;
22941 int ehrd_size;
22942 int ehcr_size;
22943 int save_align;
22944 int first_gp;
22945 HOST_WIDE_INT non_fixed_size;
22946 bool using_static_chain_p;
22947
22948 if (reload_completed && info_ptr->reload_completed)
22949 return info_ptr;
22950
22951 memset (info_ptr, 0, sizeof (*info_ptr));
22952 info_ptr->reload_completed = reload_completed;
22953
22954 if (TARGET_SPE)
22955 {
22956 /* Cache value so we don't rescan instruction chain over and over. */
22957 if (cfun->machine->insn_chain_scanned_p == 0)
22958 cfun->machine->insn_chain_scanned_p
22959 = spe_func_has_64bit_regs_p () + 1;
22960 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
22961 }
22962
22963 /* Select which calling sequence. */
22964 info_ptr->abi = DEFAULT_ABI;
22965
22966 /* Calculate which registers need to be saved & save area size. */
22967 info_ptr->first_gp_reg_save = first_reg_to_save ();
22968 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
22969 even if it currently looks like we won't. Reload may need it to
22970 get at a constant; if so, it will have already created a constant
22971 pool entry for it. */
22972 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
22973 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
22974 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
22975 && crtl->uses_const_pool
22976 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
22977 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
22978 else
22979 first_gp = info_ptr->first_gp_reg_save;
22980
22981 info_ptr->gp_size = reg_size * (32 - first_gp);
22982
22983 /* For the SPE, we have an additional upper 32-bits on each GPR.
22984 Ideally we should save the entire 64-bits only when the upper
22985 half is used in SIMD instructions. Since we only record
22986 registers live (not the size they are used in), this proves
22987 difficult because we'd have to traverse the instruction chain at
22988 the right time, taking reload into account. This is a real pain,
22989 so we opt to save all the GPRs in 64-bits whenever even one
22990 register gets used in 64-bits. Otherwise, all the registers in
22991 the frame get saved in 32-bits.
22992
22993 Note that when we save all GPRs (except the SP) in 64-bits, the
22994 traditional GP save area will be empty. */
22995 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
22996 info_ptr->gp_size = 0;
22997
22998 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
22999 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
23000
23001 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
23002 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
23003 - info_ptr->first_altivec_reg_save);
23004
23005 /* Does this function call anything? */
23006 info_ptr->calls_p = (! crtl->is_leaf
23007 || cfun->machine->ra_needs_full_frame);
23008
23009 /* Determine if we need to save the condition code registers. */
23010 if (df_regs_ever_live_p (CR2_REGNO)
23011 || df_regs_ever_live_p (CR3_REGNO)
23012 || df_regs_ever_live_p (CR4_REGNO))
23013 {
23014 info_ptr->cr_save_p = 1;
23015 if (DEFAULT_ABI == ABI_V4)
23016 info_ptr->cr_size = reg_size;
23017 }
23018
23019 /* If the current function calls __builtin_eh_return, then we need
23020 to allocate stack space for registers that will hold data for
23021 the exception handler. */
23022 if (crtl->calls_eh_return)
23023 {
23024 unsigned int i;
23025 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
23026 continue;
23027
23028 /* SPE saves EH registers in 64-bits. */
23029 ehrd_size = i * (TARGET_SPE_ABI
23030 && info_ptr->spe_64bit_regs_used != 0
23031 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
23032 }
23033 else
23034 ehrd_size = 0;
23035
23036 /* In the ELFv2 ABI, we also need to allocate space for separate
23037 CR field save areas if the function calls __builtin_eh_return. */
23038 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23039 {
23040 /* This hard-codes that we have three call-saved CR fields. */
23041 ehcr_size = 3 * reg_size;
23042 /* We do *not* use the regular CR save mechanism. */
23043 info_ptr->cr_save_p = 0;
23044 }
23045 else
23046 ehcr_size = 0;
23047
23048 /* Determine various sizes. */
23049 info_ptr->reg_size = reg_size;
23050 info_ptr->fixed_size = RS6000_SAVE_AREA;
23051 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
23052 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
23053 TARGET_ALTIVEC ? 16 : 8);
23054 if (FRAME_GROWS_DOWNWARD)
23055 info_ptr->vars_size
23056 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
23057 + info_ptr->parm_size,
23058 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
23059 - (info_ptr->fixed_size + info_ptr->vars_size
23060 + info_ptr->parm_size);
23061
23062 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
23063 info_ptr->spe_gp_size = 8 * (32 - first_gp);
23064 else
23065 info_ptr->spe_gp_size = 0;
23066
23067 if (TARGET_ALTIVEC_ABI)
23068 info_ptr->vrsave_mask = compute_vrsave_mask ();
23069 else
23070 info_ptr->vrsave_mask = 0;
23071
23072 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
23073 info_ptr->vrsave_size = 4;
23074 else
23075 info_ptr->vrsave_size = 0;
23076
23077 compute_save_world_info (info_ptr);
23078
23079 /* Calculate the offsets. */
23080 switch (DEFAULT_ABI)
23081 {
23082 case ABI_NONE:
23083 default:
23084 gcc_unreachable ();
23085
23086 case ABI_AIX:
23087 case ABI_ELFv2:
23088 case ABI_DARWIN:
23089 info_ptr->fp_save_offset = - info_ptr->fp_size;
23090 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
23091
23092 if (TARGET_ALTIVEC_ABI)
23093 {
23094 info_ptr->vrsave_save_offset
23095 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
23096
23097 /* Align stack so vector save area is on a quadword boundary.
23098 The padding goes above the vectors. */
23099 if (info_ptr->altivec_size != 0)
23100 info_ptr->altivec_padding_size
23101 = info_ptr->vrsave_save_offset & 0xF;
23102 else
23103 info_ptr->altivec_padding_size = 0;
23104
23105 info_ptr->altivec_save_offset
23106 = info_ptr->vrsave_save_offset
23107 - info_ptr->altivec_padding_size
23108 - info_ptr->altivec_size;
23109 gcc_assert (info_ptr->altivec_size == 0
23110 || info_ptr->altivec_save_offset % 16 == 0);
23111
23112 /* Adjust for AltiVec case. */
23113 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
23114 }
23115 else
23116 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
23117
23118 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
23119 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
23120 info_ptr->lr_save_offset = 2*reg_size;
23121 break;
23122
23123 case ABI_V4:
23124 info_ptr->fp_save_offset = - info_ptr->fp_size;
23125 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
23126 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
23127
23128 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
23129 {
23130 /* Align stack so SPE GPR save area is aligned on a
23131 double-word boundary. */
23132 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
23133 info_ptr->spe_padding_size
23134 = 8 - (-info_ptr->cr_save_offset % 8);
23135 else
23136 info_ptr->spe_padding_size = 0;
23137
23138 info_ptr->spe_gp_save_offset
23139 = info_ptr->cr_save_offset
23140 - info_ptr->spe_padding_size
23141 - info_ptr->spe_gp_size;
23142
23143 /* Adjust for SPE case. */
23144 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
23145 }
23146 else if (TARGET_ALTIVEC_ABI)
23147 {
23148 info_ptr->vrsave_save_offset
23149 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
23150
23151 /* Align stack so vector save area is on a quadword boundary. */
23152 if (info_ptr->altivec_size != 0)
23153 info_ptr->altivec_padding_size
23154 = 16 - (-info_ptr->vrsave_save_offset % 16);
23155 else
23156 info_ptr->altivec_padding_size = 0;
23157
23158 info_ptr->altivec_save_offset
23159 = info_ptr->vrsave_save_offset
23160 - info_ptr->altivec_padding_size
23161 - info_ptr->altivec_size;
23162
23163 /* Adjust for AltiVec case. */
23164 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
23165 }
23166 else
23167 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
23168 info_ptr->ehrd_offset -= ehrd_size;
23169 info_ptr->lr_save_offset = reg_size;
23170 break;
23171 }
23172
23173 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
23174 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
23175 + info_ptr->gp_size
23176 + info_ptr->altivec_size
23177 + info_ptr->altivec_padding_size
23178 + info_ptr->spe_gp_size
23179 + info_ptr->spe_padding_size
23180 + ehrd_size
23181 + ehcr_size
23182 + info_ptr->cr_size
23183 + info_ptr->vrsave_size,
23184 save_align);
23185
23186 non_fixed_size = (info_ptr->vars_size
23187 + info_ptr->parm_size
23188 + info_ptr->save_size);
23189
23190 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
23191 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
23192
23193 /* Determine if we need to save the link register. */
23194 if (info_ptr->calls_p
23195 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23196 && crtl->profile
23197 && !TARGET_PROFILE_KERNEL)
23198 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
23199 #ifdef TARGET_RELOCATABLE
23200 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
23201 #endif
23202 || rs6000_ra_ever_killed ())
23203 info_ptr->lr_save_p = 1;
23204
23205 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23206 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23207 && call_used_regs[STATIC_CHAIN_REGNUM]);
23208 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
23209 using_static_chain_p);
23210
23211 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
23212 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
23213 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
23214 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
23215 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
23216 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
23217 info_ptr->lr_save_p = 1;
23218
23219 if (info_ptr->lr_save_p)
23220 df_set_regs_ever_live (LR_REGNO, true);
23221
23222 /* Determine if we need to allocate any stack frame:
23223
23224 For AIX we need to push the stack if a frame pointer is needed
23225 (because the stack might be dynamically adjusted), if we are
23226 debugging, if we make calls, or if the sum of fp_save, gp_save,
23227 and local variables are more than the space needed to save all
23228 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
23229 + 18*8 = 288 (GPR13 reserved).
23230
23231 For V.4 we don't have the stack cushion that AIX uses, but assume
23232 that the debugger can handle stackless frames. */
23233
23234 if (info_ptr->calls_p)
23235 info_ptr->push_p = 1;
23236
23237 else if (DEFAULT_ABI == ABI_V4)
23238 info_ptr->push_p = non_fixed_size != 0;
23239
23240 else if (frame_pointer_needed)
23241 info_ptr->push_p = 1;
23242
23243 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
23244 info_ptr->push_p = 1;
23245
23246 else
23247 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
23248
23249 return info_ptr;
23250 }
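
/* Worked example of the offset computation above (hypothetical
   64-bit AIX-style function that saves only f31 and r31, no AltiVec
   or SPE): fp_size == 8 and gp_size == 8, so fp_save_offset == -8
   and gp_save_offset == -16; both save areas sit immediately below
   the incoming stack pointer, and save_size rounds their 16-byte sum
   up to save_align.  */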
23251
23252 /* Return true if the current function uses any GPRs in 64-bit SIMD
23253 mode. */
23254
23255 static bool
23256 spe_func_has_64bit_regs_p (void)
23257 {
23258 rtx_insn *insns, *insn;
23259
23260 /* Functions that save and restore all the call-saved registers will
23261 need to save/restore the registers in 64-bits. */
23262 if (crtl->calls_eh_return
23263 || cfun->calls_setjmp
23264 || crtl->has_nonlocal_goto)
23265 return true;
23266
23267 insns = get_insns ();
23268
23269 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
23270 {
23271 if (INSN_P (insn))
23272 {
23273 rtx i;
23274
23275 /* FIXME: This should be implemented with attributes...
23276
23277 (set_attr "spe64" "true")....then,
23278 if (get_spe64(insn)) return true;
23279
23280 It's the only reliable way to do the stuff below. */
23281
23282 i = PATTERN (insn);
23283 if (GET_CODE (i) == SET)
23284 {
23285 machine_mode mode = GET_MODE (SET_SRC (i));
23286
23287 if (SPE_VECTOR_MODE (mode))
23288 return true;
23289 if (TARGET_E500_DOUBLE
23290 && (mode == DFmode || FLOAT128_2REG_P (mode)))
23291 return true;
23292 }
23293 }
23294 }
23295
23296 return false;
23297 }
23298
23299 static void
23300 debug_stack_info (rs6000_stack_t *info)
23301 {
23302 const char *abi_string;
23303
23304 if (! info)
23305 info = rs6000_stack_info ();
23306
23307 fprintf (stderr, "\nStack information for function %s:\n",
23308 ((current_function_decl && DECL_NAME (current_function_decl))
23309 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
23310 : "<unknown>"));
23311
23312 switch (info->abi)
23313 {
23314 default: abi_string = "Unknown"; break;
23315 case ABI_NONE: abi_string = "NONE"; break;
23316 case ABI_AIX: abi_string = "AIX"; break;
23317 case ABI_ELFv2: abi_string = "ELFv2"; break;
23318 case ABI_DARWIN: abi_string = "Darwin"; break;
23319 case ABI_V4: abi_string = "V.4"; break;
23320 }
23321
23322 fprintf (stderr, "\tABI = %5s\n", abi_string);
23323
23324 if (TARGET_ALTIVEC_ABI)
23325 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
23326
23327 if (TARGET_SPE_ABI)
23328 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
23329
23330 if (info->first_gp_reg_save != 32)
23331 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
23332
23333 if (info->first_fp_reg_save != 64)
23334 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
23335
23336 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
23337 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
23338 info->first_altivec_reg_save);
23339
23340 if (info->lr_save_p)
23341 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
23342
23343 if (info->cr_save_p)
23344 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
23345
23346 if (info->vrsave_mask)
23347 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
23348
23349 if (info->push_p)
23350 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
23351
23352 if (info->calls_p)
23353 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
23354
23355 if (info->gp_size)
23356 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
23357
23358 if (info->fp_size)
23359 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
23360
23361 if (info->altivec_size)
23362 fprintf (stderr, "\taltivec_save_offset = %5d\n",
23363 info->altivec_save_offset);
23364
23365 if (info->spe_gp_size)
23366 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
23367 info->spe_gp_save_offset);
23368
23369 if (info->vrsave_size)
23370 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
23371 info->vrsave_save_offset);
23372
23373 if (info->lr_save_p)
23374 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
23375
23376 if (info->cr_save_p)
23377 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
23378
23379 if (info->varargs_save_offset)
23380 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
23381
23382 if (info->total_size)
23383 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
23384 info->total_size);
23385
23386 if (info->vars_size)
23387 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
23388 info->vars_size);
23389
23390 if (info->parm_size)
23391 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
23392
23393 if (info->fixed_size)
23394 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
23395
23396 if (info->gp_size)
23397 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
23398
23399 if (info->spe_gp_size)
23400 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
23401
23402 if (info->fp_size)
23403 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
23404
23405 if (info->altivec_size)
23406 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
23407
23408 if (info->vrsave_size)
23409 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
23410
23411 if (info->altivec_padding_size)
23412 fprintf (stderr, "\taltivec_padding_size= %5d\n",
23413 info->altivec_padding_size);
23414
23415 if (info->spe_padding_size)
23416 fprintf (stderr, "\tspe_padding_size = %5d\n",
23417 info->spe_padding_size);
23418
23419 if (info->cr_size)
23420 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
23421
23422 if (info->save_size)
23423 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
23424
23425 if (info->reg_size != 4)
23426 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
23427
23428 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
23429
23430 fprintf (stderr, "\n");
23431 }
23432
23433 rtx
23434 rs6000_return_addr (int count, rtx frame)
23435 {
23436 /* Currently we don't optimize very well between prologue and body
23437 code, and for PIC the generated code can actually be quite bad, so
23438 don't try to be too clever here. */
23439 if (count != 0
23440 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
23441 {
23442 cfun->machine->ra_needs_full_frame = 1;
23443
23444 return
23445 gen_rtx_MEM
23446 (Pmode,
23447 memory_address
23448 (Pmode,
23449 plus_constant (Pmode,
23450 copy_to_reg
23451 (gen_rtx_MEM (Pmode,
23452 memory_address (Pmode, frame))),
23453 RETURN_ADDRESS_OFFSET)));
23454 }
23455
23456 cfun->machine->ra_need_lr = 1;
23457 return get_hard_reg_initial_val (Pmode, LR_REGNO);
23458 }
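
/* Usage sketch: the function above backs __builtin_return_address
   (COUNT).  For the common non-PIC COUNT == 0 case it simply hands
   back the pseudo holding LR's value on entry; any other case walks
   the back chain through FRAME and forces a full frame via
   ra_needs_full_frame.  */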
23459
23460 /* Say whether a function is a candidate for sibcall handling or not. */
23461
23462 static bool
23463 rs6000_function_ok_for_sibcall (tree decl, tree exp)
23464 {
23465 tree fntype;
23466
23467 if (decl)
23468 fntype = TREE_TYPE (decl);
23469 else
23470 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
23471
23472 /* We can't do it if the called function has more vector parameters
23473 than the current function; there's nowhere to put the VRsave code. */
23474 if (TARGET_ALTIVEC_ABI
23475 && TARGET_ALTIVEC_VRSAVE
23476 && !(decl && decl == current_function_decl))
23477 {
23478 function_args_iterator args_iter;
23479 tree type;
23480 int nvreg = 0;
23481
23482 /* Functions with vector parameters are required to have a
23483 prototype, so the argument type info must be available
23484 here. */
23485 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
23486 if (TREE_CODE (type) == VECTOR_TYPE
23487 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
23488 nvreg++;
23489
23490 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
23491 if (TREE_CODE (type) == VECTOR_TYPE
23492 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
23493 nvreg--;
23494
23495 if (nvreg > 0)
23496 return false;
23497 }
23498
23499 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
23500 functions, because the callee may have a different TOC pointer from
23501 the caller's and there's no way to ensure we restore the TOC when
23502 we return. With the secure-plt SYSV ABI we can't make non-local
23503 calls when -fpic/PIC because the plt call stubs use r30. */
23504 if (DEFAULT_ABI == ABI_DARWIN
23505 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23506 && decl
23507 && !DECL_EXTERNAL (decl)
23508 && !DECL_WEAK (decl)
23509 && (*targetm.binds_local_p) (decl))
23510 || (DEFAULT_ABI == ABI_V4
23511 && (!TARGET_SECURE_PLT
23512 || !flag_pic
23513 || (decl
23514 && (*targetm.binds_local_p) (decl)))))
23515 {
23516 tree attr_list = TYPE_ATTRIBUTES (fntype);
23517
23518 if (!lookup_attribute ("longcall", attr_list)
23519 || lookup_attribute ("shortcall", attr_list))
23520 return true;
23521 }
23522
23523 return false;
23524 }
23525
23526 static int
23527 rs6000_ra_ever_killed (void)
23528 {
23529 rtx_insn *top;
23530 rtx reg;
23531 rtx_insn *insn;
23532
23533 if (cfun->is_thunk)
23534 return 0;
23535
23536 if (cfun->machine->lr_save_state)
23537 return cfun->machine->lr_save_state - 1;
23538
23539 /* regs_ever_live has LR marked as used if any sibcalls are present,
23540 but this should not force saving and restoring in the
23541 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
23542 clobbers LR, so that is inappropriate. */
23543
23544 /* Also, the prologue can generate a store into LR that
23545 doesn't really count, like this:
23546
23547 move LR->R0
23548 bcl to set PIC register
23549 move LR->R31
23550 move R0->LR
23551
23552 When we're called from the epilogue, we need to avoid counting
23553 this as a store. */
23554
23555 push_topmost_sequence ();
23556 top = get_insns ();
23557 pop_topmost_sequence ();
23558 reg = gen_rtx_REG (Pmode, LR_REGNO);
23559
23560 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
23561 {
23562 if (INSN_P (insn))
23563 {
23564 if (CALL_P (insn))
23565 {
23566 if (!SIBLING_CALL_P (insn))
23567 return 1;
23568 }
23569 else if (find_regno_note (insn, REG_INC, LR_REGNO))
23570 return 1;
23571 else if (set_of (reg, insn) != NULL_RTX
23572 && !prologue_epilogue_contains (insn))
23573 return 1;
23574 }
23575 }
23576 return 0;
23577 }
23578 \f
23579 /* Emit instructions needed to load the TOC register.
23580 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set and
23581 there is a constant pool; or for SVR4 -fpic. */
23582
23583 void
23584 rs6000_emit_load_toc_table (int fromprolog)
23585 {
23586 rtx dest;
23587 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
23588
23589 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
23590 {
23591 char buf[30];
23592 rtx lab, tmp1, tmp2, got;
23593
23594 lab = gen_label_rtx ();
23595 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
23596 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
23597 if (flag_pic == 2)
23598 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
23599 else
23600 got = rs6000_got_sym ();
23601 tmp1 = tmp2 = dest;
23602 if (!fromprolog)
23603 {
23604 tmp1 = gen_reg_rtx (Pmode);
23605 tmp2 = gen_reg_rtx (Pmode);
23606 }
23607 emit_insn (gen_load_toc_v4_PIC_1 (lab));
23608 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
23609 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
23610 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
23611 }
23612 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
23613 {
23614 emit_insn (gen_load_toc_v4_pic_si ());
23615 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
23616 }
23617 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
23618 {
23619 char buf[30];
23620 rtx temp0 = (fromprolog
23621 ? gen_rtx_REG (Pmode, 0)
23622 : gen_reg_rtx (Pmode));
23623
23624 if (fromprolog)
23625 {
23626 rtx symF, symL;
23627
23628 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
23629 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
23630
23631 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
23632 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
23633
23634 emit_insn (gen_load_toc_v4_PIC_1 (symF));
23635 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
23636 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
23637 }
23638 else
23639 {
23640 rtx tocsym, lab;
23641
23642 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
23643 lab = gen_label_rtx ();
23644 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
23645 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
23646 if (TARGET_LINK_STACK)
23647 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
23648 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
23649 }
23650 emit_insn (gen_addsi3 (dest, temp0, dest));
23651 }
23652 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
23653 {
23654 /* This is for AIX code running in non-PIC ELF32. */
23655 char buf[30];
23656 rtx realsym;
23657 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
23658 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
23659
23660 emit_insn (gen_elf_high (dest, realsym));
23661 emit_insn (gen_elf_low (dest, dest, realsym));
23662 }
23663 else
23664 {
23665 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23666
23667 if (TARGET_32BIT)
23668 emit_insn (gen_load_toc_aix_si (dest));
23669 else
23670 emit_insn (gen_load_toc_aix_di (dest));
23671 }
23672 }
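
/* Illustrative shape of the secure-plt sequence emitted by the first
   branch above (not the literal templates; see the load_toc_v4_PIC_*
   patterns in rs6000.md):

       bcl 20,31,.Lpc
   .Lpc:
       mflr  tmp1
       addis tmp2,tmp1,(got-.Lpc)@ha
       addi  dest,tmp2,(got-.Lpc)@l

   i.e. a PC-relative branch-and-link to materialize the program
   counter, followed by an add of the GOT/TOC label difference.  */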
23673
23674 /* Emit instructions to restore the link register after determining where
23675 its value has been stored. */
23676
23677 void
23678 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
23679 {
23680 rs6000_stack_t *info = rs6000_stack_info ();
23681 rtx operands[2];
23682
23683 operands[0] = source;
23684 operands[1] = scratch;
23685
23686 if (info->lr_save_p)
23687 {
23688 rtx frame_rtx = stack_pointer_rtx;
23689 HOST_WIDE_INT sp_offset = 0;
23690 rtx tmp;
23691
23692 if (frame_pointer_needed
23693 || cfun->calls_alloca
23694 || info->total_size > 32767)
23695 {
23696 tmp = gen_frame_mem (Pmode, frame_rtx);
23697 emit_move_insn (operands[1], tmp);
23698 frame_rtx = operands[1];
23699 }
23700 else if (info->push_p)
23701 sp_offset = info->total_size;
23702
23703 tmp = plus_constant (Pmode, frame_rtx,
23704 info->lr_save_offset + sp_offset);
23705 tmp = gen_frame_mem (Pmode, tmp);
23706 emit_move_insn (tmp, operands[0]);
23707 }
23708 else
23709 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
23710
23711 /* Freeze lr_save_p. We've just emitted rtl that depends on the
23712 state of lr_save_p so any change from here on would be a bug. In
23713 particular, stop rs6000_ra_ever_killed from considering the SET
23714 of lr we may have added just above. */
23715 cfun->machine->lr_save_state = info->lr_save_p + 1;
23716 }
23717
23718 static GTY(()) alias_set_type set = -1;
23719
23720 alias_set_type
23721 get_TOC_alias_set (void)
23722 {
23723 if (set == -1)
23724 set = new_alias_set ();
23725 return set;
23726 }
23727
23728 /* This returns nonzero if the current function uses the TOC. This is
23729 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
23730 is generated by the ABI_V4 load_toc_* patterns. */
23731 #if TARGET_ELF
23732 static int
23733 uses_TOC (void)
23734 {
23735 rtx_insn *insn;
23736
23737 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23738 if (INSN_P (insn))
23739 {
23740 rtx pat = PATTERN (insn);
23741 int i;
23742
23743 if (GET_CODE (pat) == PARALLEL)
23744 for (i = 0; i < XVECLEN (pat, 0); i++)
23745 {
23746 rtx sub = XVECEXP (pat, 0, i);
23747 if (GET_CODE (sub) == USE)
23748 {
23749 sub = XEXP (sub, 0);
23750 if (GET_CODE (sub) == UNSPEC
23751 && XINT (sub, 1) == UNSPEC_TOC)
23752 return 1;
23753 }
23754 }
23755 }
23756 return 0;
23757 }
23758 #endif
23759
23760 rtx
23761 create_TOC_reference (rtx symbol, rtx largetoc_reg)
23762 {
23763 rtx tocrel, tocreg, hi;
23764
23765 if (TARGET_DEBUG_ADDR)
23766 {
23767 if (GET_CODE (symbol) == SYMBOL_REF)
23768 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
23769 XSTR (symbol, 0));
23770 else
23771 {
23772 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
23773 GET_RTX_NAME (GET_CODE (symbol)));
23774 debug_rtx (symbol);
23775 }
23776 }
23777
23778 if (!can_create_pseudo_p ())
23779 df_set_regs_ever_live (TOC_REGISTER, true);
23780
23781 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
23782 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
23783 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
23784 return tocrel;
23785
23786 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
23787 if (largetoc_reg != NULL)
23788 {
23789 emit_move_insn (largetoc_reg, hi);
23790 hi = largetoc_reg;
23791 }
23792 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
23793 }
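
/* Illustrative RTL shapes produced above (assuming Pmode == DImode):
   for -mcmodel=small the reference is simply

       (unspec:DI [(symbol_ref:DI ("sym")) (reg:DI 2)] UNSPEC_TOCREL)

   while the medium/large models split it into

       (lo_sum:DI (high:DI (unspec ... UNSPEC_TOCREL)) (unspec ...))

   so that the high part can be emitted separately into LARGETOC_REG.  */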
23794
23795 /* Issue assembly directives that create a reference to the given DWARF
23796 FRAME_TABLE_LABEL from the current function section. */
23797 void
23798 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
23799 {
23800 fprintf (asm_out_file, "\t.ref %s\n",
23801 (* targetm.strip_name_encoding) (frame_table_label));
23802 }
23803 \f
23804 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
23805 and the change to the stack pointer. */
23806
23807 static void
23808 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
23809 {
23810 rtvec p;
23811 int i;
23812 rtx regs[3];
23813
23814 i = 0;
23815 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23816 if (hard_frame_needed)
23817 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
23818 if (!(REGNO (fp) == STACK_POINTER_REGNUM
23819 || (hard_frame_needed
23820 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
23821 regs[i++] = fp;
23822
23823 p = rtvec_alloc (i);
23824 while (--i >= 0)
23825 {
23826 rtx mem = gen_frame_mem (BLKmode, regs[i]);
23827 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
23828 }
23829
23830 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
23831 }
23832
23833 /* Emit the correct code for allocating stack space, as insns.
23834 If COPY_REG is non-null, leave a copy of the old stack pointer in it,
23835 offset by COPY_OFF. The generated code may use hard register 0 as a temporary. */
23836
23837 static rtx_insn *
23838 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
23839 {
23840 rtx_insn *insn;
23841 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23842 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
23843 rtx todec = gen_int_mode (-size, Pmode);
23844 rtx par, set, mem;
23845
23846 if (INTVAL (todec) != -size)
23847 {
23848 warning (0, "stack frame too large");
23849 emit_insn (gen_trap ());
23850 return 0;
23851 }
23852
23853 if (crtl->limit_stack)
23854 {
23855 if (REG_P (stack_limit_rtx)
23856 && REGNO (stack_limit_rtx) > 1
23857 && REGNO (stack_limit_rtx) <= 31)
23858 {
23859 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
23860 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
23861 const0_rtx));
23862 }
23863 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
23864 && TARGET_32BIT
23865 && DEFAULT_ABI == ABI_V4)
23866 {
23867 rtx toload = gen_rtx_CONST (VOIDmode,
23868 gen_rtx_PLUS (Pmode,
23869 stack_limit_rtx,
23870 GEN_INT (size)));
23871
23872 emit_insn (gen_elf_high (tmp_reg, toload));
23873 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
23874 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
23875 const0_rtx));
23876 }
23877 else
23878 warning (0, "stack limit expression is not supported");
23879 }
23880
23881 if (copy_reg)
23882 {
23883 if (copy_off != 0)
23884 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
23885 else
23886 emit_move_insn (copy_reg, stack_reg);
23887 }
23888
23889 if (size > 32767)
23890 {
23891 /* Need a note here so that try_split doesn't get confused. */
23892 if (get_last_insn () == NULL_RTX)
23893 emit_note (NOTE_INSN_DELETED);
23894 insn = emit_move_insn (tmp_reg, todec);
23895 try_split (PATTERN (insn), insn, 0);
23896 todec = tmp_reg;
23897 }
23898
23899 insn = emit_insn (TARGET_32BIT
23900 ? gen_movsi_update_stack (stack_reg, stack_reg,
23901 todec, stack_reg)
23902 : gen_movdi_di_update_stack (stack_reg, stack_reg,
23903 todec, stack_reg));
23904 /* Since we didn't use gen_frame_mem to generate the MEM, grab
23905 it now and set the alias set/attributes. The above gen_*_update
23906 calls will generate a PARALLEL with the MEM set being the first
23907 operation. */
23908 par = PATTERN (insn);
23909 gcc_assert (GET_CODE (par) == PARALLEL);
23910 set = XVECEXP (par, 0, 0);
23911 gcc_assert (GET_CODE (set) == SET);
23912 mem = SET_DEST (set);
23913 gcc_assert (MEM_P (mem));
23914 MEM_NOTRAP_P (mem) = 1;
23915 set_mem_alias_set (mem, get_frame_alias_set ());
23916
23917 RTX_FRAME_RELATED_P (insn) = 1;
23918 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23919 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
23920 GEN_INT (-size))));
23921 return insn;
23922 }
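
/* For a small 32-bit frame the insn emitted above typically assembles
   to a single store-with-update, e.g. (illustrative)

       stwu r1,-64(r1)

   while a frame larger than 32767 bytes first materializes the
   negated size in r0 and uses the indexed update form, roughly
   "lis/ori r0,...; stwux r1,r1,r0".  */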
23923
23924 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23925
23926 #if PROBE_INTERVAL > 32768
23927 #error Cannot use indexed addressing mode for stack probing
23928 #endif
23929
23930 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23931 inclusive. These are offsets from the current stack pointer. */
23932
23933 static void
23934 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
23935 {
23936 /* See if we have a constant small number of probes to generate. If so,
23937 that's the easy case. */
23938 if (first + size <= 32768)
23939 {
23940 HOST_WIDE_INT i;
23941
23942 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
23943 it exceeds SIZE. If only one probe is needed, this will not
23944 generate any code. Then probe at FIRST + SIZE. */
23945 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23946 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
23947 -(first + i)));
23948
23949 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
23950 -(first + size)));
23951 }
23952
23953 /* Otherwise, do the same as above, but in a loop. Note that we must be
23954 extra careful with variables wrapping around because we might be at
23955 the very top (or the very bottom) of the address space and we have
23956 to be able to handle this case properly; in particular, we use an
23957 equality test for the loop condition. */
23958 else
23959 {
23960 HOST_WIDE_INT rounded_size;
23961 rtx r12 = gen_rtx_REG (Pmode, 12);
23962 rtx r0 = gen_rtx_REG (Pmode, 0);
23963
23964 /* Sanity check for the addressing mode we're going to use. */
23965 gcc_assert (first <= 32768);
23966
23967 /* Step 1: round SIZE to the previous multiple of the interval. */
23968
23969 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
23970
23971
23972 /* Step 2: compute initial and final value of the loop counter. */
23973
23974 /* TEST_ADDR = SP + FIRST. */
23975 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
23976 -first)));
23977
23978 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23979 if (rounded_size > 32768)
23980 {
23981 emit_move_insn (r0, GEN_INT (-rounded_size));
23982 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
23983 }
23984 else
23985 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
23986 -rounded_size)));
23987
23988
23989 /* Step 3: the loop
23990
23991 while (TEST_ADDR != LAST_ADDR)
23992 {
23993 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23994 probe at TEST_ADDR
23995 }
23996
23997 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23998 until it is equal to ROUNDED_SIZE. */
23999
24000 if (TARGET_64BIT)
24001 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
24002 else
24003 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
24004
24005
24006 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
24007 that SIZE is equal to ROUNDED_SIZE. */
24008
24009 if (size != rounded_size)
24010 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
24011 }
24012 }
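
/* Worked example (hypothetical sizes, PROBE_INTERVAL == 4096):
   FIRST == 0 and SIZE == 10000 take the first branch and probe at
   sp-4096, sp-8192 and sp-10000.  SIZE == 100000 rounds down to
   ROUNDED_SIZE == 98304, runs the loop probing every 4096 bytes down
   to sp-98304, then emits the final probe at sp-100000.  */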
24013
24014 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
24015 absolute addresses. */
24016
24017 const char *
24018 output_probe_stack_range (rtx reg1, rtx reg2)
24019 {
24020 static int labelno = 0;
24021 char loop_lab[32], end_lab[32];
24022 rtx xops[2];
24023
24024 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
24025 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
24026
24027 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
24028
24029 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
24030 xops[0] = reg1;
24031 xops[1] = reg2;
24032 if (TARGET_64BIT)
24033 output_asm_insn ("cmpd 0,%0,%1", xops);
24034 else
24035 output_asm_insn ("cmpw 0,%0,%1", xops);
24036
24037 fputs ("\tbeq 0,", asm_out_file);
24038 assemble_name_raw (asm_out_file, end_lab);
24039 fputc ('\n', asm_out_file);
24040
24041 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
24042 xops[1] = GEN_INT (-PROBE_INTERVAL);
24043 output_asm_insn ("addi %0,%0,%1", xops);
24044
24045 /* Probe at TEST_ADDR and branch. */
24046 xops[1] = gen_rtx_REG (Pmode, 0);
24047 output_asm_insn ("stw %1,0(%0)", xops);
24048 fprintf (asm_out_file, "\tb ");
24049 assemble_name_raw (asm_out_file, loop_lab);
24050 fputc ('\n', asm_out_file);
24051
24052 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
24053
24054 return "";
24055 }
24056
24057 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
24058 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
24059 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
24060 deduce these equivalences by itself so it wasn't necessary to hold
24061 its hand so much. Don't be tempted to always supply d2_f_d_e with
24062 the actual cfa register, i.e. r31 when we are using a hard frame
24063 pointer. That fails when saving regs off r1, and sched moves the
24064 r31 setup past the reg saves. */
24065
24066 static rtx
24067 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
24068 rtx reg2, rtx rreg)
24069 {
24070 rtx real, temp;
24071
24072 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
24073 {
24074 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
24075 int i;
24076
24077 gcc_checking_assert (val == 0);
24078 real = PATTERN (insn);
24079 if (GET_CODE (real) == PARALLEL)
24080 for (i = 0; i < XVECLEN (real, 0); i++)
24081 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
24082 {
24083 rtx set = XVECEXP (real, 0, i);
24084
24085 RTX_FRAME_RELATED_P (set) = 1;
24086 }
24087 RTX_FRAME_RELATED_P (insn) = 1;
24088 return insn;
24089 }
24090
24091 /* copy_rtx will not make unique copies of registers, so we need to
24092 ensure we don't have unwanted sharing here. */
24093 if (reg == reg2)
24094 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
24095
24096 if (reg == rreg)
24097 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
24098
24099 real = copy_rtx (PATTERN (insn));
24100
24101 if (reg2 != NULL_RTX)
24102 real = replace_rtx (real, reg2, rreg);
24103
24104 if (REGNO (reg) == STACK_POINTER_REGNUM)
24105 gcc_checking_assert (val == 0);
24106 else
24107 real = replace_rtx (real, reg,
24108 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
24109 STACK_POINTER_REGNUM),
24110 GEN_INT (val)));
24111
24112 /* We expect that 'real' is either a SET or a PARALLEL containing
24113 SETs (and possibly other stuff). In a PARALLEL, all the SETs
24114 are important so they all have to be marked RTX_FRAME_RELATED_P. */
24115
24116 if (GET_CODE (real) == SET)
24117 {
24118 rtx set = real;
24119
24120 temp = simplify_rtx (SET_SRC (set));
24121 if (temp)
24122 SET_SRC (set) = temp;
24123 temp = simplify_rtx (SET_DEST (set));
24124 if (temp)
24125 SET_DEST (set) = temp;
24126 if (GET_CODE (SET_DEST (set)) == MEM)
24127 {
24128 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
24129 if (temp)
24130 XEXP (SET_DEST (set), 0) = temp;
24131 }
24132 }
24133 else
24134 {
24135 int i;
24136
24137 gcc_assert (GET_CODE (real) == PARALLEL);
24138 for (i = 0; i < XVECLEN (real, 0); i++)
24139 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
24140 {
24141 rtx set = XVECEXP (real, 0, i);
24142
24143 temp = simplify_rtx (SET_SRC (set));
24144 if (temp)
24145 SET_SRC (set) = temp;
24146 temp = simplify_rtx (SET_DEST (set));
24147 if (temp)
24148 SET_DEST (set) = temp;
24149 if (GET_CODE (SET_DEST (set)) == MEM)
24150 {
24151 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
24152 if (temp)
24153 XEXP (SET_DEST (set), 0) = temp;
24154 }
24155 RTX_FRAME_RELATED_P (set) = 1;
24156 }
24157 }
24158
24159 RTX_FRAME_RELATED_P (insn) = 1;
24160 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
24161
24162 return insn;
24163 }
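
/* Example of the note this builds: if the prologue stores an FPR
   through r11 where r11 == sp + 200, the REG_FRAME_RELATED_EXPR
   attached here is the same store with r11 rewritten to
   (plus (reg 1) (const_int 200)), which lets the unwinder locate the
   save slot relative to r1 regardless of how r11 was set up.  */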
24164
24165 /* Returns an insn that has a vrsave set operation with the
24166 appropriate CLOBBERs. */
24167
24168 static rtx
24169 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
24170 {
24171 int nclobs, i;
24172 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
24173 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24174
24175 clobs[0]
24176 = gen_rtx_SET (vrsave,
24177 gen_rtx_UNSPEC_VOLATILE (SImode,
24178 gen_rtvec (2, reg, vrsave),
24179 UNSPECV_SET_VRSAVE));
24180
24181 nclobs = 1;
24182
24183 /* We need to clobber the registers in the mask so the scheduler
24184 does not move sets to VRSAVE before sets of AltiVec registers.
24185
24186 However, if the function receives nonlocal gotos, reload will set
24187 all call saved registers live. We will end up with:
24188
24189 (set (reg 999) (mem))
24190 (parallel [ (set (reg vrsave) (unspec blah))
24191 (clobber (reg 999))])
24192
24193 The clobber will cause the store into reg 999 to be dead, and
24194 flow will attempt to delete an epilogue insn. In this case, we
24195 need an unspec use/set of the register. */
24196
24197 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24198 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24199 {
24200 if (!epiloguep || call_used_regs [i])
24201 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
24202 gen_rtx_REG (V4SImode, i));
24203 else
24204 {
24205 rtx reg = gen_rtx_REG (V4SImode, i);
24206
24207 clobs[nclobs++]
24208 = gen_rtx_SET (reg,
24209 gen_rtx_UNSPEC (V4SImode,
24210 gen_rtvec (1, reg), 27));
24211 }
24212 }
24213
24214 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
24215
24216 for (i = 0; i < nclobs; ++i)
24217 XVECEXP (insn, 0, i) = clobs[i];
24218
24219 return insn;
24220 }
24221
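/* Return a single SET rtx that stores REG to, or loads REG from,
   [FRAME_REG + OFFSET] in the frame alias set, according to STORE.
   Shared by the gen_frame_load and gen_frame_store wrappers below.  */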
24222 static rtx
24223 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
24224 {
24225 rtx addr, mem;
24226
24227 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
24228 mem = gen_frame_mem (GET_MODE (reg), addr);
24229 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
24230 }
24231
24232 static rtx
24233 gen_frame_load (rtx reg, rtx frame_reg, int offset)
24234 {
24235 return gen_frame_set (reg, frame_reg, offset, false);
24236 }
24237
24238 static rtx
24239 gen_frame_store (rtx reg, rtx frame_reg, int offset)
24240 {
24241 return gen_frame_set (reg, frame_reg, offset, true);
24242 }
24243
24244 /* Save REGNO into [FRAME_REG + OFFSET] in mode MODE, and emit the
24245    RTX_FRAME_RELATED_P note with FRAME_REG rewritten as sp + FRAME_REG_TO_SP.  */
24246
24247 static rtx
24248 emit_frame_save (rtx frame_reg, machine_mode mode,
24249 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
24250 {
24251 rtx reg, insn;
24252
24253 /* Some cases that need register indexed addressing. */
24254 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
24255 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
24256 || (TARGET_E500_DOUBLE && mode == DFmode)
24257 || (TARGET_SPE_ABI
24258 && SPE_VECTOR_MODE (mode)
24259 && !SPE_CONST_OFFSET_OK (offset))));
24260
24261 reg = gen_rtx_REG (mode, regno);
24262 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
24263 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
24264 NULL_RTX, NULL_RTX);
24265 }
24266
24267 /* Emit an offset memory reference suitable for a frame store, while
24268 converting to a valid addressing mode. */
24269
24270 static rtx
24271 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
24272 {
24273 rtx int_rtx, offset_rtx;
24274
24275 int_rtx = GEN_INT (offset);
24276
24277 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
24278 || (TARGET_E500_DOUBLE && mode == DFmode))
24279 {
24280 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
24281 emit_move_insn (offset_rtx, int_rtx);
24282 }
24283 else
24284 offset_rtx = int_rtx;
24285
24286 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
24287 }
24288
24289 #ifndef TARGET_FIX_AND_CONTINUE
24290 #define TARGET_FIX_AND_CONTINUE 0
24291 #endif
24292
24293 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
24294 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
24295 #define LAST_SAVRES_REGISTER 31
24296 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
24297
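/* The selector SEL used below is a bitmask built from these flags:
   SAVRES_LR and SAVRES_SAVE are single bits, while SAVRES_REG is a
   two-bit field (mask 0x0c) holding one of SAVRES_GPR, SAVRES_FPR or
   SAVRES_VR.  The twelve possible values index the second dimension
   of savres_routine_syms.  */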
24298 enum {
24299 SAVRES_LR = 0x1,
24300 SAVRES_SAVE = 0x2,
24301 SAVRES_REG = 0x0c,
24302 SAVRES_GPR = 0,
24303 SAVRES_FPR = 4,
24304 SAVRES_VR = 8
24305 };
24306
24307 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
24308
24309 /* Temporary holding space for an out-of-line register save/restore
24310 routine name. */
24311 static char savres_routine_name[30];
24312
24313 /* Return the name for an out-of-line register save/restore routine.
24314    SEL selects the register class and save/restore direction.  */
24315
24316 static char *
24317 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
24318 {
24319 const char *prefix = "";
24320 const char *suffix = "";
24321
24322 /* Different targets are supposed to define
24323 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
24324 routine name could be defined with:
24325
24326 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
24327
24328    This is a nice idea in theory, but in practice, things are
24329 complicated in several ways:
24330
24331 - ELF targets have save/restore routines for GPRs.
24332
24333 - SPE targets use different prefixes for 32/64-bit registers, and
24334 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
24335
24336 - PPC64 ELF targets have routines for save/restore of GPRs that
24337 differ in what they do with the link register, so having a set
24338 prefix doesn't work. (We only use one of the save routines at
24339 the moment, though.)
24340
24341    - PPC32 ELF targets have "exit" versions of the restore routines
24342 that restore the link register and can save some extra space.
24343 These require an extra suffix. (There are also "tail" versions
24344 of the restore routines and "GOT" versions of the save routines,
24345 but we don't generate those at present. Same problems apply,
24346 though.)
24347
24348 We deal with all this by synthesizing our own prefix/suffix and
24349 using that for the simple sprintf call shown above. */
24350 if (TARGET_SPE)
24351 {
24352 /* No floating point saves on the SPE. */
24353 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
24354
24355 if ((sel & SAVRES_SAVE))
24356 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
24357 else
24358 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
24359
24360 if ((sel & SAVRES_LR))
24361 suffix = "_x";
24362 }
24363 else if (DEFAULT_ABI == ABI_V4)
24364 {
24365 if (TARGET_64BIT)
24366 goto aix_names;
24367
24368 if ((sel & SAVRES_REG) == SAVRES_GPR)
24369 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
24370 else if ((sel & SAVRES_REG) == SAVRES_FPR)
24371 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
24372 else if ((sel & SAVRES_REG) == SAVRES_VR)
24373 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
24374 else
24375 abort ();
24376
24377 if ((sel & SAVRES_LR))
24378 suffix = "_x";
24379 }
24380 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24381 {
24382 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
24383 /* No out-of-line save/restore routines for GPRs on AIX. */
24384 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
24385 #endif
24386
24387 aix_names:
24388 if ((sel & SAVRES_REG) == SAVRES_GPR)
24389 prefix = ((sel & SAVRES_SAVE)
24390 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
24391 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
24392 else if ((sel & SAVRES_REG) == SAVRES_FPR)
24393 {
24394 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
24395 if ((sel & SAVRES_LR))
24396 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
24397 else
24398 #endif
24399 {
24400 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
24401 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
24402 }
24403 }
24404 else if ((sel & SAVRES_REG) == SAVRES_VR)
24405 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
24406 else
24407 abort ();
24408 }
24409
24410 if (DEFAULT_ABI == ABI_DARWIN)
24411 {
24412 /* The Darwin approach is (slightly) different, in order to be
24413 compatible with code generated by the system toolchain. There is a
24414 single symbol for the start of save sequence, and the code here
24415 embeds an offset into that code on the basis of the first register
24416 to be saved. */
24417       prefix = (sel & SAVRES_SAVE) ? "save" : "rest";
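      /* In the sprintfs below, %.0d prints no digits at all when the
         computed offset is zero, so the first saved register yields
         the bare symbol name with no "+0" appended.  */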
24418 if ((sel & SAVRES_REG) == SAVRES_GPR)
24419 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
24420 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
24421 (regno - 13) * 4, prefix, regno);
24422 else if ((sel & SAVRES_REG) == SAVRES_FPR)
24423 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
24424 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
24425 else if ((sel & SAVRES_REG) == SAVRES_VR)
24426 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
24427 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
24428 else
24429 abort ();
24430 }
24431 else
24432 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
24433
24434 return savres_routine_name;
24435 }
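
/* Two illustrative names the routine above produces: on ELFv2, saving
   GPRs from r29 up with the LR variant gives "_savegpr0_29"; a PPC32
   SVR4 FPR restore that also restores LR gives "_restfpr_14_x".  */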
24436
24437 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
24438    SEL selects the register class and save/restore direction.  */
24439
24440 static rtx
24441 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
24442 {
24443 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
24444 ? info->first_gp_reg_save
24445 : (sel & SAVRES_REG) == SAVRES_FPR
24446 ? info->first_fp_reg_save - 32
24447 : (sel & SAVRES_REG) == SAVRES_VR
24448 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
24449 : -1);
24450 rtx sym;
24451 int select = sel;
24452
24453 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
24454 versions of the gpr routines. */
24455 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
24456 && info->spe_64bit_regs_used)
24457 select ^= SAVRES_FPR ^ SAVRES_GPR;
24458
24459 /* Don't generate bogus routine names. */
24460 gcc_assert (FIRST_SAVRES_REGISTER <= regno
24461 && regno <= LAST_SAVRES_REGISTER
24462 && select >= 0 && select <= 12);
24463
24464 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
24465
24466 if (sym == NULL)
24467 {
24468 char *name;
24469
24470 name = rs6000_savres_routine_name (info, regno, sel);
24471
24472 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
24473 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
24474 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
24475 }
24476
24477 return sym;
24478 }
24479
24480 /* Emit a sequence of insns, including a stack tie if needed, for
24481 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
24482 reset the stack pointer, but move the base of the frame into
24483 reg UPDT_REGNO for use by out-of-line register restore routines. */
24484
24485 static rtx
24486 rs6000_emit_stack_reset (rs6000_stack_t *info,
24487 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
24488 unsigned updt_regno)
24489 {
24490 rtx updt_reg_rtx;
24491
24492 /* This blockage is needed so that sched doesn't decide to move
24493 the sp change before the register restores. */
24494 if (DEFAULT_ABI == ABI_V4
24495 || (TARGET_SPE_ABI
24496 && info->spe_64bit_regs_used != 0
24497 && info->first_gp_reg_save != 32))
24498 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
24499
24500 /* If we are restoring registers out-of-line, we will be using the
24501 "exit" variants of the restore routines, which will reset the
24502      stack for us.  But we do need to point updt_reg at the
24503      right place for those routines.  */
24504 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
24505
24506 if (frame_off != 0)
24507 return emit_insn (gen_add3_insn (updt_reg_rtx,
24508 frame_reg_rtx, GEN_INT (frame_off)));
24509 else if (REGNO (frame_reg_rtx) != updt_regno)
24510 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
24511
24512 return NULL_RTX;
24513 }
24514
24515 /* Return the register number used as a pointer by out-of-line
24516 save/restore functions. */
24517
24518 static inline unsigned
24519 ptr_regno_for_savres (int sel)
24520 {
24521 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24522 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
24523 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
24524 }
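
/* Thus on AIX/ELFv2 the FPR routines and the LR-saving "_savegpr0_*"
   variants address the frame off r1, while "_savegpr1_*" and the VR
   routines take their pointer in r12; the SVR4/Darwin routines use
   r11 (r1 for the Darwin FPR ones).  */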
24525
24526 /* Construct a parallel rtx describing the effect of a call to an
24527 out-of-line register save/restore routine, and emit the insn
24528 or jump_insn as appropriate. */
24529
24530 static rtx
24531 rs6000_emit_savres_rtx (rs6000_stack_t *info,
24532 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
24533 machine_mode reg_mode, int sel)
24534 {
24535 int i;
24536 int offset, start_reg, end_reg, n_regs, use_reg;
24537 int reg_size = GET_MODE_SIZE (reg_mode);
24538 rtx sym;
24539 rtvec p;
24540 rtx par, insn;
24541
24542 offset = 0;
24543 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
24544 ? info->first_gp_reg_save
24545 : (sel & SAVRES_REG) == SAVRES_FPR
24546 ? info->first_fp_reg_save
24547 : (sel & SAVRES_REG) == SAVRES_VR
24548 ? info->first_altivec_reg_save
24549 : -1);
24550 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
24551 ? 32
24552 : (sel & SAVRES_REG) == SAVRES_FPR
24553 ? 64
24554 : (sel & SAVRES_REG) == SAVRES_VR
24555 ? LAST_ALTIVEC_REGNO + 1
24556 : -1);
24557 n_regs = end_reg - start_reg;
24558 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
24559 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
24560 + n_regs);
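
  /* Layout of the PARALLEL assembled below, in emission order:
       (return)                     -- restore + LR variants only
       (clobber (reg LR))
       (use SYM)                    -- the out-of-line routine
       (use or clobber PTR)         -- VRs clobber PTR and use r0
       one (set ...) per register   -- the saves or restores
       (set (mem ...) (reg 0))      -- save + LR variants only  */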
24561
24562 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
24563 RTVEC_ELT (p, offset++) = ret_rtx;
24564
24565 RTVEC_ELT (p, offset++)
24566 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
24567
24568 sym = rs6000_savres_routine_sym (info, sel);
24569 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
24570
24571 use_reg = ptr_regno_for_savres (sel);
24572 if ((sel & SAVRES_REG) == SAVRES_VR)
24573 {
24574 /* Vector regs are saved/restored using [reg+reg] addressing. */
24575 RTVEC_ELT (p, offset++)
24576 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
24577 RTVEC_ELT (p, offset++)
24578 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
24579 }
24580 else
24581 RTVEC_ELT (p, offset++)
24582 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
24583
24584 for (i = 0; i < end_reg - start_reg; i++)
24585 RTVEC_ELT (p, i + offset)
24586 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
24587 frame_reg_rtx, save_area_offset + reg_size * i,
24588 (sel & SAVRES_SAVE) != 0);
24589
24590 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
24591 RTVEC_ELT (p, i + offset)
24592 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
24593
24594 par = gen_rtx_PARALLEL (VOIDmode, p);
24595
24596 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
24597 {
24598 insn = emit_jump_insn (par);
24599 JUMP_LABEL (insn) = ret_rtx;
24600 }
24601 else
24602 insn = emit_insn (par);
24603 return insn;
24604 }
24605
24606 /* Emit code to store CR fields that need to be saved into REG. */
24607
24608 static void
24609 rs6000_emit_move_from_cr (rtx reg)
24610 {
24611 /* Only the ELFv2 ABI allows storing only selected fields. */
24612 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
24613 {
24614 int i, cr_reg[8], count = 0;
24615
24616 /* Collect CR fields that must be saved. */
24617 for (i = 0; i < 8; i++)
24618 if (save_reg_p (CR0_REGNO + i))
24619 cr_reg[count++] = i;
24620
24621 /* If it's just a single one, use mfcrf. */
24622 if (count == 1)
24623 {
24624 rtvec p = rtvec_alloc (1);
24625 rtvec r = rtvec_alloc (2);
24626 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
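          /* The FXM field of mfcrf has its most significant bit selecting
             CR field 0, hence the 7 - N shift below.  */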
24627 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
24628 RTVEC_ELT (p, 0)
24629 = gen_rtx_SET (reg,
24630 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
24631
24632 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24633 return;
24634 }
24635
24636 /* ??? It might be better to handle count == 2 / 3 cases here
24637 as well, using logical operations to combine the values. */
24638 }
24639
24640 emit_insn (gen_movesi_from_cr (reg));
24641 }
24642
24643 /* Return whether the split-stack arg pointer (r12) is used. */
24644
24645 static bool
24646 split_stack_arg_pointer_used_p (void)
24647 {
24648 /* If the pseudo holding the arg pointer is no longer a pseudo,
24649 then the arg pointer is used. */
24650 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
24651 && (!REG_P (cfun->machine->split_stack_arg_pointer)
24652 || (REGNO (cfun->machine->split_stack_arg_pointer)
24653 < FIRST_PSEUDO_REGISTER)))
24654 return true;
24655
24656 /* Unfortunately we also need to do some code scanning, since
24657 r12 may have been substituted for the pseudo. */
24658 rtx_insn *insn;
24659 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
24660 FOR_BB_INSNS (bb, insn)
24661 if (NONDEBUG_INSN_P (insn))
24662 {
24663 /* A call destroys r12. */
24664 if (CALL_P (insn))
24665 return false;
24666
24667 df_ref use;
24668 FOR_EACH_INSN_USE (use, insn)
24669 {
24670 rtx x = DF_REF_REG (use);
24671 if (REG_P (x) && REGNO (x) == 12)
24672 return true;
24673 }
24674 df_ref def;
24675 FOR_EACH_INSN_DEF (def, insn)
24676 {
24677 rtx x = DF_REF_REG (def);
24678 if (REG_P (x) && REGNO (x) == 12)
24679 return false;
24680 }
24681 }
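  /* r12 was neither used nor set in the first block; the arg pointer
     is in use iff r12 is live on exit from that block.  */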
24682 return bitmap_bit_p (DF_LR_OUT (bb), 12);
24683 }
24684
24685 /* Emit function prologue as insns. */
24686
24687 void
24688 rs6000_emit_prologue (void)
24689 {
24690 rs6000_stack_t *info = rs6000_stack_info ();
24691 machine_mode reg_mode = Pmode;
24692 int reg_size = TARGET_32BIT ? 4 : 8;
24693 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24694 rtx frame_reg_rtx = sp_reg_rtx;
24695 unsigned int cr_save_regno;
24696 rtx cr_save_rtx = NULL_RTX;
24697 rtx insn;
24698 int strategy;
24699 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24700 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24701 && call_used_regs[STATIC_CHAIN_REGNUM]);
24702 int using_split_stack = (flag_split_stack
24703 && (lookup_attribute ("no_split_stack",
24704 DECL_ATTRIBUTES (cfun->decl))
24705 == NULL));
24706
24707 /* Offset to top of frame for frame_reg and sp respectively. */
24708 HOST_WIDE_INT frame_off = 0;
24709 HOST_WIDE_INT sp_off = 0;
24710 /* sp_adjust is the stack adjusting instruction, tracked so that the
24711 insn setting up the split-stack arg pointer can be emitted just
24712 prior to it, when r12 is not used here for other purposes. */
24713 rtx_insn *sp_adjust = 0;
24714
24715 #if CHECKING_P
24716 /* Track and check usage of r0, r11, r12. */
24717 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
24718 #define START_USE(R) do \
24719 { \
24720 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
24721 reg_inuse |= 1 << (R); \
24722 } while (0)
24723 #define END_USE(R) do \
24724 { \
24725 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
24726 reg_inuse &= ~(1 << (R)); \
24727 } while (0)
24728 #define NOT_INUSE(R) do \
24729 { \
24730 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
24731 } while (0)
24732 #else
24733 #define START_USE(R) do {} while (0)
24734 #define END_USE(R) do {} while (0)
24735 #define NOT_INUSE(R) do {} while (0)
24736 #endif
24737
24738 if (DEFAULT_ABI == ABI_ELFv2
24739 && !TARGET_SINGLE_PIC_BASE)
24740 {
24741 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
24742
24743 /* With -mminimal-toc we may generate an extra use of r2 below. */
24744 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24745 cfun->machine->r2_setup_needed = true;
24746 }
24747
24748
24749 if (flag_stack_usage_info)
24750 current_function_static_stack_size = info->total_size;
24751
24752 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
24753 {
24754 HOST_WIDE_INT size = info->total_size;
24755
24756 if (crtl->is_leaf && !cfun->calls_alloca)
24757 {
24758 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
24759 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
24760 size - STACK_CHECK_PROTECT);
24761 }
24762 else if (size > 0)
24763 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
24764 }
24765
24766 if (TARGET_FIX_AND_CONTINUE)
24767 {
24768 /* gdb on darwin arranges to forward a function from the old
24769 address by modifying the first 5 instructions of the function
24770 to branch to the overriding function. This is necessary to
24771 permit function pointers that point to the old function to
24772 actually forward to the new function. */
24773 emit_insn (gen_nop ());
24774 emit_insn (gen_nop ());
24775 emit_insn (gen_nop ());
24776 emit_insn (gen_nop ());
24777 emit_insn (gen_nop ());
24778 }
24779
24780 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24781 {
24782 reg_mode = V2SImode;
24783 reg_size = 8;
24784 }
24785
24786 /* Handle world saves specially here. */
24787 if (WORLD_SAVE_P (info))
24788 {
24789 int i, j, sz;
24790 rtx treg;
24791 rtvec p;
24792 rtx reg0;
24793
24794 /* save_world expects lr in r0. */
24795 reg0 = gen_rtx_REG (Pmode, 0);
24796 if (info->lr_save_p)
24797 {
24798 insn = emit_move_insn (reg0,
24799 gen_rtx_REG (Pmode, LR_REGNO));
24800 RTX_FRAME_RELATED_P (insn) = 1;
24801 }
24802
24803 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
24804 assumptions about the offsets of various bits of the stack
24805 frame. */
24806 gcc_assert (info->gp_save_offset == -220
24807 && info->fp_save_offset == -144
24808 && info->lr_save_offset == 8
24809 && info->cr_save_offset == 4
24810 && info->push_p
24811 && info->lr_save_p
24812 && (!crtl->calls_eh_return
24813 || info->ehrd_offset == -432)
24814 && info->vrsave_save_offset == -224
24815 && info->altivec_save_offset == -416);
24816
24817 treg = gen_rtx_REG (SImode, 11);
24818 emit_move_insn (treg, GEN_INT (-info->total_size));
24819
24820 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
24821 in R11. It also clobbers R12, so beware! */
24822
24823       /* Preserve CR2 for save_world prologues.  */
24824 sz = 5;
24825 sz += 32 - info->first_gp_reg_save;
24826 sz += 64 - info->first_fp_reg_save;
24827 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
24828 p = rtvec_alloc (sz);
24829 j = 0;
24830 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
24831 gen_rtx_REG (SImode,
24832 LR_REGNO));
24833 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24834 gen_rtx_SYMBOL_REF (Pmode,
24835 "*save_world"));
24836 /* We do floats first so that the instruction pattern matches
24837 properly. */
24838 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
24839 RTVEC_ELT (p, j++)
24840 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24841 ? DFmode : SFmode,
24842 info->first_fp_reg_save + i),
24843 frame_reg_rtx,
24844 info->fp_save_offset + frame_off + 8 * i);
24845 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24846 RTVEC_ELT (p, j++)
24847 = gen_frame_store (gen_rtx_REG (V4SImode,
24848 info->first_altivec_reg_save + i),
24849 frame_reg_rtx,
24850 info->altivec_save_offset + frame_off + 16 * i);
24851 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24852 RTVEC_ELT (p, j++)
24853 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
24854 frame_reg_rtx,
24855 info->gp_save_offset + frame_off + reg_size * i);
24856
24857 /* CR register traditionally saved as CR2. */
24858 RTVEC_ELT (p, j++)
24859 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
24860 frame_reg_rtx, info->cr_save_offset + frame_off);
24861 /* Explain about use of R0. */
24862 if (info->lr_save_p)
24863 RTVEC_ELT (p, j++)
24864 = gen_frame_store (reg0,
24865 frame_reg_rtx, info->lr_save_offset + frame_off);
24866 /* Explain what happens to the stack pointer. */
24867 {
24868 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
24869 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
24870 }
24871
24872 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24873 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24874 treg, GEN_INT (-info->total_size));
24875 sp_off = frame_off = info->total_size;
24876 }
24877
24878 strategy = info->savres_strategy;
24879
24880   /* For V.4, update the stack before we do any saving and set the back pointer.  */
24881 if (! WORLD_SAVE_P (info)
24882 && info->push_p
24883 && (DEFAULT_ABI == ABI_V4
24884 || crtl->calls_eh_return))
24885 {
24886 bool need_r11 = (TARGET_SPE
24887 ? (!(strategy & SAVE_INLINE_GPRS)
24888 && info->spe_64bit_regs_used == 0)
24889 : (!(strategy & SAVE_INLINE_FPRS)
24890 || !(strategy & SAVE_INLINE_GPRS)
24891 || !(strategy & SAVE_INLINE_VRS)));
24892 int ptr_regno = -1;
24893 rtx ptr_reg = NULL_RTX;
24894 int ptr_off = 0;
24895
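      /* D-form loads and stores reach only a 16-bit signed displacement,
         so a frame smaller than 32767 bytes can be addressed directly
         from the new stack pointer without a separate pointer reg.  */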
24896 if (info->total_size < 32767)
24897 frame_off = info->total_size;
24898 else if (need_r11)
24899 ptr_regno = 11;
24900 else if (info->cr_save_p
24901 || info->lr_save_p
24902 || info->first_fp_reg_save < 64
24903 || info->first_gp_reg_save < 32
24904 || info->altivec_size != 0
24905 || info->vrsave_size != 0
24906 || crtl->calls_eh_return)
24907 ptr_regno = 12;
24908 else
24909 {
24910 /* The prologue won't be saving any regs so there is no need
24911 to set up a frame register to access any frame save area.
24912 We also won't be using frame_off anywhere below, but set
24913 the correct value anyway to protect against future
24914 changes to this function. */
24915 frame_off = info->total_size;
24916 }
24917 if (ptr_regno != -1)
24918 {
24919 /* Set up the frame offset to that needed by the first
24920 out-of-line save function. */
24921 START_USE (ptr_regno);
24922 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24923 frame_reg_rtx = ptr_reg;
24924 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
24925 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
24926 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
24927 ptr_off = info->gp_save_offset + info->gp_size;
24928 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
24929 ptr_off = info->altivec_save_offset + info->altivec_size;
24930 frame_off = -ptr_off;
24931 }
24932 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
24933 ptr_reg, ptr_off);
24934 if (REGNO (frame_reg_rtx) == 12)
24935 sp_adjust = 0;
24936 sp_off = info->total_size;
24937 if (frame_reg_rtx != sp_reg_rtx)
24938 rs6000_emit_stack_tie (frame_reg_rtx, false);
24939 }
24940
24941   /* If the link register needs saving, get it into r0.  */
24942 if (!WORLD_SAVE_P (info) && info->lr_save_p)
24943 {
24944 rtx addr, reg, mem;
24945
24946 reg = gen_rtx_REG (Pmode, 0);
24947 START_USE (0);
24948 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
24949 RTX_FRAME_RELATED_P (insn) = 1;
24950
24951 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
24952 | SAVE_NOINLINE_FPRS_SAVES_LR)))
24953 {
24954 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
24955 GEN_INT (info->lr_save_offset + frame_off));
24956 mem = gen_rtx_MEM (Pmode, addr);
24957 	  /* Deliberately a plain MEM rather than gen_frame_mem (i.e. not in
24958 	     rs6000_sr_alias_set), because __builtin_return_address can read it.  */
24959
24960 insn = emit_move_insn (mem, reg);
24961 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24962 NULL_RTX, NULL_RTX);
24963 END_USE (0);
24964 }
24965 }
24966
24967 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
24968      r12 will be needed by the out-of-line gpr save.  */
24969 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24970 && !(strategy & (SAVE_INLINE_GPRS
24971 | SAVE_NOINLINE_GPRS_SAVES_LR))
24972 ? 11 : 12);
24973 if (!WORLD_SAVE_P (info)
24974 && info->cr_save_p
24975 && REGNO (frame_reg_rtx) != cr_save_regno
24976 && !(using_static_chain_p && cr_save_regno == 11)
24977 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
24978 {
24979 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
24980 START_USE (cr_save_regno);
24981 rs6000_emit_move_from_cr (cr_save_rtx);
24982 }
24983
24984   /* Do any required saving of fpr's.  If the strategy says to save
24985      them inline, do it ourselves; otherwise call an out-of-line routine.  */
24986 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
24987 {
24988 int i;
24989 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
24990 if (save_reg_p (info->first_fp_reg_save + i))
24991 emit_frame_save (frame_reg_rtx,
24992 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24993 ? DFmode : SFmode),
24994 info->first_fp_reg_save + i,
24995 info->fp_save_offset + frame_off + 8 * i,
24996 sp_off - frame_off);
24997 }
24998 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
24999 {
25000 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
25001 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
25002 unsigned ptr_regno = ptr_regno_for_savres (sel);
25003 rtx ptr_reg = frame_reg_rtx;
25004
25005 if (REGNO (frame_reg_rtx) == ptr_regno)
25006 gcc_checking_assert (frame_off == 0);
25007 else
25008 {
25009 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25010 NOT_INUSE (ptr_regno);
25011 emit_insn (gen_add3_insn (ptr_reg,
25012 frame_reg_rtx, GEN_INT (frame_off)));
25013 }
25014 insn = rs6000_emit_savres_rtx (info, ptr_reg,
25015 info->fp_save_offset,
25016 info->lr_save_offset,
25017 DFmode, sel);
25018 rs6000_frame_related (insn, ptr_reg, sp_off,
25019 NULL_RTX, NULL_RTX);
25020 if (lr)
25021 END_USE (0);
25022 }
25023
25024 /* Save GPRs. This is done as a PARALLEL if we are using
25025 the store-multiple instructions. */
25026 if (!WORLD_SAVE_P (info)
25027 && TARGET_SPE_ABI
25028 && info->spe_64bit_regs_used != 0
25029 && info->first_gp_reg_save != 32)
25030 {
25031 int i;
25032 rtx spe_save_area_ptr;
25033 HOST_WIDE_INT save_off;
25034 int ool_adjust = 0;
25035
25036 /* Determine whether we can address all of the registers that need
25037 to be saved with an offset from frame_reg_rtx that fits in
25038 the small const field for SPE memory instructions. */
25039 int spe_regs_addressable
25040 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25041 + reg_size * (32 - info->first_gp_reg_save - 1))
25042 && (strategy & SAVE_INLINE_GPRS));
25043
25044 if (spe_regs_addressable)
25045 {
25046 spe_save_area_ptr = frame_reg_rtx;
25047 save_off = frame_off;
25048 }
25049 else
25050 {
25051 /* Make r11 point to the start of the SPE save area. We need
25052 to be careful here if r11 is holding the static chain. If
25053 it is, then temporarily save it in r0. */
25054 HOST_WIDE_INT offset;
25055
25056 if (!(strategy & SAVE_INLINE_GPRS))
25057 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25058 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
25059 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
25060 save_off = frame_off - offset;
25061
25062 if (using_static_chain_p)
25063 {
25064 rtx r0 = gen_rtx_REG (Pmode, 0);
25065
25066 START_USE (0);
25067 gcc_assert (info->first_gp_reg_save > 11);
25068
25069 emit_move_insn (r0, spe_save_area_ptr);
25070 }
25071 else if (REGNO (frame_reg_rtx) != 11)
25072 START_USE (11);
25073
25074 emit_insn (gen_addsi3 (spe_save_area_ptr,
25075 frame_reg_rtx, GEN_INT (offset)));
25076 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
25077 frame_off = -info->spe_gp_save_offset + ool_adjust;
25078 }
25079
25080 if ((strategy & SAVE_INLINE_GPRS))
25081 {
25082 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25083 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25084 emit_frame_save (spe_save_area_ptr, reg_mode,
25085 info->first_gp_reg_save + i,
25086 (info->spe_gp_save_offset + save_off
25087 + reg_size * i),
25088 sp_off - save_off);
25089 }
25090 else
25091 {
25092 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
25093 info->spe_gp_save_offset + save_off,
25094 0, reg_mode,
25095 SAVRES_SAVE | SAVRES_GPR);
25096
25097 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
25098 NULL_RTX, NULL_RTX);
25099 }
25100
25101 /* Move the static chain pointer back. */
25102 if (!spe_regs_addressable)
25103 {
25104 if (using_static_chain_p)
25105 {
25106 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
25107 END_USE (0);
25108 }
25109 else if (REGNO (frame_reg_rtx) != 11)
25110 END_USE (11);
25111 }
25112 }
25113 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
25114 {
25115 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
25116 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
25117 unsigned ptr_regno = ptr_regno_for_savres (sel);
25118 rtx ptr_reg = frame_reg_rtx;
25119 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
25120 int end_save = info->gp_save_offset + info->gp_size;
25121 int ptr_off;
25122
25123 if (ptr_regno == 12)
25124 sp_adjust = 0;
25125 if (!ptr_set_up)
25126 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25127
25128 /* Need to adjust r11 (r12) if we saved any FPRs. */
25129 if (end_save + frame_off != 0)
25130 {
25131 rtx offset = GEN_INT (end_save + frame_off);
25132
25133 if (ptr_set_up)
25134 frame_off = -end_save;
25135 else
25136 NOT_INUSE (ptr_regno);
25137 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25138 }
25139 else if (!ptr_set_up)
25140 {
25141 NOT_INUSE (ptr_regno);
25142 emit_move_insn (ptr_reg, frame_reg_rtx);
25143 }
25144 ptr_off = -end_save;
25145 insn = rs6000_emit_savres_rtx (info, ptr_reg,
25146 info->gp_save_offset + ptr_off,
25147 info->lr_save_offset + ptr_off,
25148 reg_mode, sel);
25149 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
25150 NULL_RTX, NULL_RTX);
25151 if (lr)
25152 END_USE (0);
25153 }
25154 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
25155 {
25156 rtvec p;
25157 int i;
25158 p = rtvec_alloc (32 - info->first_gp_reg_save);
25159 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25160 RTVEC_ELT (p, i)
25161 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25162 frame_reg_rtx,
25163 info->gp_save_offset + frame_off + reg_size * i);
25164 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25165 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25166 NULL_RTX, NULL_RTX);
25167 }
25168 else if (!WORLD_SAVE_P (info))
25169 {
25170 int i;
25171 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25172 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25173 emit_frame_save (frame_reg_rtx, reg_mode,
25174 info->first_gp_reg_save + i,
25175 info->gp_save_offset + frame_off + reg_size * i,
25176 sp_off - frame_off);
25177 }
25178
25179 if (crtl->calls_eh_return)
25180 {
25181 unsigned int i;
25182 rtvec p;
25183
25184 for (i = 0; ; ++i)
25185 {
25186 unsigned int regno = EH_RETURN_DATA_REGNO (i);
25187 if (regno == INVALID_REGNUM)
25188 break;
25189 }
25190
25191 p = rtvec_alloc (i);
25192
25193 for (i = 0; ; ++i)
25194 {
25195 unsigned int regno = EH_RETURN_DATA_REGNO (i);
25196 if (regno == INVALID_REGNUM)
25197 break;
25198
25199 insn
25200 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
25201 sp_reg_rtx,
25202 info->ehrd_offset + sp_off + reg_size * (int) i);
25203 RTVEC_ELT (p, i) = insn;
25204 RTX_FRAME_RELATED_P (insn) = 1;
25205 }
25206
25207 insn = emit_insn (gen_blockage ());
25208 RTX_FRAME_RELATED_P (insn) = 1;
25209 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
25210 }
25211
25212 /* In AIX ABI we need to make sure r2 is really saved. */
25213 if (TARGET_AIX && crtl->calls_eh_return)
25214 {
25215 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
25216 rtx save_insn, join_insn, note;
25217 long toc_restore_insn;
25218
25219 tmp_reg = gen_rtx_REG (Pmode, 11);
25220 tmp_reg_si = gen_rtx_REG (SImode, 11);
25221 if (using_static_chain_p)
25222 {
25223 START_USE (0);
25224 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
25225 }
25226 else
25227 START_USE (11);
25228 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
25229       /* Peek at the instruction to which this function returns.  If it's
25230 restoring r2, then we know we've already saved r2. We can't
25231 unconditionally save r2 because the value we have will already
25232 be updated if we arrived at this function via a plt call or
25233 toc adjusting stub. */
25234 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
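      /* 0x80410000 is "lwz r2,0(r1)" and 0xE8410000 is "ld r2,0(r1)";
         adding RS6000_TOC_SAVE_SLOT forms the exact insn image a
         toc-adjusting call stub uses to restore r2.  */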
25235 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
25236 + RS6000_TOC_SAVE_SLOT);
25237 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
25238 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
25239 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
25240 validate_condition_mode (EQ, CCUNSmode);
25241 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
25242 emit_insn (gen_rtx_SET (compare_result,
25243 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
25244 toc_save_done = gen_label_rtx ();
25245 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
25246 gen_rtx_EQ (VOIDmode, compare_result,
25247 const0_rtx),
25248 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
25249 pc_rtx);
25250 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
25251 JUMP_LABEL (jump) = toc_save_done;
25252 LABEL_NUSES (toc_save_done) += 1;
25253
25254 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
25255 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
25256 sp_off - frame_off);
25257
25258 emit_label (toc_save_done);
25259
25260 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
25261 have a CFG that has different saves along different paths.
25262 Move the note to a dummy blockage insn, which describes that
25263 R2 is unconditionally saved after the label. */
25264 /* ??? An alternate representation might be a special insn pattern
25265 containing both the branch and the store. That might let the
25266 code that minimizes the number of DW_CFA_advance opcodes better
25267 freedom in placing the annotations. */
25268 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
25269 if (note)
25270 remove_note (save_insn, note);
25271 else
25272 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
25273 copy_rtx (PATTERN (save_insn)), NULL_RTX);
25274 RTX_FRAME_RELATED_P (save_insn) = 0;
25275
25276 join_insn = emit_insn (gen_blockage ());
25277 REG_NOTES (join_insn) = note;
25278 RTX_FRAME_RELATED_P (join_insn) = 1;
25279
25280 if (using_static_chain_p)
25281 {
25282 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
25283 END_USE (0);
25284 }
25285 else
25286 END_USE (11);
25287 }
25288
25289 /* Save CR if we use any that must be preserved. */
25290 if (!WORLD_SAVE_P (info) && info->cr_save_p)
25291 {
25292 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
25293 GEN_INT (info->cr_save_offset + frame_off));
25294 rtx mem = gen_frame_mem (SImode, addr);
25295
25296 /* If we didn't copy cr before, do so now using r0. */
25297 if (cr_save_rtx == NULL_RTX)
25298 {
25299 START_USE (0);
25300 cr_save_rtx = gen_rtx_REG (SImode, 0);
25301 rs6000_emit_move_from_cr (cr_save_rtx);
25302 }
25303
25304 /* Saving CR requires a two-instruction sequence: one instruction
25305 to move the CR to a general-purpose register, and a second
25306 instruction that stores the GPR to memory.
25307
25308 We do not emit any DWARF CFI records for the first of these,
25309 because we cannot properly represent the fact that CR is saved in
25310 a register. One reason is that we cannot express that multiple
25311 CR fields are saved; another reason is that on 64-bit, the size
25312 of the CR register in DWARF (4 bytes) differs from the size of
25313 a general-purpose register.
25314
25315 This means if any intervening instruction were to clobber one of
25316 the call-saved CR fields, we'd have incorrect CFI. To prevent
25317 this from happening, we mark the store to memory as a use of
25318 those CR fields, which prevents any such instruction from being
25319 scheduled in between the two instructions. */
25320 rtx crsave_v[9];
25321 int n_crsave = 0;
25322 int i;
25323
25324 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
25325 for (i = 0; i < 8; i++)
25326 if (save_reg_p (CR0_REGNO + i))
25327 crsave_v[n_crsave++]
25328 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
25329
25330 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
25331 gen_rtvec_v (n_crsave, crsave_v)));
25332 END_USE (REGNO (cr_save_rtx));
25333
25334 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
25335 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
25336 so we need to construct a frame expression manually. */
25337 RTX_FRAME_RELATED_P (insn) = 1;
25338
25339 /* Update address to be stack-pointer relative, like
25340 rs6000_frame_related would do. */
25341 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
25342 GEN_INT (info->cr_save_offset + sp_off));
25343 mem = gen_frame_mem (SImode, addr);
25344
25345 if (DEFAULT_ABI == ABI_ELFv2)
25346 {
25347 /* In the ELFv2 ABI we generate separate CFI records for each
25348 CR field that was actually saved. They all point to the
25349 same 32-bit stack slot. */
25350 rtx crframe[8];
25351 int n_crframe = 0;
25352
25353 for (i = 0; i < 8; i++)
25354 if (save_reg_p (CR0_REGNO + i))
25355 {
25356 crframe[n_crframe]
25357 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
25358
25359 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
25360 n_crframe++;
25361 }
25362
25363 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25364 gen_rtx_PARALLEL (VOIDmode,
25365 gen_rtvec_v (n_crframe, crframe)));
25366 }
25367 else
25368 {
25369 /* In other ABIs, by convention, we use a single CR regnum to
25370 represent the fact that all call-saved CR fields are saved.
25371 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
25372 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
25373 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
25374 }
25375 }
25376
25377 /* In the ELFv2 ABI we need to save all call-saved CR fields into
25378 *separate* slots if the routine calls __builtin_eh_return, so
25379 that they can be independently restored by the unwinder. */
25380 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25381 {
25382 int i, cr_off = info->ehcr_offset;
25383 rtx crsave;
25384
25385 /* ??? We might get better performance by using multiple mfocrf
25386 instructions. */
25387 crsave = gen_rtx_REG (SImode, 0);
25388 emit_insn (gen_movesi_from_cr (crsave));
25389
25390 for (i = 0; i < 8; i++)
25391 if (!call_used_regs[CR0_REGNO + i])
25392 {
25393 rtvec p = rtvec_alloc (2);
25394 RTVEC_ELT (p, 0)
25395 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
25396 RTVEC_ELT (p, 1)
25397 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
25398
25399 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25400
25401 RTX_FRAME_RELATED_P (insn) = 1;
25402 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25403 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
25404 sp_reg_rtx, cr_off + sp_off));
25405
25406 cr_off += reg_size;
25407 }
25408 }
25409
25410 /* Update stack and set back pointer unless this is V.4,
25411 for which it was done previously. */
25412 if (!WORLD_SAVE_P (info) && info->push_p
25413 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
25414 {
25415 rtx ptr_reg = NULL;
25416 int ptr_off = 0;
25417
25418 /* If saving altivec regs we need to be able to address all save
25419 locations using a 16-bit offset. */
25420 if ((strategy & SAVE_INLINE_VRS) == 0
25421 || (info->altivec_size != 0
25422 && (info->altivec_save_offset + info->altivec_size - 16
25423 + info->total_size - frame_off) > 32767)
25424 || (info->vrsave_size != 0
25425 && (info->vrsave_save_offset
25426 + info->total_size - frame_off) > 32767))
25427 {
25428 int sel = SAVRES_SAVE | SAVRES_VR;
25429 unsigned ptr_regno = ptr_regno_for_savres (sel);
25430
25431 if (using_static_chain_p
25432 && ptr_regno == STATIC_CHAIN_REGNUM)
25433 ptr_regno = 12;
25434 if (REGNO (frame_reg_rtx) != ptr_regno)
25435 START_USE (ptr_regno);
25436 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25437 frame_reg_rtx = ptr_reg;
25438 ptr_off = info->altivec_save_offset + info->altivec_size;
25439 frame_off = -ptr_off;
25440 }
25441 else if (REGNO (frame_reg_rtx) == 1)
25442 frame_off = info->total_size;
25443 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
25444 ptr_reg, ptr_off);
25445 if (REGNO (frame_reg_rtx) == 12)
25446 sp_adjust = 0;
25447 sp_off = info->total_size;
25448 if (frame_reg_rtx != sp_reg_rtx)
25449 rs6000_emit_stack_tie (frame_reg_rtx, false);
25450 }
25451
25452 /* Set frame pointer, if needed. */
25453 if (frame_pointer_needed)
25454 {
25455 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
25456 sp_reg_rtx);
25457 RTX_FRAME_RELATED_P (insn) = 1;
25458 }
25459
25460 /* Save AltiVec registers if needed. Save here because the red zone does
25461 not always include AltiVec registers. */
25462 if (!WORLD_SAVE_P (info)
25463 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
25464 {
25465 int end_save = info->altivec_save_offset + info->altivec_size;
25466 int ptr_off;
25467 /* Oddly, the vector save/restore functions point r0 at the end
25468 of the save area, then use r11 or r12 to load offsets for
25469 [reg+reg] addressing. */
25470 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
25471 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
25472 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
25473
25474 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
25475 NOT_INUSE (0);
25476 if (scratch_regno == 12)
25477 sp_adjust = 0;
25478 if (end_save + frame_off != 0)
25479 {
25480 rtx offset = GEN_INT (end_save + frame_off);
25481
25482 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25483 }
25484 else
25485 emit_move_insn (ptr_reg, frame_reg_rtx);
25486
25487 ptr_off = -end_save;
25488 insn = rs6000_emit_savres_rtx (info, scratch_reg,
25489 info->altivec_save_offset + ptr_off,
25490 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
25491 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
25492 NULL_RTX, NULL_RTX);
25493 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
25494 {
25495 /* The oddity mentioned above clobbered our frame reg. */
25496 emit_move_insn (frame_reg_rtx, ptr_reg);
25497 frame_off = ptr_off;
25498 }
25499 }
25500 else if (!WORLD_SAVE_P (info)
25501 && info->altivec_size != 0)
25502 {
25503 int i;
25504
25505 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25506 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25507 {
25508 rtx areg, savereg, mem;
25509 int offset;
25510
25511 offset = (info->altivec_save_offset + frame_off
25512 + 16 * (i - info->first_altivec_reg_save));
25513
25514 savereg = gen_rtx_REG (V4SImode, i);
25515
25516 NOT_INUSE (0);
25517 areg = gen_rtx_REG (Pmode, 0);
25518 emit_move_insn (areg, GEN_INT (offset));
25519
25520 /* AltiVec addressing mode is [reg+reg]. */
25521 mem = gen_frame_mem (V4SImode,
25522 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
25523
25524 /* Rather than emitting a generic move, force use of the stvx
25525 instruction, which we always want. In particular we don't
25526 want xxpermdi/stxvd2x for little endian. */
25527 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
25528
25529 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25530 areg, GEN_INT (offset));
25531 }
25532 }
25533
25534 /* VRSAVE is a bit vector representing which AltiVec registers
25535 are used. The OS uses this to determine which vector
25536 registers to save on a context switch. We need to save
25537 VRSAVE on the stack frame, add whatever AltiVec registers we
25538 used in this function, and do the corresponding magic in the
25539 epilogue. */
25540
25541 if (!WORLD_SAVE_P (info)
25542 && info->vrsave_size != 0)
25543 {
25544 rtx reg, vrsave;
25545 int offset;
25546 int save_regno;
25547
25548       /* Get VRSAVE into a GPR.  Note that ABI_V4 and ABI_DARWIN might
25549 be using r12 as frame_reg_rtx and r11 as the static chain
25550 pointer for nested functions. */
25551 save_regno = 12;
25552 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25553 && !using_static_chain_p)
25554 save_regno = 11;
25555 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
25556 {
25557 save_regno = 11;
25558 if (using_static_chain_p)
25559 save_regno = 0;
25560 }
25561
25562 NOT_INUSE (save_regno);
25563 reg = gen_rtx_REG (SImode, save_regno);
25564 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
25565 if (TARGET_MACHO)
25566 emit_insn (gen_get_vrsave_internal (reg));
25567 else
25568 emit_insn (gen_rtx_SET (reg, vrsave));
25569
25570 /* Save VRSAVE. */
25571 offset = info->vrsave_save_offset + frame_off;
25572 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
25573
25574 /* Include the registers in the mask. */
25575 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
25576
25577 insn = emit_insn (generate_set_vrsave (reg, info, 0));
25578 }
25579
25580 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
25581 if (!TARGET_SINGLE_PIC_BASE
25582 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
25583 || (DEFAULT_ABI == ABI_V4
25584 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
25585 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
25586 {
25587 /* If emit_load_toc_table will use the link register, we need to save
25588 it. We use R12 for this purpose because emit_load_toc_table
25589 can use register 0. This allows us to use a plain 'blr' to return
25590 from the procedure more often. */
25591 int save_LR_around_toc_setup = (TARGET_ELF
25592 && DEFAULT_ABI == ABI_V4
25593 && flag_pic
25594 && ! info->lr_save_p
25595 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
25596 if (save_LR_around_toc_setup)
25597 {
25598 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
25599 rtx tmp = gen_rtx_REG (Pmode, 12);
25600
25601 sp_adjust = 0;
25602 insn = emit_move_insn (tmp, lr);
25603 RTX_FRAME_RELATED_P (insn) = 1;
25604
25605 rs6000_emit_load_toc_table (TRUE);
25606
25607 insn = emit_move_insn (lr, tmp);
25608 add_reg_note (insn, REG_CFA_RESTORE, lr);
25609 RTX_FRAME_RELATED_P (insn) = 1;
25610 }
25611 else
25612 rs6000_emit_load_toc_table (TRUE);
25613 }
25614
25615 #if TARGET_MACHO
25616 if (!TARGET_SINGLE_PIC_BASE
25617 && DEFAULT_ABI == ABI_DARWIN
25618 && flag_pic && crtl->uses_pic_offset_table)
25619 {
25620 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
25621 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
25622
25623 /* Save and restore LR locally around this call (in R0). */
25624 if (!info->lr_save_p)
25625 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
25626
25627 emit_insn (gen_load_macho_picbase (src));
25628
25629 emit_move_insn (gen_rtx_REG (Pmode,
25630 RS6000_PIC_OFFSET_TABLE_REGNUM),
25631 lr);
25632
25633 if (!info->lr_save_p)
25634 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
25635 }
25636 #endif
25637
25638 /* If we need to, save the TOC register after doing the stack setup.
25639 Do not emit eh frame info for this save. The unwinder wants info,
25640 conceptually attached to instructions in this function, about
25641 register values in the caller of this function. This R2 may have
25642 already been changed from the value in the caller.
25643 We don't attempt to write accurate DWARF EH frame info for R2
25644 because code emitted by gcc for a (non-pointer) function call
25645 doesn't save and restore R2. Instead, R2 is managed out-of-line
25646 by a linker generated plt call stub when the function resides in
25647 a shared library. This behaviour is costly to describe in DWARF,
25648 both in terms of the size of DWARF info and the time taken in the
25649 unwinder to interpret it. R2 changes, apart from the
25650 calls_eh_return case earlier in this function, are handled by
25651 linux-unwind.h frob_update_context. */
25652 if (rs6000_save_toc_in_prologue_p ())
25653 {
25654 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
25655 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
25656 }
25657
25658 if (using_split_stack && split_stack_arg_pointer_used_p ())
25659 {
25660 /* Set up the arg pointer (r12) for -fsplit-stack code. If
25661 __morestack was called, it left the arg pointer to the old
25662 stack in r29. Otherwise, the arg pointer is the top of the
25663 current frame. */
25664 cfun->machine->split_stack_argp_used = true;
25665 if (sp_adjust)
25666 {
25667 rtx r12 = gen_rtx_REG (Pmode, 12);
25668 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
25669 emit_insn_before (set_r12, sp_adjust);
25670 }
25671 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
25672 {
25673 rtx r12 = gen_rtx_REG (Pmode, 12);
25674 if (frame_off == 0)
25675 emit_move_insn (r12, frame_reg_rtx);
25676 else
25677 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
25678 }
25679 if (info->push_p)
25680 {
25681 rtx r12 = gen_rtx_REG (Pmode, 12);
25682 rtx r29 = gen_rtx_REG (Pmode, 29);
25683 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
25684 rtx not_more = gen_label_rtx ();
25685 rtx jump;
25686
25687 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
25688 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
25689 gen_rtx_LABEL_REF (VOIDmode, not_more),
25690 pc_rtx);
25691 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
25692 JUMP_LABEL (jump) = not_more;
25693 LABEL_NUSES (not_more) += 1;
25694 emit_move_insn (r12, r29);
25695 emit_label (not_more);
25696 }
25697 }
25698 }
25699
25700 /* Output .extern statements for the save/restore routines we use. */
25701
25702 static void
25703 rs6000_output_savres_externs (FILE *file)
25704 {
25705 rs6000_stack_t *info = rs6000_stack_info ();
25706
25707 if (TARGET_DEBUG_STACK)
25708 debug_stack_info (info);
25709
25710 /* Write .extern for any function we will call to save and restore
25711 fp values. */
25712 if (info->first_fp_reg_save < 64
25713 && !TARGET_MACHO
25714 && !TARGET_ELF)
25715 {
25716 char *name;
25717 int regno = info->first_fp_reg_save - 32;
25718
25719 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
25720 {
25721 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
25722 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
25723 name = rs6000_savres_routine_name (info, regno, sel);
25724 fprintf (file, "\t.extern %s\n", name);
25725 }
25726 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
25727 {
25728 bool lr = (info->savres_strategy
25729 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25730 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25731 name = rs6000_savres_routine_name (info, regno, sel);
25732 fprintf (file, "\t.extern %s\n", name);
25733 }
25734 }
25735 }
25736
25737 /* Write function prologue. */
25738
25739 static void
25740 rs6000_output_function_prologue (FILE *file,
25741 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25742 {
25743 if (!cfun->is_thunk)
25744 rs6000_output_savres_externs (file);
25745
25746 /* ELFv2 ABI r2 setup code and local entry point. This must follow
25747 immediately after the global entry point label. */
25748 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
25749 {
25750 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25751
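      /* On entry through the global entry point, ELFv2 guarantees that
         r12 holds the function's own entry address, so r2 can be set
         to the TOC pointer by adding the constant .TOC.-0b to r12.  */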
25752 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
25753 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
25754
25755 fputs ("\t.localentry\t", file);
25756 assemble_name (file, name);
25757 fputs (",.-", file);
25758 assemble_name (file, name);
25759 fputs ("\n", file);
25760 }
25761
25762 /* Output -mprofile-kernel code. This needs to be done here instead of
25763 in output_function_profile since it must go after the ELFv2 ABI
25764 local entry point. */
25765 if (TARGET_PROFILE_KERNEL && crtl->profile)
25766 {
25767 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25768 gcc_assert (!TARGET_32BIT);
25769
25770 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
25771
25772 /* In the ELFv2 ABI we have no compiler stack word. It must be
25773 	 the responsibility of _mcount to preserve the static chain
25774 register if required. */
25775 if (DEFAULT_ABI != ABI_ELFv2
25776 && cfun->static_chain_decl != NULL)
25777 {
25778 asm_fprintf (file, "\tstd %s,24(%s)\n",
25779 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
25780 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
25781 asm_fprintf (file, "\tld %s,24(%s)\n",
25782 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
25783 }
25784 else
25785 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
25786 }
25787
25788 rs6000_pic_labelno++;
25789 }
25790
25791 /* Nonzero if VMX regs are restored before the frame pop, zero if
25792    we restore them after the pop when possible.  */
25793 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
25794
25795 /* Restoring cr is a two step process: loading a reg from the frame
25796 save, then moving the reg to cr. For ABI_V4 we must let the
25797 unwinder know that the stack location is no longer valid at or
25798 before the stack deallocation, but we can't emit a cfa_restore for
25799 cr at the stack deallocation like we do for other registers.
25800 The trouble is that it is possible for the move to cr to be
25801 scheduled after the stack deallocation. So say exactly where cr
25802 is located on each of the two insns. */
25803
25804 static rtx
25805 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
25806 {
25807 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
25808 rtx reg = gen_rtx_REG (SImode, regno);
25809 rtx_insn *insn = emit_move_insn (reg, mem);
25810
25811 if (!exit_func && DEFAULT_ABI == ABI_V4)
25812 {
25813 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
25814 rtx set = gen_rtx_SET (reg, cr);
25815
25816 add_reg_note (insn, REG_CFA_REGISTER, set);
25817 RTX_FRAME_RELATED_P (insn) = 1;
25818 }
25819 return reg;
25820 }
25821
25822 /* Reload CR from REG. */
25823
25824 static void
25825 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
25826 {
25827 int count = 0;
25828 int i;
25829
25830 if (using_mfcr_multiple)
25831 {
25832 for (i = 0; i < 8; i++)
25833 if (save_reg_p (CR0_REGNO + i))
25834 count++;
25835 gcc_assert (count);
25836 }
25837
25838 if (using_mfcr_multiple && count > 1)
25839 {
25840 rtx_insn *insn;
25841 rtvec p;
25842 int ndx;
25843
25844 p = rtvec_alloc (count);
25845
25846 ndx = 0;
25847 for (i = 0; i < 8; i++)
25848 if (save_reg_p (CR0_REGNO + i))
25849 {
25850 rtvec r = rtvec_alloc (2);
25851 RTVEC_ELT (r, 0) = reg;
25852 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
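 	      /* Note that in the 8-bit mtcrf field mask, CR0 maps to the
 		 most significant bit, so CR field I is selected by
 		 bit 7-I (i.e. 0x80 >> I).  */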
25853 RTVEC_ELT (p, ndx) =
25854 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
25855 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
25856 ndx++;
25857 }
25858 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25859 gcc_assert (ndx == count);
25860
25861 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
25862 CR field separately. */
25863 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
25864 {
25865 for (i = 0; i < 8; i++)
25866 if (save_reg_p (CR0_REGNO + i))
25867 add_reg_note (insn, REG_CFA_RESTORE,
25868 gen_rtx_REG (SImode, CR0_REGNO + i));
25869
25870 RTX_FRAME_RELATED_P (insn) = 1;
25871 }
25872 }
25873 else
25874 for (i = 0; i < 8; i++)
25875 if (save_reg_p (CR0_REGNO + i))
25876 {
25877 rtx insn = emit_insn (gen_movsi_to_cr_one
25878 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
25879
25880 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
25881 CR field separately, attached to the insn that in fact
25882 restores this particular CR field. */
25883 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
25884 {
25885 add_reg_note (insn, REG_CFA_RESTORE,
25886 gen_rtx_REG (SImode, CR0_REGNO + i));
25887
25888 RTX_FRAME_RELATED_P (insn) = 1;
25889 }
25890 }
25891
25892 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
25893 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
25894 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
25895 {
25896 rtx_insn *insn = get_last_insn ();
25897 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
25898
25899 add_reg_note (insn, REG_CFA_RESTORE, cr);
25900 RTX_FRAME_RELATED_P (insn) = 1;
25901 }
25902 }
25903
25904 /* Like cr, the move to lr instruction can be scheduled after the
25905 stack deallocation, but unlike cr, its stack frame save is still
25906 valid. So we only need to emit the cfa_restore on the correct
25907 instruction. */
25908
25909 static void
25910 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
25911 {
25912 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
25913 rtx reg = gen_rtx_REG (Pmode, regno);
25914
25915 emit_move_insn (reg, mem);
25916 }
25917
25918 static void
25919 restore_saved_lr (int regno, bool exit_func)
25920 {
25921 rtx reg = gen_rtx_REG (Pmode, regno);
25922 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
25923 rtx_insn *insn = emit_move_insn (lr, reg);
25924
25925 if (!exit_func && flag_shrink_wrap)
25926 {
25927 add_reg_note (insn, REG_CFA_RESTORE, lr);
25928 RTX_FRAME_RELATED_P (insn) = 1;
25929 }
25930 }
25931
25932 static rtx
25933 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
25934 {
25935 if (DEFAULT_ABI == ABI_ELFv2)
25936 {
25937 int i;
25938 for (i = 0; i < 8; i++)
25939 if (save_reg_p (CR0_REGNO + i))
25940 {
25941 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
25942 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
25943 cfa_restores);
25944 }
25945 }
25946 else if (info->cr_save_p)
25947 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
25948 gen_rtx_REG (SImode, CR2_REGNO),
25949 cfa_restores);
25950
25951 if (info->lr_save_p)
25952 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
25953 gen_rtx_REG (Pmode, LR_REGNO),
25954 cfa_restores);
25955 return cfa_restores;
25956 }
25957
25958 /* Return true if OFFSET from stack pointer can be clobbered by signals.
25959    V.4 doesn't have any stack cushion; the AIX ABIs keep 220 or 288
25960    bytes below the stack pointer not clobbered by signals. */
25961
25962 static inline bool
25963 offset_below_red_zone_p (HOST_WIDE_INT offset)
25964 {
25965 return offset < (DEFAULT_ABI == ABI_V4
25966 ? 0
25967 : TARGET_32BIT ? -220 : -288);
25968 }
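/* For example, with the 288-byte 64-bit AIX/ELFv2 red zone, an offset
   of -288 still lies within the protected area, while -289 is below
   it and may be clobbered by a signal handler.  */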
25969
25970 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
25971
25972 static void
25973 emit_cfa_restores (rtx cfa_restores)
25974 {
25975 rtx_insn *insn = get_last_insn ();
25976 rtx *loc = &REG_NOTES (insn);
25977
25978 while (*loc)
25979 loc = &XEXP (*loc, 1);
25980 *loc = cfa_restores;
25981 RTX_FRAME_RELATED_P (insn) = 1;
25982 }
25983
25984 /* Emit function epilogue as insns. */
25985
25986 void
25987 rs6000_emit_epilogue (int sibcall)
25988 {
25989 rs6000_stack_t *info;
25990 int restoring_GPRs_inline;
25991 int restoring_FPRs_inline;
25992 int using_load_multiple;
25993 int using_mtcr_multiple;
25994 int use_backchain_to_restore_sp;
25995 int restore_lr;
25996 int strategy;
25997 HOST_WIDE_INT frame_off = 0;
25998 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
25999 rtx frame_reg_rtx = sp_reg_rtx;
26000 rtx cfa_restores = NULL_RTX;
26001 rtx insn;
26002 rtx cr_save_reg = NULL_RTX;
26003 machine_mode reg_mode = Pmode;
26004 int reg_size = TARGET_32BIT ? 4 : 8;
26005 int i;
26006 bool exit_func;
26007 unsigned ptr_regno;
26008
26009 info = rs6000_stack_info ();
26010
26011 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26012 {
26013 reg_mode = V2SImode;
26014 reg_size = 8;
26015 }
26016
26017 strategy = info->savres_strategy;
26018 using_load_multiple = strategy & SAVRES_MULTIPLE;
26019 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
26020 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
26021 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
26022 || rs6000_cpu == PROCESSOR_PPC603
26023 || rs6000_cpu == PROCESSOR_PPC750
26024 || optimize_size);
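  /* On these older processors (and when optimizing for size) a single
     mtcrf restoring several CR fields at once is assumed preferable;
     elsewhere, one-field mtcrf moves are assumed cheaper.  */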
26025 /* Restore via the backchain when we have a large frame, since this
26026 is more efficient than an addis, addi pair. The second condition
26027      here will not trigger at the moment; we don't actually need a
26028 frame pointer for alloca, but the generic parts of the compiler
26029 give us one anyway. */
26030 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
26031 ? info->lr_save_offset
26032 : 0) > 32767
26033 || (cfun->calls_alloca
26034 && !frame_pointer_needed));
26035 restore_lr = (info->lr_save_p
26036 && (restoring_FPRs_inline
26037 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
26038 && (restoring_GPRs_inline
26039 || info->first_fp_reg_save < 64));
26040
26041 if (WORLD_SAVE_P (info))
26042 {
26043 int i, j;
26044 char rname[30];
26045 const char *alloc_rname;
26046 rtvec p;
26047
26048 /* eh_rest_world_r10 will return to the location saved in the LR
26049 	 stack slot (which is not likely to be our caller).
26050 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
26051 rest_world is similar, except any R10 parameter is ignored.
26052 The exception-handling stuff that was here in 2.95 is no
26053 longer necessary. */
26054
26055 p = rtvec_alloc (9
26056 + 1
26057 + 32 - info->first_gp_reg_save
26058 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
26059 + 63 + 1 - info->first_fp_reg_save);
26060
26061 strcpy (rname, ((crtl->calls_eh_return) ?
26062 "*eh_rest_world_r10" : "*rest_world"));
26063 alloc_rname = ggc_strdup (rname);
26064
26065 j = 0;
26066 RTVEC_ELT (p, j++) = ret_rtx;
26067 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26068 gen_rtx_REG (Pmode,
26069 LR_REGNO));
26070 RTVEC_ELT (p, j++)
26071 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
26072 /* The instruction pattern requires a clobber here;
26073 it is shared with the restVEC helper. */
26074 RTVEC_ELT (p, j++)
26075 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
26076
26077 {
26078 /* CR register traditionally saved as CR2. */
26079 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
26080 RTVEC_ELT (p, j++)
26081 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
26082 if (flag_shrink_wrap)
26083 {
26084 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26085 gen_rtx_REG (Pmode, LR_REGNO),
26086 cfa_restores);
26087 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26088 }
26089 }
26090
26091 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26092 {
26093 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
26094 RTVEC_ELT (p, j++)
26095 = gen_frame_load (reg,
26096 frame_reg_rtx, info->gp_save_offset + reg_size * i);
26097 if (flag_shrink_wrap)
26098 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26099 }
26100 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26101 {
26102 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
26103 RTVEC_ELT (p, j++)
26104 = gen_frame_load (reg,
26105 frame_reg_rtx, info->altivec_save_offset + 16 * i);
26106 if (flag_shrink_wrap)
26107 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26108 }
26109 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
26110 {
26111 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26112 ? DFmode : SFmode),
26113 info->first_fp_reg_save + i);
26114 RTVEC_ELT (p, j++)
26115 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
26116 if (flag_shrink_wrap)
26117 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26118 }
26119 RTVEC_ELT (p, j++)
26120 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
26121 RTVEC_ELT (p, j++)
26122 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
26123 RTVEC_ELT (p, j++)
26124 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
26125 RTVEC_ELT (p, j++)
26126 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
26127 RTVEC_ELT (p, j++)
26128 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
26129 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
26130
26131 if (flag_shrink_wrap)
26132 {
26133 REG_NOTES (insn) = cfa_restores;
26134 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
26135 RTX_FRAME_RELATED_P (insn) = 1;
26136 }
26137 return;
26138 }
26139
26140 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
26141 if (info->push_p)
26142 frame_off = info->total_size;
26143
26144 /* Restore AltiVec registers if we must do so before adjusting the
26145 stack. */
26146 if (info->altivec_size != 0
26147 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26148 || (DEFAULT_ABI != ABI_V4
26149 && offset_below_red_zone_p (info->altivec_save_offset))))
26150 {
26151 int i;
26152 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
26153
26154 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
26155 if (use_backchain_to_restore_sp)
26156 {
26157 int frame_regno = 11;
26158
26159 if ((strategy & REST_INLINE_VRS) == 0)
26160 {
26161 /* Of r11 and r12, select the one not clobbered by an
26162 out-of-line restore function for the frame register. */
26163 frame_regno = 11 + 12 - scratch_regno;
26164 }
26165 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
26166 emit_move_insn (frame_reg_rtx,
26167 gen_rtx_MEM (Pmode, sp_reg_rtx));
26168 frame_off = 0;
26169 }
26170 else if (frame_pointer_needed)
26171 frame_reg_rtx = hard_frame_pointer_rtx;
26172
26173 if ((strategy & REST_INLINE_VRS) == 0)
26174 {
26175 int end_save = info->altivec_save_offset + info->altivec_size;
26176 int ptr_off;
26177 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26178 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26179
26180 if (end_save + frame_off != 0)
26181 {
26182 rtx offset = GEN_INT (end_save + frame_off);
26183
26184 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26185 }
26186 else
26187 emit_move_insn (ptr_reg, frame_reg_rtx);
26188
26189 ptr_off = -end_save;
26190 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26191 info->altivec_save_offset + ptr_off,
26192 0, V4SImode, SAVRES_VR);
26193 }
26194 else
26195 {
26196 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26197 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26198 {
26199 rtx addr, areg, mem, reg;
26200
26201 areg = gen_rtx_REG (Pmode, 0);
26202 emit_move_insn
26203 (areg, GEN_INT (info->altivec_save_offset
26204 + frame_off
26205 + 16 * (i - info->first_altivec_reg_save)));
26206
26207 /* AltiVec addressing mode is [reg+reg]. */
26208 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
26209 mem = gen_frame_mem (V4SImode, addr);
26210
26211 reg = gen_rtx_REG (V4SImode, i);
26212 /* Rather than emitting a generic move, force use of the
26213 lvx instruction, which we always want. In particular
26214 we don't want lxvd2x/xxpermdi for little endian. */
26215 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
26216 }
26217 }
26218
26219 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26220 if (((strategy & REST_INLINE_VRS) == 0
26221 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
26222 && (flag_shrink_wrap
26223 || (offset_below_red_zone_p
26224 (info->altivec_save_offset
26225 + 16 * (i - info->first_altivec_reg_save)))))
26226 {
26227 rtx reg = gen_rtx_REG (V4SImode, i);
26228 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26229 }
26230 }
26231
26232 /* Restore VRSAVE if we must do so before adjusting the stack. */
26233 if (info->vrsave_size != 0
26234 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26235 || (DEFAULT_ABI != ABI_V4
26236 && offset_below_red_zone_p (info->vrsave_save_offset))))
26237 {
26238 rtx reg;
26239
26240 if (frame_reg_rtx == sp_reg_rtx)
26241 {
26242 if (use_backchain_to_restore_sp)
26243 {
26244 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26245 emit_move_insn (frame_reg_rtx,
26246 gen_rtx_MEM (Pmode, sp_reg_rtx));
26247 frame_off = 0;
26248 }
26249 else if (frame_pointer_needed)
26250 frame_reg_rtx = hard_frame_pointer_rtx;
26251 }
26252
26253 reg = gen_rtx_REG (SImode, 12);
26254 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26255 info->vrsave_save_offset + frame_off));
26256
26257 emit_insn (generate_set_vrsave (reg, info, 1));
26258 }
26259
26260 insn = NULL_RTX;
26261 /* If we have a large stack frame, restore the old stack pointer
26262 using the backchain. */
26263 if (use_backchain_to_restore_sp)
26264 {
26265 if (frame_reg_rtx == sp_reg_rtx)
26266 {
26267 /* Under V.4, don't reset the stack pointer until after we're done
26268 loading the saved registers. */
26269 if (DEFAULT_ABI == ABI_V4)
26270 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26271
26272 insn = emit_move_insn (frame_reg_rtx,
26273 gen_rtx_MEM (Pmode, sp_reg_rtx));
26274 frame_off = 0;
26275 }
26276 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26277 && DEFAULT_ABI == ABI_V4)
26278 /* frame_reg_rtx has been set up by the altivec restore. */
26279 ;
26280 else
26281 {
26282 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
26283 frame_reg_rtx = sp_reg_rtx;
26284 }
26285 }
26286 /* If we have a frame pointer, we can restore the old stack pointer
26287 from it. */
26288 else if (frame_pointer_needed)
26289 {
26290 frame_reg_rtx = sp_reg_rtx;
26291 if (DEFAULT_ABI == ABI_V4)
26292 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26293 /* Prevent reordering memory accesses against stack pointer restore. */
26294 else if (cfun->calls_alloca
26295 || offset_below_red_zone_p (-info->total_size))
26296 rs6000_emit_stack_tie (frame_reg_rtx, true);
26297
26298 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
26299 GEN_INT (info->total_size)));
26300 frame_off = 0;
26301 }
26302 else if (info->push_p
26303 && DEFAULT_ABI != ABI_V4
26304 && !crtl->calls_eh_return)
26305 {
26306 /* Prevent reordering memory accesses against stack pointer restore. */
26307 if (cfun->calls_alloca
26308 || offset_below_red_zone_p (-info->total_size))
26309 rs6000_emit_stack_tie (frame_reg_rtx, false);
26310 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
26311 GEN_INT (info->total_size)));
26312 frame_off = 0;
26313 }
26314 if (insn && frame_reg_rtx == sp_reg_rtx)
26315 {
26316 if (cfa_restores)
26317 {
26318 REG_NOTES (insn) = cfa_restores;
26319 cfa_restores = NULL_RTX;
26320 }
26321 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
26322 RTX_FRAME_RELATED_P (insn) = 1;
26323 }
26324
26325 /* Restore AltiVec registers if we have not done so already. */
26326 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26327 && info->altivec_size != 0
26328 && (DEFAULT_ABI == ABI_V4
26329 || !offset_below_red_zone_p (info->altivec_save_offset)))
26330 {
26331 int i;
26332
26333 if ((strategy & REST_INLINE_VRS) == 0)
26334 {
26335 int end_save = info->altivec_save_offset + info->altivec_size;
26336 int ptr_off;
26337 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26338 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
26339 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26340
26341 if (end_save + frame_off != 0)
26342 {
26343 rtx offset = GEN_INT (end_save + frame_off);
26344
26345 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26346 }
26347 else
26348 emit_move_insn (ptr_reg, frame_reg_rtx);
26349
26350 ptr_off = -end_save;
26351 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26352 info->altivec_save_offset + ptr_off,
26353 0, V4SImode, SAVRES_VR);
26354 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
26355 {
26356 /* Frame reg was clobbered by out-of-line save. Restore it
26357 from ptr_reg, and if we are calling out-of-line gpr or
26358 fpr restore set up the correct pointer and offset. */
26359 unsigned newptr_regno = 1;
26360 if (!restoring_GPRs_inline)
26361 {
26362 bool lr = info->gp_save_offset + info->gp_size == 0;
26363 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
26364 newptr_regno = ptr_regno_for_savres (sel);
26365 end_save = info->gp_save_offset + info->gp_size;
26366 }
26367 else if (!restoring_FPRs_inline)
26368 {
26369 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
26370 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
26371 newptr_regno = ptr_regno_for_savres (sel);
26372 end_save = info->fp_save_offset + info->fp_size;
26373 }
26374
26375 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
26376 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
26377
26378 if (end_save + ptr_off != 0)
26379 {
26380 rtx offset = GEN_INT (end_save + ptr_off);
26381
26382 frame_off = -end_save;
26383 if (TARGET_32BIT)
26384 emit_insn (gen_addsi3_carry (frame_reg_rtx,
26385 ptr_reg, offset));
26386 else
26387 emit_insn (gen_adddi3_carry (frame_reg_rtx,
26388 ptr_reg, offset));
26389 }
26390 else
26391 {
26392 frame_off = ptr_off;
26393 emit_move_insn (frame_reg_rtx, ptr_reg);
26394 }
26395 }
26396 }
26397 else
26398 {
26399 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26400 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26401 {
26402 rtx addr, areg, mem, reg;
26403
26404 areg = gen_rtx_REG (Pmode, 0);
26405 emit_move_insn
26406 (areg, GEN_INT (info->altivec_save_offset
26407 + frame_off
26408 + 16 * (i - info->first_altivec_reg_save)));
26409
26410 /* AltiVec addressing mode is [reg+reg]. */
26411 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
26412 mem = gen_frame_mem (V4SImode, addr);
26413
26414 reg = gen_rtx_REG (V4SImode, i);
26415 /* Rather than emitting a generic move, force use of the
26416 lvx instruction, which we always want. In particular
26417 we don't want lxvd2x/xxpermdi for little endian. */
26418 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
26419 }
26420 }
26421
26422 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26423 if (((strategy & REST_INLINE_VRS) == 0
26424 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
26425 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
26426 {
26427 rtx reg = gen_rtx_REG (V4SImode, i);
26428 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26429 }
26430 }
26431
26432 /* Restore VRSAVE if we have not done so already. */
26433 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26434 && info->vrsave_size != 0
26435 && (DEFAULT_ABI == ABI_V4
26436 || !offset_below_red_zone_p (info->vrsave_save_offset)))
26437 {
26438 rtx reg;
26439
26440 reg = gen_rtx_REG (SImode, 12);
26441 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26442 info->vrsave_save_offset + frame_off));
26443
26444 emit_insn (generate_set_vrsave (reg, info, 1));
26445 }
26446
26447 /* If we exit by an out-of-line restore function on ABI_V4 then that
26448 function will deallocate the stack, so we don't need to worry
26449 about the unwinder restoring cr from an invalid stack frame
26450 location. */
26451 exit_func = (!restoring_FPRs_inline
26452 || (!restoring_GPRs_inline
26453 && info->first_fp_reg_save == 64));
26454
26455 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
26456 *separate* slots if the routine calls __builtin_eh_return, so
26457 that they can be independently restored by the unwinder. */
26458 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26459 {
26460 int i, cr_off = info->ehcr_offset;
26461
26462 for (i = 0; i < 8; i++)
26463 if (!call_used_regs[CR0_REGNO + i])
26464 {
26465 rtx reg = gen_rtx_REG (SImode, 0);
26466 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26467 cr_off + frame_off));
26468
26469 insn = emit_insn (gen_movsi_to_cr_one
26470 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
26471
26472 if (!exit_func && flag_shrink_wrap)
26473 {
26474 add_reg_note (insn, REG_CFA_RESTORE,
26475 gen_rtx_REG (SImode, CR0_REGNO + i));
26476
26477 RTX_FRAME_RELATED_P (insn) = 1;
26478 }
26479
26480 cr_off += reg_size;
26481 }
26482 }
26483
26484 /* Get the old lr if we saved it. If we are restoring registers
26485 out-of-line, then the out-of-line routines can do this for us. */
26486 if (restore_lr && restoring_GPRs_inline)
26487 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
26488
26489 /* Get the old cr if we saved it. */
26490 if (info->cr_save_p)
26491 {
26492 unsigned cr_save_regno = 12;
26493
26494 if (!restoring_GPRs_inline)
26495 {
26496 /* Ensure we don't use the register used by the out-of-line
26497 gpr register restore below. */
26498 bool lr = info->gp_save_offset + info->gp_size == 0;
26499 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
26500 int gpr_ptr_regno = ptr_regno_for_savres (sel);
26501
26502 if (gpr_ptr_regno == 12)
26503 cr_save_regno = 11;
26504 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
26505 }
26506 else if (REGNO (frame_reg_rtx) == 12)
26507 cr_save_regno = 11;
26508
26509 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
26510 info->cr_save_offset + frame_off,
26511 exit_func);
26512 }
26513
26514 /* Set LR here to try to overlap restores below. */
26515 if (restore_lr && restoring_GPRs_inline)
26516 restore_saved_lr (0, exit_func);
26517
26518 /* Load exception handler data registers, if needed. */
26519 if (crtl->calls_eh_return)
26520 {
26521 unsigned int i, regno;
26522
26523 if (TARGET_AIX)
26524 {
26525 rtx reg = gen_rtx_REG (reg_mode, 2);
26526 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26527 frame_off + RS6000_TOC_SAVE_SLOT));
26528 }
26529
26530 for (i = 0; ; ++i)
26531 {
26532 rtx mem;
26533
26534 regno = EH_RETURN_DATA_REGNO (i);
26535 if (regno == INVALID_REGNUM)
26536 break;
26537
26538 /* Note: possible use of r0 here to address SPE regs. */
26539 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
26540 info->ehrd_offset + frame_off
26541 + reg_size * (int) i);
26542
26543 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
26544 }
26545 }
26546
26547 /* Restore GPRs. This is done as a PARALLEL if we are using
26548 the load-multiple instructions. */
26549 if (TARGET_SPE_ABI
26550 && info->spe_64bit_regs_used
26551 && info->first_gp_reg_save != 32)
26552 {
26553 /* Determine whether we can address all of the registers that need
26554 	 to be restored with an offset from frame_reg_rtx that fits in
26555 the small const field for SPE memory instructions. */
26556 int spe_regs_addressable
26557 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
26558 + reg_size * (32 - info->first_gp_reg_save - 1))
26559 && restoring_GPRs_inline);
26560
26561 if (!spe_regs_addressable)
26562 {
26563 int ool_adjust = 0;
26564 rtx old_frame_reg_rtx = frame_reg_rtx;
26565 /* Make r11 point to the start of the SPE save area. We worried about
26566 not clobbering it when we were saving registers in the prologue.
26567 There's no need to worry here because the static chain is passed
26568 anew to every function. */
26569
26570 if (!restoring_GPRs_inline)
26571 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
26572 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26573 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
26574 GEN_INT (info->spe_gp_save_offset
26575 + frame_off
26576 - ool_adjust)));
26577 /* Keep the invariant that frame_reg_rtx + frame_off points
26578 at the top of the stack frame. */
26579 frame_off = -info->spe_gp_save_offset + ool_adjust;
26580 }
26581
26582 if (restoring_GPRs_inline)
26583 {
26584 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
26585
26586 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26587 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
26588 {
26589 rtx offset, addr, mem, reg;
26590
26591 /* We're doing all this to ensure that the immediate offset
26592 fits into the immediate field of 'evldd'. */
26593 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
26594
26595 offset = GEN_INT (spe_offset + reg_size * i);
26596 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
26597 mem = gen_rtx_MEM (V2SImode, addr);
26598 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
26599
26600 emit_move_insn (reg, mem);
26601 }
26602 }
26603 else
26604 rs6000_emit_savres_rtx (info, frame_reg_rtx,
26605 info->spe_gp_save_offset + frame_off,
26606 info->lr_save_offset + frame_off,
26607 reg_mode,
26608 SAVRES_GPR | SAVRES_LR);
26609 }
26610 else if (!restoring_GPRs_inline)
26611 {
26612 /* We are jumping to an out-of-line function. */
26613 rtx ptr_reg;
26614 int end_save = info->gp_save_offset + info->gp_size;
26615 bool can_use_exit = end_save == 0;
26616 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
26617 int ptr_off;
26618
26619 /* Emit stack reset code if we need it. */
26620 ptr_regno = ptr_regno_for_savres (sel);
26621 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26622 if (can_use_exit)
26623 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
26624 else if (end_save + frame_off != 0)
26625 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
26626 GEN_INT (end_save + frame_off)));
26627 else if (REGNO (frame_reg_rtx) != ptr_regno)
26628 emit_move_insn (ptr_reg, frame_reg_rtx);
26629 if (REGNO (frame_reg_rtx) == ptr_regno)
26630 frame_off = -end_save;
26631
26632 if (can_use_exit && info->cr_save_p)
26633 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
26634
26635 ptr_off = -end_save;
26636 rs6000_emit_savres_rtx (info, ptr_reg,
26637 info->gp_save_offset + ptr_off,
26638 info->lr_save_offset + ptr_off,
26639 reg_mode, sel);
26640 }
26641 else if (using_load_multiple)
26642 {
26643 rtvec p;
26644 p = rtvec_alloc (32 - info->first_gp_reg_save);
26645 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26646 RTVEC_ELT (p, i)
26647 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26648 frame_reg_rtx,
26649 info->gp_save_offset + frame_off + reg_size * i);
26650 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26651 }
26652 else
26653 {
26654 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26655 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
26656 emit_insn (gen_frame_load
26657 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26658 frame_reg_rtx,
26659 info->gp_save_offset + frame_off + reg_size * i));
26660 }
26661
26662 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
26663 {
26664 /* If the frame pointer was used then we can't delay emitting
26665 a REG_CFA_DEF_CFA note. This must happen on the insn that
26666 restores the frame pointer, r31. We may have already emitted
26667 	 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
26668 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
26669 be harmless if emitted. */
26670 if (frame_pointer_needed)
26671 {
26672 insn = get_last_insn ();
26673 add_reg_note (insn, REG_CFA_DEF_CFA,
26674 plus_constant (Pmode, frame_reg_rtx, frame_off));
26675 RTX_FRAME_RELATED_P (insn) = 1;
26676 }
26677
26678 /* Set up cfa_restores. We always need these when
26679 shrink-wrapping. If not shrink-wrapping then we only need
26680 the cfa_restore when the stack location is no longer valid.
26681 The cfa_restores must be emitted on or before the insn that
26682 invalidates the stack, and of course must not be emitted
26683 before the insn that actually does the restore. The latter
26684 is why it is a bad idea to emit the cfa_restores as a group
26685 on the last instruction here that actually does a restore:
26686 That insn may be reordered with respect to others doing
26687 restores. */
26688 if (flag_shrink_wrap
26689 && !restoring_GPRs_inline
26690 && info->first_fp_reg_save == 64)
26691 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
26692
26693 for (i = info->first_gp_reg_save; i < 32; i++)
26694 if (!restoring_GPRs_inline
26695 || using_load_multiple
26696 || rs6000_reg_live_or_pic_offset_p (i))
26697 {
26698 rtx reg = gen_rtx_REG (reg_mode, i);
26699
26700 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26701 }
26702 }
26703
26704 if (!restoring_GPRs_inline
26705 && info->first_fp_reg_save == 64)
26706 {
26707 /* We are jumping to an out-of-line function. */
26708 if (cfa_restores)
26709 emit_cfa_restores (cfa_restores);
26710 return;
26711 }
26712
26713 if (restore_lr && !restoring_GPRs_inline)
26714 {
26715 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
26716 restore_saved_lr (0, exit_func);
26717 }
26718
26719   /* Restore FPRs if we need to do it without calling a function.  */
26720 if (restoring_FPRs_inline)
26721 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26722 if (save_reg_p (info->first_fp_reg_save + i))
26723 {
26724 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26725 ? DFmode : SFmode),
26726 info->first_fp_reg_save + i);
26727 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26728 info->fp_save_offset + frame_off + 8 * i));
26729 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
26730 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26731 }
26732
26733 /* If we saved cr, restore it here. Just those that were used. */
26734 if (info->cr_save_p)
26735 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
26736
26737 /* If this is V.4, unwind the stack pointer after all of the loads
26738 have been done, or set up r11 if we are restoring fp out of line. */
26739 ptr_regno = 1;
26740 if (!restoring_FPRs_inline)
26741 {
26742 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
26743 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
26744 ptr_regno = ptr_regno_for_savres (sel);
26745 }
26746
26747 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
26748 if (REGNO (frame_reg_rtx) == ptr_regno)
26749 frame_off = 0;
26750
26751 if (insn && restoring_FPRs_inline)
26752 {
26753 if (cfa_restores)
26754 {
26755 REG_NOTES (insn) = cfa_restores;
26756 cfa_restores = NULL_RTX;
26757 }
26758 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
26759 RTX_FRAME_RELATED_P (insn) = 1;
26760 }
26761
26762 if (crtl->calls_eh_return)
26763 {
26764 rtx sa = EH_RETURN_STACKADJ_RTX;
26765 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
26766 }
26767
26768 if (!sibcall)
26769 {
26770 rtvec p;
26771 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
26772 if (! restoring_FPRs_inline)
26773 {
26774 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
26775 RTVEC_ELT (p, 0) = ret_rtx;
26776 }
26777 else
26778 {
26779 if (cfa_restores)
26780 {
26781 /* We can't hang the cfa_restores off a simple return,
26782 since the shrink-wrap code sometimes uses an existing
26783 return. This means there might be a path from
26784 pre-prologue code to this return, and dwarf2cfi code
26785 wants the eh_frame unwinder state to be the same on
26786 all paths to any point. So we need to emit the
26787 cfa_restores before the return. For -m64 we really
26788 don't need epilogue cfa_restores at all, except for
26789 	     this irritating dwarf2cfi-with-shrink-wrap
26790 	     requirement; the stack red-zone means eh_frame info
26791 from the prologue telling the unwinder to restore
26792 from the stack is perfectly good right to the end of
26793 the function. */
26794 emit_insn (gen_blockage ());
26795 emit_cfa_restores (cfa_restores);
26796 cfa_restores = NULL_RTX;
26797 }
26798 p = rtvec_alloc (2);
26799 RTVEC_ELT (p, 0) = simple_return_rtx;
26800 }
26801
26802 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
26803 ? gen_rtx_USE (VOIDmode,
26804 gen_rtx_REG (Pmode, LR_REGNO))
26805 : gen_rtx_CLOBBER (VOIDmode,
26806 gen_rtx_REG (Pmode, LR_REGNO)));
26807
26808 /* If we have to restore more than two FP registers, branch to the
26809 restore function. It will return to our caller. */
26810 if (! restoring_FPRs_inline)
26811 {
26812 int i;
26813 int reg;
26814 rtx sym;
26815
26816 if (flag_shrink_wrap)
26817 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
26818
26819 sym = rs6000_savres_routine_sym (info,
26820 SAVRES_FPR | (lr ? SAVRES_LR : 0));
26821 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
26822 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
26823 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
26824
26825 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26826 {
26827 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
26828
26829 RTVEC_ELT (p, i + 4)
26830 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
26831 if (flag_shrink_wrap)
26832 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
26833 cfa_restores);
26834 }
26835 }
26836
26837 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
26838 }
26839
26840 if (cfa_restores)
26841 {
26842 if (sibcall)
26843 /* Ensure the cfa_restores are hung off an insn that won't
26844 be reordered above other restores. */
26845 emit_insn (gen_blockage ());
26846
26847 emit_cfa_restores (cfa_restores);
26848 }
26849 }
26850
26851 /* Write function epilogue. */
26852
26853 static void
26854 rs6000_output_function_epilogue (FILE *file,
26855 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
26856 {
26857 #if TARGET_MACHO
26858 macho_branch_islands ();
26859 /* Mach-O doesn't support labels at the end of objects, so if
26860 it looks like we might want one, insert a NOP. */
26861 {
26862 rtx_insn *insn = get_last_insn ();
26863 rtx_insn *deleted_debug_label = NULL;
26864 while (insn
26865 && NOTE_P (insn)
26866 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
26867 {
26868 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
26869 	   notes; instead set their CODE_LABEL_NUMBER to -1,
26870 	   otherwise there would be code generation differences
26871 	   between -g and -g0. */
26872 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
26873 deleted_debug_label = insn;
26874 insn = PREV_INSN (insn);
26875 }
26876 if (insn
26877 && (LABEL_P (insn)
26878 || (NOTE_P (insn)
26879 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
26880 fputs ("\tnop\n", file);
26881 else if (deleted_debug_label)
26882 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
26883 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
26884 CODE_LABEL_NUMBER (insn) = -1;
26885 }
26886 #endif
26887
26888 /* Output a traceback table here. See /usr/include/sys/debug.h for info
26889 on its format.
26890
26891 We don't output a traceback table if -finhibit-size-directive was
26892 used. The documentation for -finhibit-size-directive reads
26893 ``don't output a @code{.size} assembler directive, or anything
26894 else that would cause trouble if the function is split in the
26895 middle, and the two halves are placed at locations far apart in
26896 memory.'' The traceback table has this property, since it
26897 includes the offset from the start of the function to the
26898 traceback table itself.
26899
26900      System V.4 PowerPC targets (and the embedded ABI derived from it) use a
26901 different traceback table. */
26902 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26903 && ! flag_inhibit_size_directive
26904 && rs6000_traceback != traceback_none && !cfun->is_thunk)
26905 {
26906 const char *fname = NULL;
26907 const char *language_string = lang_hooks.name;
26908 int fixed_parms = 0, float_parms = 0, parm_info = 0;
26909 int i;
26910 int optional_tbtab;
26911 rs6000_stack_t *info = rs6000_stack_info ();
26912
26913 if (rs6000_traceback == traceback_full)
26914 optional_tbtab = 1;
26915 else if (rs6000_traceback == traceback_part)
26916 optional_tbtab = 0;
26917 else
26918 optional_tbtab = !optimize_size && !TARGET_ELF;
26919
26920 if (optional_tbtab)
26921 {
26922 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26923 while (*fname == '.') /* V.4 encodes . in the name */
26924 fname++;
26925
26926 /* Need label immediately before tbtab, so we can compute
26927 its offset from the function start. */
26928 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
26929 ASM_OUTPUT_LABEL (file, fname);
26930 }
26931
26932 /* The .tbtab pseudo-op can only be used for the first eight
26933 expressions, since it can't handle the possibly variable
26934 length fields that follow. However, if you omit the optional
26935 fields, the assembler outputs zeros for all optional fields
26936 	 anyway, giving each variable length field its minimum length
26937 	 (as defined in sys/debug.h). Thus we cannot use the .tbtab
26938 pseudo-op at all. */
26939
26940 /* An all-zero word flags the start of the tbtab, for debuggers
26941 that have to find it by searching forward from the entry
26942 point or from the current pc. */
26943 fputs ("\t.long 0\n", file);
26944
26945 /* Tbtab format type. Use format type 0. */
26946 fputs ("\t.byte 0,", file);
26947
26948 /* Language type. Unfortunately, there does not seem to be any
26949 official way to discover the language being compiled, so we
26950 use language_string.
26951 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
26952 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
26953 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
26954 either, so for now use 0. */
26955 if (lang_GNU_C ()
26956 || ! strcmp (language_string, "GNU GIMPLE")
26957 || ! strcmp (language_string, "GNU Go")
26958 || ! strcmp (language_string, "libgccjit"))
26959 i = 0;
26960 else if (! strcmp (language_string, "GNU F77")
26961 || lang_GNU_Fortran ())
26962 i = 1;
26963 else if (! strcmp (language_string, "GNU Pascal"))
26964 i = 2;
26965 else if (! strcmp (language_string, "GNU Ada"))
26966 i = 3;
26967 else if (lang_GNU_CXX ()
26968 || ! strcmp (language_string, "GNU Objective-C++"))
26969 i = 9;
26970 else if (! strcmp (language_string, "GNU Java"))
26971 i = 13;
26972 else if (! strcmp (language_string, "GNU Objective-C"))
26973 i = 14;
26974 else
26975 gcc_unreachable ();
26976 fprintf (file, "%d,", i);
26977
26978 /* 8 single bit fields: global linkage (not set for C extern linkage,
26979 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
26980 from start of procedure stored in tbtab, internal function, function
26981 has controlled storage, function has no toc, function uses fp,
26982 function logs/aborts fp operations. */
26983 /* Assume that fp operations are used if any fp reg must be saved. */
26984 fprintf (file, "%d,",
26985 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
26986
26987 /* 6 bitfields: function is interrupt handler, name present in
26988 proc table, function calls alloca, on condition directives
26989 (controls stack walks, 3 bits), saves condition reg, saves
26990 link reg. */
26991 /* The `function calls alloca' bit seems to be set whenever reg 31 is
26992 set up as a frame pointer, even when there is no alloca call. */
26993 fprintf (file, "%d,",
26994 ((optional_tbtab << 6)
26995 | ((optional_tbtab & frame_pointer_needed) << 5)
26996 | (info->cr_save_p << 1)
26997 | (info->lr_save_p)));
26998
26999 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
27000 (6 bits). */
27001 fprintf (file, "%d,",
27002 (info->push_p << 7) | (64 - info->first_fp_reg_save));
27003
27004 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
27005 fprintf (file, "%d,", (32 - first_reg_to_save ()));
27006
27007 if (optional_tbtab)
27008 {
27009 /* Compute the parameter info from the function decl argument
27010 list. */
27011 tree decl;
27012 int next_parm_info_bit = 31;
27013
27014 for (decl = DECL_ARGUMENTS (current_function_decl);
27015 decl; decl = DECL_CHAIN (decl))
27016 {
27017 rtx parameter = DECL_INCOMING_RTL (decl);
27018 machine_mode mode = GET_MODE (parameter);
27019
27020 if (GET_CODE (parameter) == REG)
27021 {
27022 if (SCALAR_FLOAT_MODE_P (mode))
27023 {
27024 int bits;
27025
27026 float_parms++;
27027
27028 switch (mode)
27029 {
27030 case SFmode:
27031 case SDmode:
27032 bits = 0x2;
27033 break;
27034
27035 case DFmode:
27036 case DDmode:
27037 case TFmode:
27038 case TDmode:
27039 case IFmode:
27040 case KFmode:
27041 bits = 0x3;
27042 break;
27043
27044 default:
27045 gcc_unreachable ();
27046 }
27047
27048 /* If only one bit will fit, don't or in this entry. */
27049 if (next_parm_info_bit > 0)
27050 parm_info |= (bits << (next_parm_info_bit - 1));
27051 next_parm_info_bit -= 2;
27052 }
27053 else
27054 {
27055 fixed_parms += ((GET_MODE_SIZE (mode)
27056 + (UNITS_PER_WORD - 1))
27057 / UNITS_PER_WORD);
27058 next_parm_info_bit -= 1;
27059 }
27060 }
27061 }
27062 }
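      /* Illustrative worked example (hypothetical signature): for
	 double f (int a, double b) with both arguments arriving in
	 registers, the int leaves bit 31 as the single 0 bit of a
	 fixed parameter, and the double stores 0b11 in bits 30-29,
	 so fixed_parms = 1, float_parms = 1 and
	 parm_info = 0x60000000.  */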
27063
27064 /* Number of fixed point parameters. */
27065 /* This is actually the number of words of fixed point parameters; thus
27066      an 8 byte struct counts as 2, and thus the maximum value is 8. */
27067 fprintf (file, "%d,", fixed_parms);
27068
27069 /* 2 bitfields: number of floating point parameters (7 bits), parameters
27070 all on stack. */
27071 /* This is actually the number of fp registers that hold parameters;
27072 and thus the maximum value is 13. */
27073 /* Set parameters on stack bit if parameters are not in their original
27074 registers, regardless of whether they are on the stack? Xlc
27075 seems to set the bit when not optimizing. */
27076 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
27077
27078 if (! optional_tbtab)
27079 return;
27080
27081 /* Optional fields follow. Some are variable length. */
27082
27083 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
27084 11 double float. */
27085 /* There is an entry for each parameter in a register, in the order that
27086 they occur in the parameter list. Any intervening arguments on the
27087 stack are ignored. If the list overflows a long (max possible length
27088 34 bits) then completely leave off all elements that don't fit. */
27089 /* Only emit this long if there was at least one parameter. */
27090 if (fixed_parms || float_parms)
27091 fprintf (file, "\t.long %d\n", parm_info);
27092
27093 /* Offset from start of code to tb table. */
27094 fputs ("\t.long ", file);
27095 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
27096 RS6000_OUTPUT_BASENAME (file, fname);
27097 putc ('-', file);
27098 rs6000_output_function_entry (file, fname);
27099 putc ('\n', file);
27100
27101 /* Interrupt handler mask. */
27102 /* Omit this long, since we never set the interrupt handler bit
27103 above. */
27104
27105 /* Number of CTL (controlled storage) anchors. */
27106 /* Omit this long, since the has_ctl bit is never set above. */
27107
27108 /* Displacement into stack of each CTL anchor. */
27109 /* Omit this list of longs, because there are no CTL anchors. */
27110
27111 /* Length of function name. */
27112 if (*fname == '*')
27113 ++fname;
27114 fprintf (file, "\t.short %d\n", (int) strlen (fname));
27115
27116 /* Function name. */
27117 assemble_string (fname, strlen (fname));
27118
27119 /* Register for alloca automatic storage; this is always reg 31.
27120 Only emit this if the alloca bit was set above. */
27121 if (frame_pointer_needed)
27122 fputs ("\t.byte 31\n", file);
27123
27124 fputs ("\t.align 2\n", file);
27125 }
27126 }
27127
27128 /* -fsplit-stack support. */
27129
27130 /* A SYMBOL_REF for __morestack. */
27131 static GTY(()) rtx morestack_ref;
27132
27133 static rtx
27134 gen_add3_const (rtx rt, rtx ra, long c)
27135 {
27136 if (TARGET_64BIT)
27137 return gen_adddi3 (rt, ra, GEN_INT (c));
27138 else
27139 return gen_addsi3 (rt, ra, GEN_INT (c));
27140 }
27141
27142 /* Emit -fsplit-stack prologue, which goes before the regular function
27143 prologue (at local entry point in the case of ELFv2). */
27144
27145 void
27146 rs6000_expand_split_stack_prologue (void)
27147 {
27148 rs6000_stack_t *info = rs6000_stack_info ();
27149 unsigned HOST_WIDE_INT allocate;
27150 long alloc_hi, alloc_lo;
27151 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
27152 rtx_insn *insn;
27153
27154 gcc_assert (flag_split_stack && reload_completed);
27155
27156 if (!info->push_p)
27157 return;
27158
27159 if (global_regs[29])
27160 {
27161 error ("-fsplit-stack uses register r29");
27162 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
27163 "conflicts with %qD", global_regs_decl[29]);
27164 }
27165
27166 allocate = info->total_size;
27167 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
27168 {
27169 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
27170 return;
27171 }
27172 if (morestack_ref == NULL_RTX)
27173 {
27174 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
27175 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
27176 | SYMBOL_FLAG_FUNCTION);
27177 }
27178
27179 r0 = gen_rtx_REG (Pmode, 0);
27180 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27181 r12 = gen_rtx_REG (Pmode, 12);
27182 emit_insn (gen_load_split_stack_limit (r0));
27183   /* Always emit two insns here to calculate the requested stack pointer,
27184 so that the linker can edit them when adjusting size for calling
27185 non-split-stack code. */
27186 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
27187 alloc_lo = -allocate - alloc_hi;
27188 if (alloc_hi != 0)
27189 {
27190 emit_insn (gen_add3_const (r12, r1, alloc_hi));
27191 if (alloc_lo != 0)
27192 emit_insn (gen_add3_const (r12, r12, alloc_lo));
27193 else
27194 emit_insn (gen_nop ());
27195 }
27196 else
27197 {
27198 emit_insn (gen_add3_const (r12, r1, alloc_lo));
27199 emit_insn (gen_nop ());
27200 }
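  /* Illustrative worked example: for allocate = 70000, -allocate is
     -70000, giving alloc_hi = -65536 and alloc_lo = -4464 (a valid
     signed 16-bit immediate).  The two insns emitted are then an
     addis of -1 (adding -65536) followed by an addi of -4464, so
     r12 = r1 - 70000 as required.  */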
27201
27202 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
27203 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
27204 ok_label = gen_label_rtx ();
27205 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27206 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
27207 gen_rtx_LABEL_REF (VOIDmode, ok_label),
27208 pc_rtx);
27209 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27210 JUMP_LABEL (jump) = ok_label;
27211 /* Mark the jump as very likely to be taken. */
27212 add_int_reg_note (jump, REG_BR_PROB,
27213 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
27214
27215 lr = gen_rtx_REG (Pmode, LR_REGNO);
27216 insn = emit_move_insn (r0, lr);
27217 RTX_FRAME_RELATED_P (insn) = 1;
27218 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
27219 RTX_FRAME_RELATED_P (insn) = 1;
27220
27221 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
27222 const0_rtx, const0_rtx));
27223 call_fusage = NULL_RTX;
27224 use_reg (&call_fusage, r12);
27225 add_function_usage_to (insn, call_fusage);
27226 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
27227 insn = emit_move_insn (lr, r0);
27228 add_reg_note (insn, REG_CFA_RESTORE, lr);
27229 RTX_FRAME_RELATED_P (insn) = 1;
27230 emit_insn (gen_split_stack_return ());
27231
27232 emit_label (ok_label);
27233 LABEL_NUSES (ok_label) = 1;
27234 }
27235
27236 /* Return the internal arg pointer used for function incoming
27237    arguments.  When -fsplit-stack is in use, the arg pointer is r12; we need
27238 to copy it to a pseudo in order for it to be preserved over calls
27239 and suchlike. We'd really like to use a pseudo here for the
27240 internal arg pointer but data-flow analysis is not prepared to
27241 accept pseudos as live at the beginning of a function. */
27242
27243 static rtx
27244 rs6000_internal_arg_pointer (void)
27245 {
27246 if (flag_split_stack
27247 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
27248 == NULL))
27249
27250 {
27251 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
27252 {
27253 rtx pat;
27254
27255 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
27256 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
27257
27258 /* Put the pseudo initialization right after the note at the
27259 beginning of the function. */
27260 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
27261 gen_rtx_REG (Pmode, 12));
27262 push_topmost_sequence ();
27263 emit_insn_after (pat, get_insns ());
27264 pop_topmost_sequence ();
27265 }
27266 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
27267 FIRST_PARM_OFFSET (current_function_decl));
27268 }
27269 return virtual_incoming_args_rtx;
27270 }
27271
27272 /* We may have to tell the dataflow pass that the split stack prologue
27273 is initializing a register. */
27274
27275 static void
27276 rs6000_live_on_entry (bitmap regs)
27277 {
27278 if (flag_split_stack)
27279 bitmap_set_bit (regs, 12);
27280 }
27281
27282 /* Emit -fsplit-stack dynamic stack allocation space check. */
27283
27284 void
27285 rs6000_split_stack_space_check (rtx size, rtx label)
27286 {
27287 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27288 rtx limit = gen_reg_rtx (Pmode);
27289 rtx requested = gen_reg_rtx (Pmode);
27290 rtx cmp = gen_reg_rtx (CCUNSmode);
27291 rtx jump;
27292
27293 emit_insn (gen_load_split_stack_limit (limit));
27294 if (CONST_INT_P (size))
27295 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
27296 else
27297 {
27298 size = force_reg (Pmode, size);
27299 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
27300 }
27301 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
27302 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27303 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
27304 gen_rtx_LABEL_REF (VOIDmode, label),
27305 pc_rtx);
27306 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27307 JUMP_LABEL (jump) = label;
27308 }
27309 \f
27310 /* A C compound statement that outputs the assembler code for a thunk
27311 function, used to implement C++ virtual function calls with
27312 multiple inheritance. The thunk acts as a wrapper around a virtual
27313 function, adjusting the implicit object parameter before handing
27314 control off to the real function.
27315
27316 First, emit code to add the integer DELTA to the location that
27317 contains the incoming first argument. Assume that this argument
27318 contains a pointer, and is the one used to pass the `this' pointer
27319 in C++. This is the incoming argument *before* the function
27320 prologue, e.g. `%o0' on a sparc. The addition must preserve the
27321 values of all other incoming arguments.
27322
27323 After the addition, emit code to jump to FUNCTION, which is a
27324 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
27325 not touch the return address. Hence returning from FUNCTION will
27326 return to whoever called the current `thunk'.
27327
27328 The effect must be as if FUNCTION had been called directly with the
27329 adjusted first argument. This macro is responsible for emitting
27330 all of the code for a thunk function; output_function_prologue()
27331 and output_function_epilogue() are not invoked.
27332
27333 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
27334 been extracted from it.) It might possibly be useful on some
27335 targets, but probably not.
27336
27337 If you do not define this macro, the target-independent code in the
27338 C++ frontend will generate a less efficient heavyweight thunk that
27339 calls FUNCTION instead of jumping to it. The generic approach does
27340 not support varargs. */
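/* In rough C-like pseudocode (an illustrative sketch only, with the
   vtable load written out), the thunk body generated below is:

	this += delta;
	if (vcall_offset)
	  this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
	goto function;
*/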
27341
27342 static void
27343 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
27344 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
27345 tree function)
27346 {
27347 rtx this_rtx, funexp;
27348 rtx_insn *insn;
27349
27350 reload_completed = 1;
27351 epilogue_completed = 1;
27352
27353 /* Mark the end of the (empty) prologue. */
27354 emit_note (NOTE_INSN_PROLOGUE_END);
27355
27356 /* Find the "this" pointer. If the function returns a structure,
27357 the structure return pointer is in r3. */
27358 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
27359 this_rtx = gen_rtx_REG (Pmode, 4);
27360 else
27361 this_rtx = gen_rtx_REG (Pmode, 3);
27362
27363 /* Apply the constant offset, if required. */
27364 if (delta)
27365 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
27366
27367 /* Apply the offset from the vtable, if required. */
27368 if (vcall_offset)
27369 {
27370 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
27371 rtx tmp = gen_rtx_REG (Pmode, 12);
27372
27373 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
27374 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
27375 {
27376 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
27377 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
27378 }
27379 else
27380 {
27381 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
27382
27383 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
27384 }
27385 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
27386 }
27387
27388 /* Generate a tail call to the target function. */
27389 if (!TREE_USED (function))
27390 {
27391 assemble_external (function);
27392 TREE_USED (function) = 1;
27393 }
27394 funexp = XEXP (DECL_RTL (function), 0);
27395 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
27396
27397 #if TARGET_MACHO
27398 if (MACHOPIC_INDIRECT)
27399 funexp = machopic_indirect_call_target (funexp);
27400 #endif
27401
27402 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
27403 generate sibcall RTL explicitly. */
27404 insn = emit_call_insn (
27405 gen_rtx_PARALLEL (VOIDmode,
27406 gen_rtvec (4,
27407 gen_rtx_CALL (VOIDmode,
27408 funexp, const0_rtx),
27409 gen_rtx_USE (VOIDmode, const0_rtx),
27410 gen_rtx_USE (VOIDmode,
27411 gen_rtx_REG (SImode,
27412 LR_REGNO)),
27413 simple_return_rtx)));
27414 SIBLING_CALL_P (insn) = 1;
27415 emit_barrier ();
27416
27417 /* Ensure we have a global entry point for the thunk. ??? We could
27418 avoid that if the target routine doesn't need a global entry point,
27419 but we do not know whether this is the case at this point. */
27420 if (DEFAULT_ABI == ABI_ELFv2
27421 && !TARGET_SINGLE_PIC_BASE)
27422 cfun->machine->r2_setup_needed = true;
27423
27424 /* Run just enough of rest_of_compilation to get the insns emitted.
27425 There's not really enough bulk here to make other passes such as
27426 instruction scheduling worth while. Note that use_thunk calls
27427 assemble_start_function and assemble_end_function. */
27428 insn = get_insns ();
27429 shorten_branches (insn);
27430 final_start_function (insn, file, 1);
27431 final (insn, file, 1);
27432 final_end_function ();
27433
27434 reload_completed = 0;
27435 epilogue_completed = 0;
27436 }
27437 \f
27438 /* A quick summary of the various types of 'constant-pool tables'
27439 under PowerPC:
27440
27441 Target Flags Name One table per
27442 AIX (none) AIX TOC object file
27443 AIX -mfull-toc AIX TOC object file
27444 AIX -mminimal-toc AIX minimal TOC translation unit
27445 SVR4/EABI (none) SVR4 SDATA object file
27446 SVR4/EABI -fpic SVR4 pic object file
27447 SVR4/EABI -fPIC SVR4 PIC translation unit
27448 SVR4/EABI -mrelocatable EABI TOC function
27449 SVR4/EABI -maix AIX TOC object file
27450 SVR4/EABI -maix -mminimal-toc
27451 AIX minimal TOC translation unit
27452
27453 Name Reg. Set by entries contains:
27454 made by addrs? fp? sum?
27455
27456 AIX TOC 2 crt0 as Y option option
27457 AIX minimal TOC 30 prolog gcc Y Y option
27458 SVR4 SDATA 13 crt0 gcc N Y N
27459 SVR4 pic 30 prolog ld Y not yet N
27460 SVR4 PIC 30 prolog gcc Y option option
27461 EABI TOC 30 prolog gcc Y option option
27462
27463 */
27464
27465 /* Hash functions for the hash table. */
27466
27467 static unsigned
27468 rs6000_hash_constant (rtx k)
27469 {
27470 enum rtx_code code = GET_CODE (k);
27471 machine_mode mode = GET_MODE (k);
27472 unsigned result = (code << 3) ^ mode;
27473 const char *format;
27474 int flen, fidx;
27475
27476 format = GET_RTX_FORMAT (code);
27477 flen = strlen (format);
27478 fidx = 0;
27479
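/* The multipliers 613 and 1231 used below are arbitrary primes that
   serve only to spread the hash values.  */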
27480 switch (code)
27481 {
27482 case LABEL_REF:
27483 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
27484
27485 case CONST_WIDE_INT:
27486 {
27487 int i;
27488 flen = CONST_WIDE_INT_NUNITS (k);
27489 for (i = 0; i < flen; i++)
27490 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
27491 return result;
27492 }
27493
27494 case CONST_DOUBLE:
27495 if (mode != VOIDmode)
27496 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
27497 flen = 2;
27498 break;
27499
27500 case CODE_LABEL:
27501 fidx = 3;
27502 break;
27503
27504 default:
27505 break;
27506 }
27507
27508 for (; fidx < flen; fidx++)
27509 switch (format[fidx])
27510 {
27511 case 's':
27512 {
27513 unsigned i, len;
27514 const char *str = XSTR (k, fidx);
27515 len = strlen (str);
27516 result = result * 613 + len;
27517 for (i = 0; i < len; i++)
27518 result = result * 613 + (unsigned) str[i];
27519 break;
27520 }
27521 case 'u':
27522 case 'e':
27523 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
27524 break;
27525 case 'i':
27526 case 'n':
27527 result = result * 613 + (unsigned) XINT (k, fidx);
27528 break;
27529 case 'w':
27530 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
27531 result = result * 613 + (unsigned) XWINT (k, fidx);
27532 else
27533 {
27534 size_t i;
27535 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
27536 result = result * 613 + (unsigned) (XWINT (k, fidx)
27537 >> CHAR_BIT * i);
27538 }
27539 break;
27540 case '0':
27541 break;
27542 default:
27543 gcc_unreachable ();
27544 }
27545
27546 return result;
27547 }
27548
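/* Hash a TOC entry by combining the hash of its key constant
   with the entry's mode.  */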
27549 hashval_t
27550 toc_hasher::hash (toc_hash_struct *thc)
27551 {
27552 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
27553 }
27554
27555 /* Compare H1 and H2 for equivalence. */
27556
27557 bool
27558 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
27559 {
27560 rtx r1 = h1->key;
27561 rtx r2 = h2->key;
27562
27563 if (h1->key_mode != h2->key_mode)
27564 return 0;
27565
27566 return rtx_equal_p (r1, r2);
27567 }
27568
27569 /* These are the names given by the C++ front-end to vtables, and
27570 vtable-like objects. Ideally, this logic should not be here;
27571 instead, there should be some programmatic way of inquiring as
27572 to whether or not an object is a vtable. */
27573
27574 #define VTABLE_NAME_P(NAME) \
27575 (strncmp ("_vt.", NAME, strlen ("_vt.")) == 0 \
27576 || strncmp ("_ZTV", NAME, strlen ("_ZTV")) == 0 \
27577 || strncmp ("_ZTT", NAME, strlen ("_ZTT")) == 0 \
27578 || strncmp ("_ZTI", NAME, strlen ("_ZTI")) == 0 \
27579 || strncmp ("_ZTC", NAME, strlen ("_ZTC")) == 0)
27580
27581 #ifdef NO_DOLLAR_IN_LABEL
27582 /* Return a GGC-allocated character string translating dollar signs in
27583 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
27584
27585 const char *
27586 rs6000_xcoff_strip_dollar (const char *name)
27587 {
27588 char *strip, *p;
27589 const char *q;
27590 size_t len;
27591
27592 q = (const char *) strchr (name, '$');
27593
27594 if (q == 0 || q == name)
27595 return name;
27596
27597 len = strlen (name);
27598 strip = XALLOCAVEC (char, len + 1);
27599 strcpy (strip, name);
27600 p = strip + (q - name);
27601 while (p)
27602 {
27603 *p = '_';
27604 p = strchr (p + 1, '$');
27605 }
27606
27607 return ggc_alloc_string (strip, len);
27608 }
27609 #endif
27610
27611 void
27612 rs6000_output_symbol_ref (FILE *file, rtx x)
27613 {
27614 /* Currently C++ toc references to vtables can be emitted before it
27615 is decided whether the vtable is public or private. If this is
27616 the case, then the linker will eventually complain that there is
27617 a reference to an unknown section. Thus, for vtables only,
27618 we emit the TOC reference to reference the symbol and not the
27619 section. */
27620 const char *name = XSTR (x, 0);
27621
27622 tree decl = SYMBOL_REF_DECL (x);
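/* Qualify an external XCOFF symbol with its storage-mapping class:
   [DS] for a function descriptor csect, [UA] for unclassified data.  */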
27623 if (decl /* sync condition with assemble_external () */
27624 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
27625 && (TREE_CODE (decl) == VAR_DECL
27626 || TREE_CODE (decl) == FUNCTION_DECL)
27627 && name[strlen (name) - 1] != ']')
27628 {
27629 name = concat (name,
27630 (TREE_CODE (decl) == FUNCTION_DECL
27631 ? "[DS]" : "[UA]"),
27632 NULL);
27633 XSTR (x, 0) = name;
27634 }
27635
27636 if (VTABLE_NAME_P (name))
27637 {
27638 RS6000_OUTPUT_BASENAME (file, name);
27639 }
27640 else
27641 assemble_name (file, name);
27642 }
27643
27644 /* Output a TOC entry. We derive the entry name from what is being
27645 written. */
27646
27647 void
27648 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
27649 {
27650 char buf[256];
27651 const char *name = buf;
27652 rtx base = x;
27653 HOST_WIDE_INT offset = 0;
27654
27655 gcc_assert (!TARGET_NO_TOC);
27656
27657 /* When the linker won't eliminate them, don't output duplicate
27658 TOC entries (this happens on AIX if there is any kind of TOC,
27659 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
27660 CODE_LABELs. */
27661 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
27662 {
27663 struct toc_hash_struct *h;
27664
27665 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
27666 time because GGC is not initialized at that point. */
27667 if (toc_hash_table == NULL)
27668 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
27669
27670 h = ggc_alloc<toc_hash_struct> ();
27671 h->key = x;
27672 h->key_mode = mode;
27673 h->labelno = labelno;
27674
27675 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
27676 if (*found == NULL)
27677 *found = h;
27678 else /* This is indeed a duplicate.
27679 Set this label equal to that label. */
27680 {
27681 fputs ("\t.set ", file);
27682 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
27683 fprintf (file, "%d,", labelno);
27684 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
27685 fprintf (file, "%d\n", ((*found)->labelno));
27686
27687 #ifdef HAVE_AS_TLS
27688 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
27689 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
27690 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
27691 {
27692 fputs ("\t.set ", file);
27693 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
27694 fprintf (file, "%d,", labelno);
27695 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
27696 fprintf (file, "%d\n", ((*found)->labelno));
27697 }
27698 #endif
27699 return;
27700 }
27701 }
27702
27703 /* If we're going to put a double constant in the TOC, make sure it's
27704 aligned properly when strict alignment is on. */
27705 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
27706 && STRICT_ALIGNMENT
27707 && GET_MODE_BITSIZE (mode) >= 64
27708 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
27709 ASM_OUTPUT_ALIGN (file, 3);
27710
27711
27712 (*targetm.asm_out.internal_label) (file, "LC", labelno);
27713
27714 /* Handle FP constants specially. Note that if we have a minimal
27715 TOC, things we put here aren't actually in the TOC, so we can allow
27716 FP constants. */
27717 if (GET_CODE (x) == CONST_DOUBLE
27718 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
27719 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
27720 {
27721 long k[4];
27722
27723 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
27724 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
27725 else
27726 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
27727
27728 if (TARGET_64BIT)
27729 {
27730 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27731 fputs (DOUBLE_INT_ASM_OP, file);
27732 else
27733 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
27734 k[0] & 0xffffffff, k[1] & 0xffffffff,
27735 k[2] & 0xffffffff, k[3] & 0xffffffff);
27736 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
27737 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
27738 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
27739 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
27740 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
27741 return;
27742 }
27743 else
27744 {
27745 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27746 fputs ("\t.long ", file);
27747 else
27748 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
27749 k[0] & 0xffffffff, k[1] & 0xffffffff,
27750 k[2] & 0xffffffff, k[3] & 0xffffffff);
27751 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
27752 k[0] & 0xffffffff, k[1] & 0xffffffff,
27753 k[2] & 0xffffffff, k[3] & 0xffffffff);
27754 return;
27755 }
27756 }
27757 else if (GET_CODE (x) == CONST_DOUBLE
27758 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
27759 {
27760 long k[2];
27761
27762 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
27763 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
27764 else
27765 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
27766
27767 if (TARGET_64BIT)
27768 {
27769 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27770 fputs (DOUBLE_INT_ASM_OP, file);
27771 else
27772 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
27773 k[0] & 0xffffffff, k[1] & 0xffffffff);
27774 fprintf (file, "0x%lx%08lx\n",
27775 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
27776 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
27777 return;
27778 }
27779 else
27780 {
27781 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27782 fputs ("\t.long ", file);
27783 else
27784 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
27785 k[0] & 0xffffffff, k[1] & 0xffffffff);
27786 fprintf (file, "0x%lx,0x%lx\n",
27787 k[0] & 0xffffffff, k[1] & 0xffffffff);
27788 return;
27789 }
27790 }
27791 else if (GET_CODE (x) == CONST_DOUBLE
27792 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
27793 {
27794 long l;
27795
27796 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
27797 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
27798 else
27799 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
27800
27801 if (TARGET_64BIT)
27802 {
27803 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27804 fputs (DOUBLE_INT_ASM_OP, file);
27805 else
27806 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
27807 if (WORDS_BIG_ENDIAN)
27808 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
27809 else
27810 fprintf (file, "0x%lx\n", l & 0xffffffff);
27811 return;
27812 }
27813 else
27814 {
27815 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27816 fputs ("\t.long ", file);
27817 else
27818 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
27819 fprintf (file, "0x%lx\n", l & 0xffffffff);
27820 return;
27821 }
27822 }
27823 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
27824 {
27825 unsigned HOST_WIDE_INT low;
27826 HOST_WIDE_INT high;
27827
27828 low = INTVAL (x) & 0xffffffff;
27829 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
27830
27831 /* TOC entries are always Pmode-sized, so when big-endian
27832 smaller integer constants in the TOC need to be padded.
27833 (This is still a win over putting the constants in
27834 a separate constant pool, because then we'd have
27835 to have both a TOC entry _and_ the actual constant.)
27836
27837 For a 32-bit target, CONST_INT values are loaded and shifted
27838 entirely within `low' and can be stored in one TOC entry. */
27839
27840 /* It would be easy to make this work, but it doesn't now. */
27841 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
27842
27843 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
27844 {
27845 low |= high << 32;
27846 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
27847 high = (HOST_WIDE_INT) low >> 32;
27848 low &= 0xffffffff;
27849 }
27850
27851 if (TARGET_64BIT)
27852 {
27853 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27854 fputs (DOUBLE_INT_ASM_OP, file);
27855 else
27856 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
27857 (long) high & 0xffffffff, (long) low & 0xffffffff);
27858 fprintf (file, "0x%lx%08lx\n",
27859 (long) high & 0xffffffff, (long) low & 0xffffffff);
27860 return;
27861 }
27862 else
27863 {
27864 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
27865 {
27866 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27867 fputs ("\t.long ", file);
27868 else
27869 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
27870 (long) high & 0xffffffff, (long) low & 0xffffffff);
27871 fprintf (file, "0x%lx,0x%lx\n",
27872 (long) high & 0xffffffff, (long) low & 0xffffffff);
27873 }
27874 else
27875 {
27876 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27877 fputs ("\t.long ", file);
27878 else
27879 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
27880 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
27881 }
27882 return;
27883 }
27884 }
27885
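/* The remaining cases are symbolic: a SYMBOL_REF, LABEL_REF or
   CODE_LABEL, possibly wrapped in a CONST plus an integer offset.  */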
27886 if (GET_CODE (x) == CONST)
27887 {
27888 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
27889 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
27890
27891 base = XEXP (XEXP (x, 0), 0);
27892 offset = INTVAL (XEXP (XEXP (x, 0), 1));
27893 }
27894
27895 switch (GET_CODE (base))
27896 {
27897 case SYMBOL_REF:
27898 name = XSTR (base, 0);
27899 break;
27900
27901 case LABEL_REF:
27902 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
27903 CODE_LABEL_NUMBER (XEXP (base, 0)));
27904 break;
27905
27906 case CODE_LABEL:
27907 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
27908 break;
27909
27910 default:
27911 gcc_unreachable ();
27912 }
27913
27914 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27915 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
27916 else
27917 {
27918 fputs ("\t.tc ", file);
27919 RS6000_OUTPUT_BASENAME (file, name);
27920
27921 if (offset < 0)
27922 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
27923 else if (offset)
27924 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
27925
27926 /* Mark large TOC symbols on AIX with [TE] so they are mapped
27927 after other TOC symbols, reducing overflow of small TOC access
27928 to [TC] symbols. */
27929 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
27930 ? "[TE]," : "[TC],", file);
27931 }
27932
27933 /* Currently C++ toc references to vtables can be emitted before it
27934 is decided whether the vtable is public or private. If this is
27935 the case, then the linker will eventually complain that there is
27936 a TOC reference to an unknown section. Thus, for vtables only,
27937 we emit the TOC reference to reference the symbol and not the
27938 section. */
27939 if (VTABLE_NAME_P (name))
27940 {
27941 RS6000_OUTPUT_BASENAME (file, name);
27942 if (offset < 0)
27943 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
27944 else if (offset > 0)
27945 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
27946 }
27947 else
27948 output_addr_const (file, x);
27949
27950 #if HAVE_AS_TLS
27951 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
27952 {
27953 switch (SYMBOL_REF_TLS_MODEL (base))
27954 {
27955 case 0:
27956 break;
27957 case TLS_MODEL_LOCAL_EXEC:
27958 fputs ("@le", file);
27959 break;
27960 case TLS_MODEL_INITIAL_EXEC:
27961 fputs ("@ie", file);
27962 break;
27963 /* Use global-dynamic for local-dynamic. */
27964 case TLS_MODEL_GLOBAL_DYNAMIC:
27965 case TLS_MODEL_LOCAL_DYNAMIC:
27966 putc ('\n', file);
27967 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
27968 fputs ("\t.tc .", file);
27969 RS6000_OUTPUT_BASENAME (file, name);
27970 fputs ("[TC],", file);
27971 output_addr_const (file, x);
27972 fputs ("@m", file);
27973 break;
27974 default:
27975 gcc_unreachable ();
27976 }
27977 }
27978 #endif
27979
27980 putc ('\n', file);
27981 }
27982 \f
27983 /* Output an assembler pseudo-op to write an ASCII string of N characters
27984 starting at P to FILE.
27985
27986 On the RS/6000, we have to do this using the .byte operation and
27987 write out special characters outside the quoted string.
27988 Also, the assembler is broken; very long strings are truncated,
27989 so we must artificially break them up early. */
27990
27991 void
27992 output_ascii (FILE *file, const char *p, int n)
27993 {
27994 char c;
27995 int i, count_string;
27996 const char *for_string = "\t.byte \"";
27997 const char *for_decimal = "\t.byte ";
27998 const char *to_close = NULL;
27999
28000 count_string = 0;
28001 for (i = 0; i < n; i++)
28002 {
28003 c = *p++;
28004 if (c >= ' ' && c < 0177)
28005 {
28006 if (for_string)
28007 fputs (for_string, file);
28008 putc (c, file);
28009
28010 /* Write two quotes to get one. */
28011 if (c == '"')
28012 {
28013 putc (c, file);
28014 ++count_string;
28015 }
28016
28017 for_string = NULL;
28018 for_decimal = "\"\n\t.byte ";
28019 to_close = "\"\n";
28020 ++count_string;
28021
28022 if (count_string >= 512)
28023 {
28024 fputs (to_close, file);
28025
28026 for_string = "\t.byte \"";
28027 for_decimal = "\t.byte ";
28028 to_close = NULL;
28029 count_string = 0;
28030 }
28031 }
28032 else
28033 {
28034 if (for_decimal)
28035 fputs (for_decimal, file);
28036 fprintf (file, "%d", c);
28037
28038 for_string = "\n\t.byte \"";
28039 for_decimal = ", ";
28040 to_close = "\n";
28041 count_string = 0;
28042 }
28043 }
28044
28045 /* Now close the string if we have written one. Then end the line. */
28046 if (to_close)
28047 fputs (to_close, file);
28048 }
28049 \f
28050 /* Generate a unique section name for FILENAME for a section type
28051 represented by SECTION_DESC. Output goes into BUF.
28052
28053 SECTION_DESC can be any string, as long as it is different for each
28054 possible section type.
28055
28056 We name the section in the same manner as xlc. The name begins with an
28057 underscore followed by the filename (after stripping any leading directory
28058 names) with the last period replaced by the string SECTION_DESC. If
28059 FILENAME does not contain a period, SECTION_DESC is appended to the end of
28060 the name. */
28061
28062 void
28063 rs6000_gen_section_name (char **buf, const char *filename,
28064 const char *section_desc)
28065 {
28066 const char *q, *after_last_slash, *last_period = 0;
28067 char *p;
28068 int len;
28069
28070 after_last_slash = filename;
28071 for (q = filename; *q; q++)
28072 {
28073 if (*q == '/')
28074 after_last_slash = q + 1;
28075 else if (*q == '.')
28076 last_period = q;
28077 }
28078
28079 len = strlen (after_last_slash) + strlen (section_desc) + 2;
28080 *buf = (char *) xmalloc (len);
28081
28082 p = *buf;
28083 *p++ = '_';
28084
28085 for (q = after_last_slash; *q; q++)
28086 {
28087 if (q == last_period)
28088 {
28089 strcpy (p, section_desc);
28090 p += strlen (section_desc);
28091 break;
28092 }
28093
28094 else if (ISALNUM (*q))
28095 *p++ = *q;
28096 }
28097
28098 if (last_period == 0)
28099 strcpy (p, section_desc);
28100 else
28101 *p = '\0';
28102 }
28103 \f
28104 /* Emit profile function. */
28105
28106 void
28107 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
28108 {
28109 /* Non-standard profiling for kernels, which just saves LR then calls
28110 _mcount without worrying about arg saves. The idea is to change
28111 the function prologue as little as possible as it isn't easy to
28112 account for arg save/restore code added just for _mcount. */
28113 if (TARGET_PROFILE_KERNEL)
28114 return;
28115
28116 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28117 {
28118 #ifndef NO_PROFILE_COUNTERS
28119 # define NO_PROFILE_COUNTERS 0
28120 #endif
28121 if (NO_PROFILE_COUNTERS)
28122 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
28123 LCT_NORMAL, VOIDmode, 0);
28124 else
28125 {
28126 char buf[30];
28127 const char *label_name;
28128 rtx fun;
28129
28130 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
28131 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
28132 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
28133
28134 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
28135 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
28136 }
28137 }
28138 else if (DEFAULT_ABI == ABI_DARWIN)
28139 {
28140 const char *mcount_name = RS6000_MCOUNT;
28141 int caller_addr_regno = LR_REGNO;
28142
28143 /* Be conservative and always set this, at least for now. */
28144 crtl->uses_pic_offset_table = 1;
28145
28146 #if TARGET_MACHO
28147 /* For PIC code, set up a stub and collect the caller's address
28148 from r0, which is where the prologue puts it. */
28149 if (MACHOPIC_INDIRECT
28150 && crtl->uses_pic_offset_table)
28151 caller_addr_regno = 0;
28152 #endif
28153 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
28154 LCT_NORMAL, VOIDmode, 1,
28155 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
28156 }
28157 }
28158
28159 /* Write function profiler code. */
28160
28161 void
28162 output_function_profiler (FILE *file, int labelno)
28163 {
28164 char buf[100];
28165
28166 switch (DEFAULT_ABI)
28167 {
28168 default:
28169 gcc_unreachable ();
28170
28171 case ABI_V4:
28172 if (!TARGET_32BIT)
28173 {
28174 warning (0, "no profiling of 64-bit code for this ABI");
28175 return;
28176 }
28177 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
28178 fprintf (file, "\tmflr %s\n", reg_names[0]);
28179 if (NO_PROFILE_COUNTERS)
28180 {
28181 asm_fprintf (file, "\tstw %s,4(%s)\n",
28182 reg_names[0], reg_names[1]);
28183 }
28184 else if (TARGET_SECURE_PLT && flag_pic)
28185 {
28186 if (TARGET_LINK_STACK)
28187 {
28188 char name[32];
28189 get_ppc476_thunk_name (name);
28190 asm_fprintf (file, "\tbl %s\n", name);
28191 }
28192 else
28193 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
28194 asm_fprintf (file, "\tstw %s,4(%s)\n",
28195 reg_names[0], reg_names[1]);
28196 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
28197 asm_fprintf (file, "\taddis %s,%s,",
28198 reg_names[12], reg_names[12]);
28199 assemble_name (file, buf);
28200 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
28201 assemble_name (file, buf);
28202 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
28203 }
28204 else if (flag_pic == 1)
28205 {
28206 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
28207 asm_fprintf (file, "\tstw %s,4(%s)\n",
28208 reg_names[0], reg_names[1]);
28209 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
28210 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
28211 assemble_name (file, buf);
28212 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
28213 }
28214 else if (flag_pic > 1)
28215 {
28216 asm_fprintf (file, "\tstw %s,4(%s)\n",
28217 reg_names[0], reg_names[1]);
28218 /* Now, we need to get the address of the label. */
28219 if (TARGET_LINK_STACK)
28220 {
28221 char name[32];
28222 get_ppc476_thunk_name (name);
28223 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
28224 assemble_name (file, buf);
28225 fputs ("-.\n1:", file);
28226 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
28227 asm_fprintf (file, "\taddi %s,%s,4\n",
28228 reg_names[11], reg_names[11]);
28229 }
28230 else
28231 {
28232 fputs ("\tbcl 20,31,1f\n\t.long ", file);
28233 assemble_name (file, buf);
28234 fputs ("-.\n1:", file);
28235 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
28236 }
28237 asm_fprintf (file, "\tlwz %s,0(%s)\n",
28238 reg_names[0], reg_names[11]);
28239 asm_fprintf (file, "\tadd %s,%s,%s\n",
28240 reg_names[0], reg_names[0], reg_names[11]);
28241 }
28242 else
28243 {
28244 asm_fprintf (file, "\tlis %s,", reg_names[12]);
28245 assemble_name (file, buf);
28246 fputs ("@ha\n", file);
28247 asm_fprintf (file, "\tstw %s,4(%s)\n",
28248 reg_names[0], reg_names[1]);
28249 asm_fprintf (file, "\tla %s,", reg_names[0]);
28250 assemble_name (file, buf);
28251 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
28252 }
28253
28254 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
28255 fprintf (file, "\tbl %s%s\n",
28256 RS6000_MCOUNT, flag_pic ? "@plt" : "");
28257 break;
28258
28259 case ABI_AIX:
28260 case ABI_ELFv2:
28261 case ABI_DARWIN:
28262 /* Don't do anything, done in output_profile_hook (). */
28263 break;
28264 }
28265 }
28266
28267 \f
28268
28269 /* The following variable holds the last insn issued by the scheduler. */
28270
28271 static rtx last_scheduled_insn;
28272
28273 /* The following variable helps to balance the issuing of load and
28274 store instructions (see rs6000_sched_reorder2). */
28275
28276 static int load_store_pendulum;
28277
28278 /* Power4 load update and store update instructions are cracked into a
28279 load or store and an integer insn which are executed in the same cycle.
28280 Branches have their own dispatch slot which does not count against the
28281 GCC issue rate, but it changes the program flow so there are no other
28282 instructions to issue in this cycle. */
28283
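/* Worker for rs6000_variable_issue: return how many more insns can be
   issued after scheduling INSN, given that MORE could be issued before it.  */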
28284 static int
28285 rs6000_variable_issue_1 (rtx_insn *insn, int more)
28286 {
28287 last_scheduled_insn = insn;
28288 if (GET_CODE (PATTERN (insn)) == USE
28289 || GET_CODE (PATTERN (insn)) == CLOBBER)
28290 {
28291 cached_can_issue_more = more;
28292 return cached_can_issue_more;
28293 }
28294
28295 if (insn_terminates_group_p (insn, current_group))
28296 {
28297 cached_can_issue_more = 0;
28298 return cached_can_issue_more;
28299 }
28300
28301 /* If the insn has no reservation (it is not recognized), it does not consume an issue slot. */
28302 if (recog_memoized (insn) < 0)
28303 return more;
28304
28305 if (rs6000_sched_groups)
28306 {
28307 if (is_microcoded_insn (insn))
28308 cached_can_issue_more = 0;
28309 else if (is_cracked_insn (insn))
28310 cached_can_issue_more = more > 2 ? more - 2 : 0;
28311 else
28312 cached_can_issue_more = more - 1;
28313
28314 return cached_can_issue_more;
28315 }
28316
28317 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
28318 return 0;
28319
28320 cached_can_issue_more = more - 1;
28321 return cached_can_issue_more;
28322 }
28323
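/* Implement TARGET_SCHED_VARIABLE_ISSUE: wrap rs6000_variable_issue_1
   and optionally report the result in the scheduler dump.  */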
28324 static int
28325 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
28326 {
28327 int r = rs6000_variable_issue_1 (insn, more);
28328 if (verbose)
28329 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
28330 return r;
28331 }
28332
28333 /* Adjust the cost of a scheduling dependency. Return the new cost of
28334 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
28335
28336 static int
28337 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
28338 {
28339 enum attr_type attr_type;
28340
28341 if (! recog_memoized (insn))
28342 return 0;
28343
28344 switch (REG_NOTE_KIND (link))
28345 {
28346 case REG_DEP_TRUE:
28347 {
28348 /* Data dependency; DEP_INSN writes a register that INSN reads
28349 some cycles later. */
28350
28351 /* Separate a load from a narrower, dependent store. */
28352 if (rs6000_sched_groups
28353 && GET_CODE (PATTERN (insn)) == SET
28354 && GET_CODE (PATTERN (dep_insn)) == SET
28355 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
28356 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
28357 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
28358 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
28359 return cost + 14;
28360
28361 attr_type = get_attr_type (insn);
28362
28363 switch (attr_type)
28364 {
28365 case TYPE_JMPREG:
28366 /* Tell the first scheduling pass about the latency between
28367 a mtctr and bctr (and mtlr and br/blr). The first
28368 scheduling pass will not know about this latency since
28369 the mtctr instruction, which has the latency associated
28370 to it, will be generated by reload. */
28371 return 4;
28372 case TYPE_BRANCH:
28373 /* Leave some extra cycles between a compare and its
28374 dependent branch, to inhibit expensive mispredicts. */
28375 if ((rs6000_cpu_attr == CPU_PPC603
28376 || rs6000_cpu_attr == CPU_PPC604
28377 || rs6000_cpu_attr == CPU_PPC604E
28378 || rs6000_cpu_attr == CPU_PPC620
28379 || rs6000_cpu_attr == CPU_PPC630
28380 || rs6000_cpu_attr == CPU_PPC750
28381 || rs6000_cpu_attr == CPU_PPC7400
28382 || rs6000_cpu_attr == CPU_PPC7450
28383 || rs6000_cpu_attr == CPU_PPCE5500
28384 || rs6000_cpu_attr == CPU_PPCE6500
28385 || rs6000_cpu_attr == CPU_POWER4
28386 || rs6000_cpu_attr == CPU_POWER5
28387 || rs6000_cpu_attr == CPU_POWER7
28388 || rs6000_cpu_attr == CPU_POWER8
28389 || rs6000_cpu_attr == CPU_POWER9
28390 || rs6000_cpu_attr == CPU_CELL)
28391 && recog_memoized (dep_insn)
28392 && (INSN_CODE (dep_insn) >= 0))
28393
28394 switch (get_attr_type (dep_insn))
28395 {
28396 case TYPE_CMP:
28397 case TYPE_FPCOMPARE:
28398 case TYPE_CR_LOGICAL:
28399 case TYPE_DELAYED_CR:
28400 return cost + 2;
28401 case TYPE_EXTS:
28402 case TYPE_MUL:
28403 if (get_attr_dot (dep_insn) == DOT_YES)
28404 return cost + 2;
28405 else
28406 break;
28407 case TYPE_SHIFT:
28408 if (get_attr_dot (dep_insn) == DOT_YES
28409 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
28410 return cost + 2;
28411 else
28412 break;
28413 default:
28414 break;
28415 }
28416 break;
28417
28418 case TYPE_STORE:
28419 case TYPE_FPSTORE:
28420 if ((rs6000_cpu == PROCESSOR_POWER6)
28421 && recog_memoized (dep_insn)
28422 && (INSN_CODE (dep_insn) >= 0))
28423 {
28424
28425 if (GET_CODE (PATTERN (insn)) != SET)
28426 /* If this happens, we have to extend this to schedule
28427 optimally. Return default for now. */
28428 return cost;
28429
28430 /* Adjust the cost for the case where the value written
28431 by a fixed point operation is used as the address
28432 gen value on a store. */
28433 switch (get_attr_type (dep_insn))
28434 {
28435 case TYPE_LOAD:
28436 case TYPE_CNTLZ:
28437 {
28438 if (! store_data_bypass_p (dep_insn, insn))
28439 return get_attr_sign_extend (dep_insn)
28440 == SIGN_EXTEND_YES ? 6 : 4;
28441 break;
28442 }
28443 case TYPE_SHIFT:
28444 {
28445 if (! store_data_bypass_p (dep_insn, insn))
28446 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
28447 6 : 3;
28448 break;
28449 }
28450 case TYPE_INTEGER:
28451 case TYPE_ADD:
28452 case TYPE_LOGICAL:
28453 case TYPE_EXTS:
28454 case TYPE_INSERT:
28455 {
28456 if (! store_data_bypass_p (dep_insn, insn))
28457 return 3;
28458 break;
28459 }
28460 case TYPE_STORE:
28461 case TYPE_FPLOAD:
28462 case TYPE_FPSTORE:
28463 {
28464 if (get_attr_update (dep_insn) == UPDATE_YES
28465 && ! store_data_bypass_p (dep_insn, insn))
28466 return 3;
28467 break;
28468 }
28469 case TYPE_MUL:
28470 {
28471 if (! store_data_bypass_p (dep_insn, insn))
28472 return 17;
28473 break;
28474 }
28475 case TYPE_DIV:
28476 {
28477 if (! store_data_bypass_p (dep_insn, insn))
28478 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
28479 break;
28480 }
28481 default:
28482 break;
28483 }
28484 }
28485 break;
28486
28487 case TYPE_LOAD:
28488 if ((rs6000_cpu == PROCESSOR_POWER6)
28489 && recog_memoized (dep_insn)
28490 && (INSN_CODE (dep_insn) >= 0))
28491 {
28492
28493 /* Adjust the cost for the case where the value written
28494 by a fixed point instruction is used within the address
28495 gen portion of a subsequent load(u)(x) */
28496 switch (get_attr_type (dep_insn))
28497 {
28498 case TYPE_LOAD:
28499 case TYPE_CNTLZ:
28500 {
28501 if (set_to_load_agen (dep_insn, insn))
28502 return get_attr_sign_extend (dep_insn)
28503 == SIGN_EXTEND_YES ? 6 : 4;
28504 break;
28505 }
28506 case TYPE_SHIFT:
28507 {
28508 if (set_to_load_agen (dep_insn, insn))
28509 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
28510 6 : 3;
28511 break;
28512 }
28513 case TYPE_INTEGER:
28514 case TYPE_ADD:
28515 case TYPE_LOGICAL:
28516 case TYPE_EXTS:
28517 case TYPE_INSERT:
28518 {
28519 if (set_to_load_agen (dep_insn, insn))
28520 return 3;
28521 break;
28522 }
28523 case TYPE_STORE:
28524 case TYPE_FPLOAD:
28525 case TYPE_FPSTORE:
28526 {
28527 if (get_attr_update (dep_insn) == UPDATE_YES
28528 && set_to_load_agen (dep_insn, insn))
28529 return 3;
28530 break;
28531 }
28532 case TYPE_MUL:
28533 {
28534 if (set_to_load_agen (dep_insn, insn))
28535 return 17;
28536 break;
28537 }
28538 case TYPE_DIV:
28539 {
28540 if (set_to_load_agen (dep_insn, insn))
28541 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
28542 break;
28543 }
28544 default:
28545 break;
28546 }
28547 }
28548 break;
28549
28550 case TYPE_FPLOAD:
28551 if ((rs6000_cpu == PROCESSOR_POWER6)
28552 && get_attr_update (insn) == UPDATE_NO
28553 && recog_memoized (dep_insn)
28554 && (INSN_CODE (dep_insn) >= 0)
28555 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
28556 return 2;
28557
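/* FALLTHRU */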
28558 default:
28559 break;
28560 }
28561
28562 /* Fall out to return default cost. */
28563 }
28564 break;
28565
28566 case REG_DEP_OUTPUT:
28567 /* Output dependency; DEP_INSN writes a register that INSN writes some
28568 cycles later. */
28569 if ((rs6000_cpu == PROCESSOR_POWER6)
28570 && recog_memoized (dep_insn)
28571 && (INSN_CODE (dep_insn) >= 0))
28572 {
28573 attr_type = get_attr_type (insn);
28574
28575 switch (attr_type)
28576 {
28577 case TYPE_FP:
28578 if (get_attr_type (dep_insn) == TYPE_FP)
28579 return 1;
28580 break;
28581 case TYPE_FPLOAD:
28582 if (get_attr_update (insn) == UPDATE_NO
28583 && get_attr_type (dep_insn) == TYPE_MFFGPR)
28584 return 2;
28585 break;
28586 default:
28587 break;
28588 }
28589 }
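/* Fall through: an output dependency not special-cased above is
   assumed to cost nothing, just like an anti dependency.  */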
28590 case REG_DEP_ANTI:
28591 /* Anti dependency; DEP_INSN reads a register that INSN writes some
28592 cycles later. */
28593 return 0;
28594
28595 default:
28596 gcc_unreachable ();
28597 }
28598
28599 return cost;
28600 }
28601
28602 /* Debug version of rs6000_adjust_cost. */
28603
28604 static int
28605 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
28606 int cost)
28607 {
28608 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
28609
28610 if (ret != cost)
28611 {
28612 const char *dep;
28613
28614 switch (REG_NOTE_KIND (link))
28615 {
28616 default: dep = "unknown dependency"; break;
28617 case REG_DEP_TRUE: dep = "data dependency"; break;
28618 case REG_DEP_OUTPUT: dep = "output dependency"; break;
28619 case REG_DEP_ANTI: dep = "anti dependency"; break;
28620 }
28621
28622 fprintf (stderr,
28623 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
28624 "%s, insn:\n", ret, cost, dep);
28625
28626 debug_rtx (insn);
28627 }
28628
28629 return ret;
28630 }
28631
28632 /* Return true if INSN is microcoded;
28633 return false otherwise. */
28634
28635 static bool
28636 is_microcoded_insn (rtx_insn *insn)
28637 {
28638 if (!insn || !NONDEBUG_INSN_P (insn)
28639 || GET_CODE (PATTERN (insn)) == USE
28640 || GET_CODE (PATTERN (insn)) == CLOBBER)
28641 return false;
28642
28643 if (rs6000_cpu_attr == CPU_CELL)
28644 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
28645
28646 if (rs6000_sched_groups
28647 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
28648 {
28649 enum attr_type type = get_attr_type (insn);
28650 if ((type == TYPE_LOAD
28651 && get_attr_update (insn) == UPDATE_YES
28652 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
28653 || ((type == TYPE_LOAD || type == TYPE_STORE)
28654 && get_attr_update (insn) == UPDATE_YES
28655 && get_attr_indexed (insn) == INDEXED_YES)
28656 || type == TYPE_MFCR)
28657 return true;
28658 }
28659
28660 return false;
28661 }
28662
28663 /* The function returns true if INSN is cracked into 2 instructions
28664 by the processor (and therefore occupies 2 issue slots). */
28665
28666 static bool
28667 is_cracked_insn (rtx_insn *insn)
28668 {
28669 if (!insn || !NONDEBUG_INSN_P (insn)
28670 || GET_CODE (PATTERN (insn)) == USE
28671 || GET_CODE (PATTERN (insn)) == CLOBBER)
28672 return false;
28673
28674 if (rs6000_sched_groups
28675 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
28676 {
28677 enum attr_type type = get_attr_type (insn);
28678 if ((type == TYPE_LOAD
28679 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28680 && get_attr_update (insn) == UPDATE_NO)
28681 || (type == TYPE_LOAD
28682 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
28683 && get_attr_update (insn) == UPDATE_YES
28684 && get_attr_indexed (insn) == INDEXED_NO)
28685 || (type == TYPE_STORE
28686 && get_attr_update (insn) == UPDATE_YES
28687 && get_attr_indexed (insn) == INDEXED_NO)
28688 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
28689 && get_attr_update (insn) == UPDATE_YES)
28690 || type == TYPE_DELAYED_CR
28691 || (type == TYPE_EXTS
28692 && get_attr_dot (insn) == DOT_YES)
28693 || (type == TYPE_SHIFT
28694 && get_attr_dot (insn) == DOT_YES
28695 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
28696 || (type == TYPE_MUL
28697 && get_attr_dot (insn) == DOT_YES)
28698 || type == TYPE_DIV
28699 || (type == TYPE_INSERT
28700 && get_attr_size (insn) == SIZE_32))
28701 return true;
28702 }
28703
28704 return false;
28705 }
28706
28707 /* The function returns true if INSN can be issued only from
28708 the branch slot. */
28709
28710 static bool
28711 is_branch_slot_insn (rtx_insn *insn)
28712 {
28713 if (!insn || !NONDEBUG_INSN_P (insn)
28714 || GET_CODE (PATTERN (insn)) == USE
28715 || GET_CODE (PATTERN (insn)) == CLOBBER)
28716 return false;
28717
28718 if (rs6000_sched_groups)
28719 {
28720 enum attr_type type = get_attr_type (insn);
28721 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
28722 return true;
28723 return false;
28724 }
28725
28726 return false;
28727 }
28728
28729 /* Return true if OUT_INSN sets a value that is
28730 used in the address generation computation of IN_INSN. */
28731 static bool
28732 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
28733 {
28734 rtx out_set, in_set;
28735
28736 /* For performance reasons, only handle the simple case where
28737 both loads are a single_set. */
28738 out_set = single_set (out_insn);
28739 if (out_set)
28740 {
28741 in_set = single_set (in_insn);
28742 if (in_set)
28743 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
28744 }
28745
28746 return false;
28747 }
28748
28749 /* Try to determine the base/offset/size parts of the given MEM.
28750 Return true if successful, false if any of the values couldn't
28751 be determined.
28752
28753 This function only looks for REG or REG+CONST address forms.
28754 REG+REG address form will return false. */
28755
28756 static bool
28757 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
28758 HOST_WIDE_INT *size)
28759 {
28760 rtx addr_rtx;
28761 if (MEM_SIZE_KNOWN_P (mem))
28762 *size = MEM_SIZE (mem);
28763 else
28764 return false;
28765
28766 addr_rtx = (XEXP (mem, 0));
28767 if (GET_CODE (addr_rtx) == PRE_MODIFY)
28768 addr_rtx = XEXP (addr_rtx, 1);
28769
28770 *offset = 0;
28771 while (GET_CODE (addr_rtx) == PLUS
28772 && CONST_INT_P (XEXP (addr_rtx, 1)))
28773 {
28774 *offset += INTVAL (XEXP (addr_rtx, 1));
28775 addr_rtx = XEXP (addr_rtx, 0);
28776 }
28777 if (!REG_P (addr_rtx))
28778 return false;
28779
28780 *base = addr_rtx;
28781 return true;
28782 }
28783
28784 /* Return true if the target storage location of MEM1 is adjacent
28785 to the target storage location of MEM2. */
28786
28787
28788 static bool
28789 adjacent_mem_locations (rtx mem1, rtx mem2)
28790 {
28791 rtx reg1, reg2;
28792 HOST_WIDE_INT off1, size1, off2, size2;
28793
28794 if (get_memref_parts (mem1, &reg1, &off1, &size1)
28795 && get_memref_parts (mem2, &reg2, &off2, &size2))
28796 return ((REGNO (reg1) == REGNO (reg2))
28797 && ((off1 + size1 == off2)
28798 || (off2 + size2 == off1)));
28799
28800 return false;
28801 }
28802
28803 /* This function returns true if it can be determined that the two MEM
28804 locations overlap by at least 1 byte based on base reg/offset/size. */
28805
28806 static bool
28807 mem_locations_overlap (rtx mem1, rtx mem2)
28808 {
28809 rtx reg1, reg2;
28810 HOST_WIDE_INT off1, size1, off2, size2;
28811
28812 if (get_memref_parts (mem1, &reg1, &off1, &size1)
28813 && get_memref_parts (mem2, &reg2, &off2, &size2))
28814 return ((REGNO (reg1) == REGNO (reg2))
28815 && (((off1 <= off2) && (off1 + size1 > off2))
28816 || ((off2 <= off1) && (off2 + size2 > off1))));
28817
28818 return false;
28819 }
28820
28821 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
28822 Increase the priority to execute INSN earlier, reduce the
28823 priority to execute INSN later. This implements the
28824 TARGET_SCHED_ADJUST_PRIORITY hook. */
28826
28827 static int
28828 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
28829 {
28830 rtx load_mem, str_mem;
28831 /* On machines (like the 750) which have asymmetric integer units,
28832 where one integer unit can do multiply and divides and the other
28833 can't, reduce the priority of multiply/divide so it is scheduled
28834 before other integer operations. */
28835
28836 #if 0
28837 if (! INSN_P (insn))
28838 return priority;
28839
28840 if (GET_CODE (PATTERN (insn)) == USE)
28841 return priority;
28842
28843 switch (rs6000_cpu_attr) {
28844 case CPU_PPC750:
28845 switch (get_attr_type (insn))
28846 {
28847 default:
28848 break;
28849
28850 case TYPE_MUL:
28851 case TYPE_DIV:
28852 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
28853 priority, priority);
28854 if (priority >= 0 && priority < 0x01000000)
28855 priority >>= 3;
28856 break;
28857 }
28858 }
28859 #endif
28860
28861 if (insn_must_be_first_in_group (insn)
28862 && reload_completed
28863 && current_sched_info->sched_max_insns_priority
28864 && rs6000_sched_restricted_insns_priority)
28865 {
28866
28867 /* Prioritize insns that can be dispatched only in the first
28868 dispatch slot. */
28869 if (rs6000_sched_restricted_insns_priority == 1)
28870 /* Attach highest priority to insn. This means that in
28871 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
28872 precede 'priority' (critical path) considerations. */
28873 return current_sched_info->sched_max_insns_priority;
28874 else if (rs6000_sched_restricted_insns_priority == 2)
28875 /* Increase priority of insn by a minimal amount. This means that in
28876 haifa-sched.c:ready_sort(), only 'priority' (critical path)
28877 considerations precede dispatch-slot restriction considerations. */
28878 return (priority + 1);
28879 }
28880
28881 if (rs6000_cpu == PROCESSOR_POWER6
28882 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
28883 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
28884 /* Attach highest priority to insn if the scheduler has just issued two
28885 stores and this instruction is a load, or two loads and this instruction
28886 is a store. Power6 wants loads and stores scheduled alternately
28887 when possible. */
28888 return current_sched_info->sched_max_insns_priority;
28889
28890 return priority;
28891 }
28892
28893 /* Return true if the instruction is nonpipelined on the Cell. */
28894 static bool
28895 is_nonpipeline_insn (rtx_insn *insn)
28896 {
28897 enum attr_type type;
28898 if (!insn || !NONDEBUG_INSN_P (insn)
28899 || GET_CODE (PATTERN (insn)) == USE
28900 || GET_CODE (PATTERN (insn)) == CLOBBER)
28901 return false;
28902
28903 type = get_attr_type (insn);
28904 if (type == TYPE_MUL
28905 || type == TYPE_DIV
28906 || type == TYPE_SDIV
28907 || type == TYPE_DDIV
28908 || type == TYPE_SSQRT
28909 || type == TYPE_DSQRT
28910 || type == TYPE_MFCR
28911 || type == TYPE_MFCRF
28912 || type == TYPE_MFJMPR)
28913 {
28914 return true;
28915 }
28916 return false;
28917 }
28918
28919
28920 /* Return how many instructions the machine can issue per cycle. */
28921
28922 static int
28923 rs6000_issue_rate (void)
28924 {
28925 /* Unless scheduling for register pressure, use an issue rate of 1 for
28926 the first scheduling pass to limit register-pressure degradation. */
28927 if (!reload_completed && !flag_sched_pressure)
28928 return 1;
28929
28930 switch (rs6000_cpu_attr) {
28931 case CPU_RS64A:
28932 case CPU_PPC601: /* ? */
28933 case CPU_PPC7450:
28934 return 3;
28935 case CPU_PPC440:
28936 case CPU_PPC603:
28937 case CPU_PPC750:
28938 case CPU_PPC7400:
28939 case CPU_PPC8540:
28940 case CPU_PPC8548:
28941 case CPU_CELL:
28942 case CPU_PPCE300C2:
28943 case CPU_PPCE300C3:
28944 case CPU_PPCE500MC:
28945 case CPU_PPCE500MC64:
28946 case CPU_PPCE5500:
28947 case CPU_PPCE6500:
28948 case CPU_TITAN:
28949 return 2;
28950 case CPU_PPC476:
28951 case CPU_PPC604:
28952 case CPU_PPC604E:
28953 case CPU_PPC620:
28954 case CPU_PPC630:
28955 return 4;
28956 case CPU_POWER4:
28957 case CPU_POWER5:
28958 case CPU_POWER6:
28959 case CPU_POWER7:
28960 return 5;
28961 case CPU_POWER8:
28962 case CPU_POWER9:
28963 return 7;
28964 default:
28965 return 1;
28966 }
28967 }
28968
28969 /* Return how many instructions to look ahead for better insn
28970 scheduling. */
28971
28972 static int
28973 rs6000_use_sched_lookahead (void)
28974 {
28975 switch (rs6000_cpu_attr)
28976 {
28977 case CPU_PPC8540:
28978 case CPU_PPC8548:
28979 return 4;
28980
28981 case CPU_CELL:
28982 return (reload_completed ? 8 : 0);
28983
28984 default:
28985 return 0;
28986 }
28987 }
28988
28989 /* We are choosing insn from the ready queue. Return zero if INSN can be
28990 chosen. */
28991 static int
28992 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
28993 {
28994 if (ready_index == 0)
28995 return 0;
28996
28997 if (rs6000_cpu_attr != CPU_CELL)
28998 return 0;
28999
29000 gcc_assert (insn != NULL_RTX && INSN_P (insn));
29001
29002 if (!reload_completed
29003 || is_nonpipeline_insn (insn)
29004 || is_microcoded_insn (insn))
29005 return 1;
29006
29007 return 0;
29008 }
29009
29010 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
29011 and return true. */
29012
29013 static bool
29014 find_mem_ref (rtx pat, rtx *mem_ref)
29015 {
29016 const char * fmt;
29017 int i, j;
29018
29019 /* stack_tie does not produce any real memory traffic. */
29020 if (tie_operand (pat, VOIDmode))
29021 return false;
29022
29023 if (GET_CODE (pat) == MEM)
29024 {
29025 *mem_ref = pat;
29026 return true;
29027 }
29028
29029 /* Recursively process the pattern. */
29030 fmt = GET_RTX_FORMAT (GET_CODE (pat));
29031
29032 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
29033 {
29034 if (fmt[i] == 'e')
29035 {
29036 if (find_mem_ref (XEXP (pat, i), mem_ref))
29037 return true;
29038 }
29039 else if (fmt[i] == 'E')
29040 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
29041 {
29042 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
29043 return true;
29044 }
29045 }
29046
29047 return false;
29048 }
29049
29050 /* Determine if PAT is a PATTERN of a load insn. */
29051
29052 static bool
29053 is_load_insn1 (rtx pat, rtx *load_mem)
29054 {
29055 if (!pat)
29056 return false;
29057
29058 if (GET_CODE (pat) == SET)
29059 return find_mem_ref (SET_SRC (pat), load_mem);
29060
29061 if (GET_CODE (pat) == PARALLEL)
29062 {
29063 int i;
29064
29065 for (i = 0; i < XVECLEN (pat, 0); i++)
29066 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
29067 return true;
29068 }
29069
29070 return false;
29071 }
29072
29073 /* Determine if INSN loads from memory. */
29074
29075 static bool
29076 is_load_insn (rtx insn, rtx *load_mem)
29077 {
29078 if (!insn || !INSN_P (insn))
29079 return false;
29080
29081 if (CALL_P (insn))
29082 return false;
29083
29084 return is_load_insn1 (PATTERN (insn), load_mem);
29085 }
29086
29087 /* Determine if PAT is a PATTERN of a store insn. */
29088
29089 static bool
29090 is_store_insn1 (rtx pat, rtx *str_mem)
29091 {
29092 if (!pat)
29093 return false;
29094
29095 if (GET_CODE (pat) == SET)
29096 return find_mem_ref (SET_DEST (pat), str_mem);
29097
29098 if (GET_CODE (pat) == PARALLEL)
29099 {
29100 int i;
29101
29102 for (i = 0; i < XVECLEN (pat, 0); i++)
29103 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
29104 return true;
29105 }
29106
29107 return false;
29108 }
29109
29110 /* Determine if INSN stores to memory. */
29111
29112 static bool
29113 is_store_insn (rtx insn, rtx *str_mem)
29114 {
29115 if (!insn || !INSN_P (insn))
29116 return false;
29117
29118 return is_store_insn1 (PATTERN (insn), str_mem);
29119 }
29120
29121 /* Returns whether the dependence between INSN and NEXT is considered
29122 costly by the given target. */
29123
29124 static bool
29125 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
29126 {
29127 rtx insn;
29128 rtx next;
29129 rtx load_mem, str_mem;
29130
29131 /* If the flag is not enabled, no dependence is considered costly;
29132 allow all dependent insns in the same group.
29133 This is the most aggressive option. */
29134 if (rs6000_sched_costly_dep == no_dep_costly)
29135 return false;
29136
29137 /* If the flag is set to 1, a dependence is always considered costly;
29138 do not allow dependent instructions in the same group.
29139 This is the most conservative option. */
29140 if (rs6000_sched_costly_dep == all_deps_costly)
29141 return true;
29142
29143 insn = DEP_PRO (dep);
29144 next = DEP_CON (dep);
29145
29146 if (rs6000_sched_costly_dep == store_to_load_dep_costly
29147 && is_load_insn (next, &load_mem)
29148 && is_store_insn (insn, &str_mem))
29149 /* Prevent load after store in the same group. */
29150 return true;
29151
29152 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
29153 && is_load_insn (next, &load_mem)
29154 && is_store_insn (insn, &str_mem)
29155 && DEP_TYPE (dep) == REG_DEP_TRUE
29156 && mem_locations_overlap (str_mem, load_mem))
29157 /* Prevent load after store in the same group if it is a true
29158 dependence. */
29159 return true;
29160
29161 /* The flag is set to X; dependences with latency >= X are considered costly,
29162 and will not be scheduled in the same group. */
29163 if (rs6000_sched_costly_dep <= max_dep_latency
29164 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
29165 return true;
29166
29167 return false;
29168 }
29169
29170 /* Return the next insn after INSN that is found before TAIL is reached,
29171 skipping any "non-active" insns - insns that will not actually occupy
29172 an issue slot. Return NULL_RTX if such an insn is not found. */
29173
29174 static rtx_insn *
29175 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
29176 {
29177 if (insn == NULL_RTX || insn == tail)
29178 return NULL;
29179
29180 while (1)
29181 {
29182 insn = NEXT_INSN (insn);
29183 if (insn == NULL_RTX || insn == tail)
29184 return NULL;
29185
29186 if (CALL_P (insn)
29187 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
29188 || (NONJUMP_INSN_P (insn)
29189 && GET_CODE (PATTERN (insn)) != USE
29190 && GET_CODE (PATTERN (insn)) != CLOBBER
29191 && INSN_CODE (insn) != CODE_FOR_stack_tie))
29192 break;
29193 }
29194 return insn;
29195 }
29196
29197 /* We are about to begin issuing insns for this clock cycle. */
29198
29199 static int
29200 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
29201 rtx_insn **ready ATTRIBUTE_UNUSED,
29202 int *pn_ready ATTRIBUTE_UNUSED,
29203 int clock_var ATTRIBUTE_UNUSED)
29204 {
29205 int n_ready = *pn_ready;
29206
29207 if (sched_verbose)
29208 fprintf (dump, "// rs6000_sched_reorder :\n");
29209
29210 /* Reorder the ready list if the next insn to be issued
29211 is a nonpipelined insn. */
29212 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
29213 {
29214 if (is_nonpipeline_insn (ready[n_ready - 1])
29215 && (recog_memoized (ready[n_ready - 2]) > 0))
29216 /* Simply swap the first two insns to issue. */
29217 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
29218 }
29219
29220 if (rs6000_cpu == PROCESSOR_POWER6)
29221 load_store_pendulum = 0;
29222
29223 return rs6000_issue_rate ();
29224 }
29225
29226 /* Like rs6000_sched_reorder, but called after issuing each insn. */
29227
29228 static int
29229 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
29230 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
29231 {
29232 if (sched_verbose)
29233 fprintf (dump, "// rs6000_sched_reorder2 :\n");
29234
29235 /* For Power6, we need to handle some special cases to try and keep the
29236 store queue from overflowing and triggering expensive flushes.
29237
29238 This code monitors how load and store instructions are being issued
29239 and skews the ready list one way or the other to increase the likelihood
29240 that a desired instruction is issued at the proper time.
29241
29242 A couple of things are done. First, we maintain a "load_store_pendulum"
29243 to track the current state of load/store issue.
29244
29245 - If the pendulum is at zero, then no loads or stores have been
29246 issued in the current cycle so we do nothing.
29247
29248 - If the pendulum is 1, then a single load has been issued in this
29249 cycle and we attempt to locate another load in the ready list to
29250 issue with it.
29251
29252 - If the pendulum is -2, then two stores have already been
29253 issued in this cycle, so we increase the priority of the first load
29254 in the ready list to increase its likelihood of being chosen first
29255 in the next cycle.
29256
29257 - If the pendulum is -1, then a single store has been issued in this
29258 cycle and we attempt to locate another store in the ready list to
29259 issue with it, preferring a store to an adjacent memory location to
29260 facilitate store pairing in the store queue.
29261
29262 - If the pendulum is 2, then two loads have already been
29263 issued in this cycle, so we increase the priority of the first store
29264 in the ready list to increase its likelihood of being chosen first
29265 in the next cycle.
29266
29267 - If the pendulum < -2 or > 2, then do nothing.
29268
29269 Note: This code covers the most common scenarios. There exist
29270 non-load/store instructions which make use of the LSU and which
29271 would need to be accounted for to strictly model the behavior
29272 of the machine. Those instructions are currently unaccounted
29273 for to help minimize compile time overhead of this code.
29274 */
29275 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
29276 {
29277 int pos;
29278 int i;
29279 rtx_insn *tmp;
29280 rtx load_mem, str_mem;
29281
29282 if (is_store_insn (last_scheduled_insn, &str_mem))
29283 /* Issuing a store, swing the load_store_pendulum to the left */
29284 load_store_pendulum--;
29285 else if (is_load_insn (last_scheduled_insn, &load_mem))
29286 /* Issuing a load, swing the load_store_pendulum to the right */
29287 load_store_pendulum++;
29288 else
29289 return cached_can_issue_more;
29290
29291 /* If the pendulum is balanced, or there is only one instruction on
29292 the ready list, then all is well, so return. */
29293 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
29294 return cached_can_issue_more;
29295
29296 if (load_store_pendulum == 1)
29297 {
29298 /* A load has been issued in this cycle. Scan the ready list
29299 for another load to issue with it */
29300 pos = *pn_ready-1;
29301
29302 while (pos >= 0)
29303 {
29304 if (is_load_insn (ready[pos], &load_mem))
29305 {
29306 /* Found a load. Move it to the head of the ready list,
29307 and adjust its priority so that it is more likely to
29308 stay there */
29309 tmp = ready[pos];
29310 for (i = pos; i < *pn_ready - 1; i++)
29311 ready[i] = ready[i + 1];
29312 ready[*pn_ready-1] = tmp;
29313
29314 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
29315 INSN_PRIORITY (tmp)++;
29316 break;
29317 }
29318 pos--;
29319 }
29320 }
29321 else if (load_store_pendulum == -2)
29322 {
29323 /* Two stores have been issued in this cycle. Increase the
29324 priority of the first load in the ready list to favor it for
29325 issuing in the next cycle. */
29326 pos = *pn_ready-1;
29327
29328 while (pos >= 0)
29329 {
29330 if (is_load_insn (ready[pos], &load_mem)
29331 && !sel_sched_p ()
29332 && INSN_PRIORITY_KNOWN (ready[pos]))
29333 {
29334 INSN_PRIORITY (ready[pos])++;
29335
29336 /* Adjust the pendulum to account for the fact that a load
29337 was found and increased in priority. This is to prevent
29338 increasing the priority of multiple loads */
29339 load_store_pendulum--;
29340
29341 break;
29342 }
29343 pos--;
29344 }
29345 }
29346 else if (load_store_pendulum == -1)
29347 {
29348 /* A store has been issued in this cycle. Scan the ready list for
29349 another store to issue with it, preferring a store to an adjacent
29350 memory location */
29351 int first_store_pos = -1;
29352
29353 pos = *pn_ready-1;
29354
29355 while (pos >= 0)
29356 {
29357 if (is_store_insn (ready[pos], &str_mem))
29358 {
29359 rtx str_mem2;
29360 /* Maintain the index of the first store found on the
29361 list */
29362 if (first_store_pos == -1)
29363 first_store_pos = pos;
29364
29365 if (is_store_insn (last_scheduled_insn, &str_mem2)
29366 && adjacent_mem_locations (str_mem, str_mem2))
29367 {
29368 /* Found an adjacent store. Move it to the head of the
29369 ready list, and adjust its priority so that it is
29370 more likely to stay there */
29371 tmp = ready[pos];
29372 for (i = pos; i < *pn_ready - 1; i++)
29373 ready[i] = ready[i + 1];
29374 ready[*pn_ready-1] = tmp;
29375
29376 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
29377 INSN_PRIORITY (tmp)++;
29378
29379 first_store_pos = -1;
29380
29381 break;
29382 }
29383 }
29384 pos--;
29385 }
29386
29387 if (first_store_pos >= 0)
29388 {
29389 /* An adjacent store wasn't found, but a non-adjacent store was,
29390 so move the non-adjacent store to the front of the ready
29391 list, and adjust its priority so that it is more likely to
29392 stay there. */
29393 tmp = ready[first_store_pos];
29394 for (i=first_store_pos; i<*pn_ready-1; i++)
29395 ready[i] = ready[i + 1];
29396 ready[*pn_ready-1] = tmp;
29397 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
29398 INSN_PRIORITY (tmp)++;
29399 }
29400 }
29401 else if (load_store_pendulum == 2)
29402 {
29403 /* Two loads have been issued in this cycle. Increase the priority
29404 of the first store in the ready list to favor it for issuing in
29405 the next cycle. */
29406 pos = *pn_ready-1;
29407
29408 while (pos >= 0)
29409 {
29410 if (is_store_insn (ready[pos], &str_mem)
29411 && !sel_sched_p ()
29412 && INSN_PRIORITY_KNOWN (ready[pos]))
29413 {
29414 INSN_PRIORITY (ready[pos])++;
29415
29416 /* Adjust the pendulum to account for the fact that a store
29417 was found and increased in priority. This is to prevent
29418 increasing the priority of multiple stores */
29419 load_store_pendulum++;
29420
29421 break;
29422 }
29423 pos--;
29424 }
29425 }
29426 }
29427
29428 return cached_can_issue_more;
29429 }
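/* Illustrative sketch (not part of GCC): each of the pendulum cases
   above that "moves an insn to the head of the ready list" uses the
   same rotation, shown stand-alone below.  The scheduler treats
   ready[n_ready - 1] as the head of the ready list.  */
#if 0
static void
move_to_head (rtx_insn **ready, int n_ready, int pos)
{
  rtx_insn *tmp = ready[pos];
  int i;

  /* Slide every element after POS down one slot...  */
  for (i = pos; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];
  /* ...and put the chosen insn at the head (highest index).  */
  ready[n_ready - 1] = tmp;
}
#endif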
29430
29431 /* Return whether the presence of INSN causes a dispatch group termination
29432 of group WHICH_GROUP.
29433
29434 If WHICH_GROUP == current_group, this function will return true if INSN
29435 causes the termination of the current group (i.e., the dispatch group to
29436 which INSN belongs). This means that INSN will be the last insn in the
29437 group it belongs to.
29438
29439 If WHICH_GROUP == previous_group, this function will return true if INSN
29440 causes the termination of the previous group (i.e., the dispatch group that
29441 precedes the group to which INSN belongs). This means that INSN will be
29442 the first insn in the group it belongs to. */
29443
29444 static bool
29445 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
29446 {
29447 bool first, last;
29448
29449 if (! insn)
29450 return false;
29451
29452 first = insn_must_be_first_in_group (insn);
29453 last = insn_must_be_last_in_group (insn);
29454
29455 if (first && last)
29456 return true;
29457
29458 if (which_group == current_group)
29459 return last;
29460 else if (which_group == previous_group)
29461 return first;
29462
29463 return false;
29464 }
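/* For example, in the POWER6 tables below TYPE_ISYNC appears in both
   the "first in group" and "last in group" lists, so an isync is both
   first and last in its group and this function returns true for
   either WHICH_GROUP value.  */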
29465
29466
29467 static bool
29468 insn_must_be_first_in_group (rtx_insn *insn)
29469 {
29470 enum attr_type type;
29471
29472 if (!insn
29473 || NOTE_P (insn)
29474 || DEBUG_INSN_P (insn)
29475 || GET_CODE (PATTERN (insn)) == USE
29476 || GET_CODE (PATTERN (insn)) == CLOBBER)
29477 return false;
29478
29479 switch (rs6000_cpu)
29480 {
29481 case PROCESSOR_POWER5:
29482 if (is_cracked_insn (insn))
29483 return true;
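/* Fall through: the POWER4 checks below also apply to POWER5. */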
29484 case PROCESSOR_POWER4:
29485 if (is_microcoded_insn (insn))
29486 return true;
29487
29488 if (!rs6000_sched_groups)
29489 return false;
29490
29491 type = get_attr_type (insn);
29492
29493 switch (type)
29494 {
29495 case TYPE_MFCR:
29496 case TYPE_MFCRF:
29497 case TYPE_MTCR:
29498 case TYPE_DELAYED_CR:
29499 case TYPE_CR_LOGICAL:
29500 case TYPE_MTJMPR:
29501 case TYPE_MFJMPR:
29502 case TYPE_DIV:
29503 case TYPE_LOAD_L:
29504 case TYPE_STORE_C:
29505 case TYPE_ISYNC:
29506 case TYPE_SYNC:
29507 return true;
29508 default:
29509 break;
29510 }
29511 break;
29512 case PROCESSOR_POWER6:
29513 type = get_attr_type (insn);
29514
29515 switch (type)
29516 {
29517 case TYPE_EXTS:
29518 case TYPE_CNTLZ:
29519 case TYPE_TRAP:
29520 case TYPE_MUL:
29521 case TYPE_INSERT:
29522 case TYPE_FPCOMPARE:
29523 case TYPE_MFCR:
29524 case TYPE_MTCR:
29525 case TYPE_MFJMPR:
29526 case TYPE_MTJMPR:
29527 case TYPE_ISYNC:
29528 case TYPE_SYNC:
29529 case TYPE_LOAD_L:
29530 case TYPE_STORE_C:
29531 return true;
29532 case TYPE_SHIFT:
29533 if (get_attr_dot (insn) == DOT_NO
29534 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
29535 return true;
29536 else
29537 break;
29538 case TYPE_DIV:
29539 if (get_attr_size (insn) == SIZE_32)
29540 return true;
29541 else
29542 break;
29543 case TYPE_LOAD:
29544 case TYPE_STORE:
29545 case TYPE_FPLOAD:
29546 case TYPE_FPSTORE:
29547 if (get_attr_update (insn) == UPDATE_YES)
29548 return true;
29549 else
29550 break;
29551 default:
29552 break;
29553 }
29554 break;
29555 case PROCESSOR_POWER7:
29556 type = get_attr_type (insn);
29557
29558 switch (type)
29559 {
29560 case TYPE_CR_LOGICAL:
29561 case TYPE_MFCR:
29562 case TYPE_MFCRF:
29563 case TYPE_MTCR:
29564 case TYPE_DIV:
29565 case TYPE_ISYNC:
29566 case TYPE_LOAD_L:
29567 case TYPE_STORE_C:
29568 case TYPE_MFJMPR:
29569 case TYPE_MTJMPR:
29570 return true;
29571 case TYPE_MUL:
29572 case TYPE_SHIFT:
29573 case TYPE_EXTS:
29574 if (get_attr_dot (insn) == DOT_YES)
29575 return true;
29576 else
29577 break;
29578 case TYPE_LOAD:
29579 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29580 || get_attr_update (insn) == UPDATE_YES)
29581 return true;
29582 else
29583 break;
29584 case TYPE_STORE:
29585 case TYPE_FPLOAD:
29586 case TYPE_FPSTORE:
29587 if (get_attr_update (insn) == UPDATE_YES)
29588 return true;
29589 else
29590 break;
29591 default:
29592 break;
29593 }
29594 break;
29595 case PROCESSOR_POWER8:
29596 case PROCESSOR_POWER9:
29597 type = get_attr_type (insn);
29598
29599 switch (type)
29600 {
29601 case TYPE_CR_LOGICAL:
29602 case TYPE_DELAYED_CR:
29603 case TYPE_MFCR:
29604 case TYPE_MFCRF:
29605 case TYPE_MTCR:
29606 case TYPE_SYNC:
29607 case TYPE_ISYNC:
29608 case TYPE_LOAD_L:
29609 case TYPE_STORE_C:
29610 case TYPE_VECSTORE:
29611 case TYPE_MFJMPR:
29612 case TYPE_MTJMPR:
29613 return true;
29614 case TYPE_SHIFT:
29615 case TYPE_EXTS:
29616 case TYPE_MUL:
29617 if (get_attr_dot (insn) == DOT_YES)
29618 return true;
29619 else
29620 break;
29621 case TYPE_LOAD:
29622 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29623 || get_attr_update (insn) == UPDATE_YES)
29624 return true;
29625 else
29626 break;
29627 case TYPE_STORE:
29628 if (get_attr_update (insn) == UPDATE_YES
29629 && get_attr_indexed (insn) == INDEXED_YES)
29630 return true;
29631 else
29632 break;
29633 default:
29634 break;
29635 }
29636 break;
29637 default:
29638 break;
29639 }
29640
29641 return false;
29642 }
29643
29644 static bool
29645 insn_must_be_last_in_group (rtx_insn *insn)
29646 {
29647 enum attr_type type;
29648
29649 if (!insn
29650 || NOTE_P (insn)
29651 || DEBUG_INSN_P (insn)
29652 || GET_CODE (PATTERN (insn)) == USE
29653 || GET_CODE (PATTERN (insn)) == CLOBBER)
29654 return false;
29655
29656 switch (rs6000_cpu) {
29657 case PROCESSOR_POWER4:
29658 case PROCESSOR_POWER5:
29659 if (is_microcoded_insn (insn))
29660 return true;
29661
29662 if (is_branch_slot_insn (insn))
29663 return true;
29664
29665 break;
29666 case PROCESSOR_POWER6:
29667 type = get_attr_type (insn);
29668
29669 switch (type)
29670 {
29671 case TYPE_EXTS:
29672 case TYPE_CNTLZ:
29673 case TYPE_TRAP:
29674 case TYPE_MUL:
29675 case TYPE_FPCOMPARE:
29676 case TYPE_MFCR:
29677 case TYPE_MTCR:
29678 case TYPE_MFJMPR:
29679 case TYPE_MTJMPR:
29680 case TYPE_ISYNC:
29681 case TYPE_SYNC:
29682 case TYPE_LOAD_L:
29683 case TYPE_STORE_C:
29684 return true;
29685 case TYPE_SHIFT:
29686 if (get_attr_dot (insn) == DOT_NO
29687 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
29688 return true;
29689 else
29690 break;
29691 case TYPE_DIV:
29692 if (get_attr_size (insn) == SIZE_32)
29693 return true;
29694 else
29695 break;
29696 default:
29697 break;
29698 }
29699 break;
29700 case PROCESSOR_POWER7:
29701 type = get_attr_type (insn);
29702
29703 switch (type)
29704 {
29705 case TYPE_ISYNC:
29706 case TYPE_SYNC:
29707 case TYPE_LOAD_L:
29708 case TYPE_STORE_C:
29709 return true;
29710 case TYPE_LOAD:
29711 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29712 && get_attr_update (insn) == UPDATE_YES)
29713 return true;
29714 else
29715 break;
29716 case TYPE_STORE:
29717 if (get_attr_update (insn) == UPDATE_YES
29718 && get_attr_indexed (insn) == INDEXED_YES)
29719 return true;
29720 else
29721 break;
29722 default:
29723 break;
29724 }
29725 break;
29726 case PROCESSOR_POWER8:
29727 case PROCESSOR_POWER9:
29728 type = get_attr_type (insn);
29729
29730 switch (type)
29731 {
29732 case TYPE_MFCR:
29733 case TYPE_MTCR:
29734 case TYPE_ISYNC:
29735 case TYPE_SYNC:
29736 case TYPE_LOAD_L:
29737 case TYPE_STORE_C:
29738 return true;
29739 case TYPE_LOAD:
29740 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29741 && get_attr_update (insn) == UPDATE_YES)
29742 return true;
29743 else
29744 break;
29745 case TYPE_STORE:
29746 if (get_attr_update (insn) == UPDATE_YES
29747 && get_attr_indexed (insn) == INDEXED_YES)
29748 return true;
29749 else
29750 break;
29751 default:
29752 break;
29753 }
29754 break;
29755 default:
29756 break;
29757 }
29758
29759 return false;
29760 }
29761
29762 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
29763 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
29764
29765 static bool
29766 is_costly_group (rtx *group_insns, rtx next_insn)
29767 {
29768 int i;
29769 int issue_rate = rs6000_issue_rate ();
29770
29771 for (i = 0; i < issue_rate; i++)
29772 {
29773 sd_iterator_def sd_it;
29774 dep_t dep;
29775 rtx insn = group_insns[i];
29776
29777 if (!insn)
29778 continue;
29779
29780 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
29781 {
29782 rtx next = DEP_CON (dep);
29783
29784 if (next == next_insn
29785 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
29786 return true;
29787 }
29788 }
29789
29790 return false;
29791 }
29792
29793 /* A utility function used by redefine_groups.
29794 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
29795 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
29796 to keep it "far" (in a separate group) from GROUP_INSNS, following
29797 one of the following schemes, depending on the value of the flag
29798 -minsert-sched-nops = X:
29799 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
29800 in order to force NEXT_INSN into a separate group.
29801 (2) X < sched_finish_regroup_exact: insert exactly X nops.
29802 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
29803 insertion (has a group just ended, how many vacant issue slots remain in the
29804 last group, and how many dispatch groups were encountered so far). */
29805
29806 static int
29807 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
29808 rtx_insn *next_insn, bool *group_end, int can_issue_more,
29809 int *group_count)
29810 {
29811 rtx nop;
29812 bool force;
29813 int issue_rate = rs6000_issue_rate ();
29814 bool end = *group_end;
29815 int i;
29816
29817 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
29818 return can_issue_more;
29819
29820 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
29821 return can_issue_more;
29822
29823 force = is_costly_group (group_insns, next_insn);
29824 if (!force)
29825 return can_issue_more;
29826
29827 if (sched_verbose > 6)
29828 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
29829 *group_count, can_issue_more);
29830
29831 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
29832 {
29833 if (*group_end)
29834 can_issue_more = 0;
29835
29836 /* Since only a branch can be issued in the last issue_slot, it is
29837 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
29838 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
29839 in this case the last nop will start a new group and the branch
29840 will be forced to the new group. */
29841 if (can_issue_more && !is_branch_slot_insn (next_insn))
29842 can_issue_more--;
29843
29844 /* Do we have a special group ending nop? */
29845 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
29846 || rs6000_cpu_attr == CPU_POWER8 || rs6000_cpu_attr == CPU_POWER9)
29847 {
29848 nop = gen_group_ending_nop ();
29849 emit_insn_before (nop, next_insn);
29850 can_issue_more = 0;
29851 }
29852 else
29853 while (can_issue_more > 0)
29854 {
29855 nop = gen_nop ();
29856 emit_insn_before (nop, next_insn);
29857 can_issue_more--;
29858 }
29859
29860 *group_end = true;
29861 return 0;
29862 }
29863
29864 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
29865 {
29866 int n_nops = rs6000_sched_insert_nops;
29867
29868 /* Nops can't be issued from the branch slot, so the effective
29869 issue_rate for nops is 'issue_rate - 1'. */
29870 if (can_issue_more == 0)
29871 can_issue_more = issue_rate;
29872 can_issue_more--;
29873 if (can_issue_more == 0)
29874 {
29875 can_issue_more = issue_rate - 1;
29876 (*group_count)++;
29877 end = true;
29878 for (i = 0; i < issue_rate; i++)
29879 {
29880 group_insns[i] = 0;
29881 }
29882 }
29883
29884 while (n_nops > 0)
29885 {
29886 nop = gen_nop ();
29887 emit_insn_before (nop, next_insn);
29888 if (can_issue_more == issue_rate - 1) /* new group begins */
29889 end = false;
29890 can_issue_more--;
29891 if (can_issue_more == 0)
29892 {
29893 can_issue_more = issue_rate - 1;
29894 (*group_count)++;
29895 end = true;
29896 for (i = 0; i < issue_rate; i++)
29897 {
29898 group_insns[i] = 0;
29899 }
29900 }
29901 n_nops--;
29902 }
29903
29904 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
29905 can_issue_more++;
29906
29907 /* Is next_insn going to start a new group? */
29908 *group_end
29909 = (end
29910 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
29911 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
29912 || (can_issue_more < issue_rate
29913 && insn_terminates_group_p (next_insn, previous_group)));
29914 if (*group_end && end)
29915 (*group_count)--;
29916
29917 if (sched_verbose > 6)
29918 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
29919 *group_count, can_issue_more);
29920 return can_issue_more;
29921 }
29922
29923 return can_issue_more;
29924 }
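/* Illustrative sketch (not part of GCC): the nop-accounting loop of
   scheme (2) above can be summarized by this stand-alone model.  One
   issue slot per group is reserved for a branch, so nops cycle through
   'issue_rate - 1' slots; the final '+ 1' rescales the count back
   relative to the full issue_rate, matching the code above.  */
#if 0
static int
model_emit_nops (int n_nops, int can_issue_more, int issue_rate)
{
  while (n_nops-- > 0)
    {
      /* emit_insn_before (gen_nop (), next_insn) would go here.  */
      if (--can_issue_more == 0)
	can_issue_more = issue_rate - 1;	/* A new group begins.  */
    }
  return can_issue_more + 1;	/* Scale back relative to issue_rate.  */
}
#endif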
29925
29926 /* This function tries to synch the dispatch groups that the compiler "sees"
29927 with the dispatch groups that the processor dispatcher is expected to
29928 form in practice. It tries to achieve this synchronization by forcing the
29929 estimated processor grouping on the compiler (as opposed to the function
29930 'pad_groups' which tries to force the scheduler's grouping on the processor).
29931
29932 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
29933 examines the (estimated) dispatch groups that will be formed by the processor
29934 dispatcher. It marks these group boundaries to reflect the estimated
29935 processor grouping, overriding the grouping that the scheduler had marked.
29936 Depending on the value of the flag '-minsert-sched-nops' this function can
29937 force certain insns into separate groups or force a certain distance between
29938 them by inserting nops, for example, if there exists a "costly dependence"
29939 between the insns.
29940
29941 The function estimates the group boundaries that the processor will form as
29942 follows: It keeps track of how many vacant issue slots are available after
29943 each insn. A subsequent insn will start a new group if one of the following
29944 4 cases applies:
29945 - no more vacant issue slots remain in the current dispatch group.
29946 - only the last issue slot, which is the branch slot, is vacant, but the next
29947 insn is not a branch.
29948 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
29949 which means that a cracked insn (which occupies two issue slots) can't be
29950 issued in this group.
29951 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
29952 start a new group. */
29953
29954 static int
29955 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
29956 rtx_insn *tail)
29957 {
29958 rtx_insn *insn, *next_insn;
29959 int issue_rate;
29960 int can_issue_more;
29961 int slot, i;
29962 bool group_end;
29963 int group_count = 0;
29964 rtx *group_insns;
29965
29966 /* Initialize. */
29967 issue_rate = rs6000_issue_rate ();
29968 group_insns = XALLOCAVEC (rtx, issue_rate);
29969 for (i = 0; i < issue_rate; i++)
29970 {
29971 group_insns[i] = 0;
29972 }
29973 can_issue_more = issue_rate;
29974 slot = 0;
29975 insn = get_next_active_insn (prev_head_insn, tail);
29976 group_end = false;
29977
29978 while (insn != NULL_RTX)
29979 {
29980 slot = (issue_rate - can_issue_more);
29981 group_insns[slot] = insn;
29982 can_issue_more =
29983 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
29984 if (insn_terminates_group_p (insn, current_group))
29985 can_issue_more = 0;
29986
29987 next_insn = get_next_active_insn (insn, tail);
29988 if (next_insn == NULL_RTX)
29989 return group_count + 1;
29990
29991 /* Is next_insn going to start a new group? */
29992 group_end
29993 = (can_issue_more == 0
29994 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
29995 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
29996 || (can_issue_more < issue_rate
29997 && insn_terminates_group_p (next_insn, previous_group)));
29998
29999 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
30000 next_insn, &group_end, can_issue_more,
30001 &group_count);
30002
30003 if (group_end)
30004 {
30005 group_count++;
30006 can_issue_more = 0;
30007 for (i = 0; i < issue_rate; i++)
30008 {
30009 group_insns[i] = 0;
30010 }
30011 }
30012
30013 if (GET_MODE (next_insn) == TImode && can_issue_more)
30014 PUT_MODE (next_insn, VOIDmode);
30015 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
30016 PUT_MODE (next_insn, TImode);
30017
30018 insn = next_insn;
30019 if (can_issue_more == 0)
30020 can_issue_more = issue_rate;
30021 } /* while */
30022
30023 return group_count;
30024 }
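/* Example of the boundary estimate above: assuming issue_rate == 5
   (the POWER4/POWER5 dispatch width), once four insns occupy slots
   0-3 only the branch slot remains, so any non-branch next_insn is
   predicted to start a new group (the second case in the predicate).  */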
30025
30026 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
30027 dispatch group boundaries that the scheduler had marked. Pad with nops
30028 any dispatch groups which have vacant issue slots, in order to force the
30029 scheduler's grouping on the processor dispatcher. The function
30030 returns the number of dispatch groups found. */
30031
30032 static int
30033 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
30034 rtx_insn *tail)
30035 {
30036 rtx_insn *insn, *next_insn;
30037 rtx nop;
30038 int issue_rate;
30039 int can_issue_more;
30040 int group_end;
30041 int group_count = 0;
30042
30043 /* Initialize issue_rate. */
30044 issue_rate = rs6000_issue_rate ();
30045 can_issue_more = issue_rate;
30046
30047 insn = get_next_active_insn (prev_head_insn, tail);
30048 next_insn = get_next_active_insn (insn, tail);
30049
30050 while (insn != NULL_RTX)
30051 {
30052 can_issue_more =
30053 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
30054
30055 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
30056
30057 if (next_insn == NULL_RTX)
30058 break;
30059
30060 if (group_end)
30061 {
30062 /* If the scheduler had marked group termination at this location
30063 (between insn and next_insn), and neither insn nor next_insn will
30064 force group termination, pad the group with nops to force group
30065 termination. */
30066 if (can_issue_more
30067 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
30068 && !insn_terminates_group_p (insn, current_group)
30069 && !insn_terminates_group_p (next_insn, previous_group))
30070 {
30071 if (!is_branch_slot_insn (next_insn))
30072 can_issue_more--;
30073
30074 while (can_issue_more)
30075 {
30076 nop = gen_nop ();
30077 emit_insn_before (nop, next_insn);
30078 can_issue_more--;
30079 }
30080 }
30081
30082 can_issue_more = issue_rate;
30083 group_count++;
30084 }
30085
30086 insn = next_insn;
30087 next_insn = get_next_active_insn (insn, tail);
30088 }
30089
30090 return group_count;
30091 }
30092
30093 /* We're beginning a new block. Initialize data structures as necessary. */
30094
30095 static void
30096 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
30097 int sched_verbose ATTRIBUTE_UNUSED,
30098 int max_ready ATTRIBUTE_UNUSED)
30099 {
30100 last_scheduled_insn = NULL_RTX;
30101 load_store_pendulum = 0;
30102 }
30103
30104 /* The following function is called at the end of scheduling BB.
30105 After reload, it inserts nops to enforce insn group bundling. */
30106
30107 static void
30108 rs6000_sched_finish (FILE *dump, int sched_verbose)
30109 {
30110 int n_groups;
30111
30112 if (sched_verbose)
30113 fprintf (dump, "=== Finishing schedule.\n");
30114
30115 if (reload_completed && rs6000_sched_groups)
30116 {
30117 /* Do not run the sched_finish hook when selective scheduling is enabled. */
30118 if (sel_sched_p ())
30119 return;
30120
30121 if (rs6000_sched_insert_nops == sched_finish_none)
30122 return;
30123
30124 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
30125 n_groups = pad_groups (dump, sched_verbose,
30126 current_sched_info->prev_head,
30127 current_sched_info->next_tail);
30128 else
30129 n_groups = redefine_groups (dump, sched_verbose,
30130 current_sched_info->prev_head,
30131 current_sched_info->next_tail);
30132
30133 if (sched_verbose >= 6)
30134 {
30135 fprintf (dump, "ngroups = %d\n", n_groups);
30136 print_rtl (dump, current_sched_info->prev_head);
30137 fprintf (dump, "Done finish_sched\n");
30138 }
30139 }
30140 }
30141
30142 struct _rs6000_sched_context
30143 {
30144 short cached_can_issue_more;
30145 rtx last_scheduled_insn;
30146 int load_store_pendulum;
30147 };
30148
30149 typedef struct _rs6000_sched_context rs6000_sched_context_def;
30150 typedef rs6000_sched_context_def *rs6000_sched_context_t;
30151
30152 /* Allocate storage for a new scheduling context. */
30153 static void *
30154 rs6000_alloc_sched_context (void)
30155 {
30156 return xmalloc (sizeof (rs6000_sched_context_def));
30157 }
30158
30159 /* If CLEAN_P is true, initialize _SC with clean data;
30160 otherwise, initialize it from the global context. */
30161 static void
30162 rs6000_init_sched_context (void *_sc, bool clean_p)
30163 {
30164 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
30165
30166 if (clean_p)
30167 {
30168 sc->cached_can_issue_more = 0;
30169 sc->last_scheduled_insn = NULL_RTX;
30170 sc->load_store_pendulum = 0;
30171 }
30172 else
30173 {
30174 sc->cached_can_issue_more = cached_can_issue_more;
30175 sc->last_scheduled_insn = last_scheduled_insn;
30176 sc->load_store_pendulum = load_store_pendulum;
30177 }
30178 }
30179
30180 /* Sets the global scheduling context to the one pointed to by _SC. */
30181 static void
30182 rs6000_set_sched_context (void *_sc)
30183 {
30184 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
30185
30186 gcc_assert (sc != NULL);
30187
30188 cached_can_issue_more = sc->cached_can_issue_more;
30189 last_scheduled_insn = sc->last_scheduled_insn;
30190 load_store_pendulum = sc->load_store_pendulum;
30191 }
30192
30193 /* Free _SC. */
30194 static void
30195 rs6000_free_sched_context (void *_sc)
30196 {
30197 gcc_assert (_sc != NULL);
30198
30199 free (_sc);
30200 }
30201
30202 \f
30203 /* Length in units of the trampoline for entering a nested function. */
30204
30205 int
30206 rs6000_trampoline_size (void)
30207 {
30208 int ret = 0;
30209
30210 switch (DEFAULT_ABI)
30211 {
30212 default:
30213 gcc_unreachable ();
30214
30215 case ABI_AIX:
30216 ret = (TARGET_32BIT) ? 12 : 24;
30217 break;
30218
30219 case ABI_ELFv2:
30220 gcc_assert (!TARGET_32BIT);
30221 ret = 32;
30222 break;
30223
30224 case ABI_DARWIN:
30225 case ABI_V4:
30226 ret = (TARGET_32BIT) ? 40 : 48;
30227 break;
30228 }
30229
30230 return ret;
30231 }
30232
30233 /* Emit RTL insns to initialize the variable parts of a trampoline.
30234 FNADDR is an RTX for the address of the function's pure code.
30235 CXT is an RTX for the static chain value for the function. */
30236
30237 static void
30238 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
30239 {
30240 int regsize = (TARGET_32BIT) ? 4 : 8;
30241 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
30242 rtx ctx_reg = force_reg (Pmode, cxt);
30243 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
30244
30245 switch (DEFAULT_ABI)
30246 {
30247 default:
30248 gcc_unreachable ();
30249
30250 /* Under AIX, just build the 3-word function descriptor. */
30251 case ABI_AIX:
30252 {
30253 rtx fnmem, fn_reg, toc_reg;
30254
30255 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
30256 error ("you cannot take the address of a nested function if you use "
30257 "the -mno-pointers-to-nested-functions option");
30258
30259 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
30260 fn_reg = gen_reg_rtx (Pmode);
30261 toc_reg = gen_reg_rtx (Pmode);
30262
30263 /* Macro to shorten the code expansions below. */
30264 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
30265
30266 m_tramp = replace_equiv_address (m_tramp, addr);
30267
30268 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
30269 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
30270 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
30271 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
30272 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
30273
30274 # undef MEM_PLUS
30275 }
30276 break;
30277
30278 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
30279 case ABI_ELFv2:
30280 case ABI_DARWIN:
30281 case ABI_V4:
30282 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
30283 LCT_NORMAL, VOIDmode, 4,
30284 addr, Pmode,
30285 GEN_INT (rs6000_trampoline_size ()), SImode,
30286 fnaddr, Pmode,
30287 ctx_reg, Pmode);
30288 break;
30289 }
30290 }
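/* Illustrative layout of the AIX trampoline built above (64-bit,
   regsize == 8):

     m_tramp + 0:   code address  (copied from word 0 of FNADDR's descriptor)
     m_tramp + 8:   TOC pointer   (copied from word 1)
     m_tramp + 16:  static chain  (CXT)  */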
30291
30292 \f
30293 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
30294 identifier as an argument, so the front end shouldn't look it up. */
30295
30296 static bool
30297 rs6000_attribute_takes_identifier_p (const_tree attr_id)
30298 {
30299 return is_attribute_p ("altivec", attr_id);
30300 }
30301
30302 /* Handle the "altivec" attribute. The attribute may have
30303 arguments as follows:
30304
30305 __attribute__((altivec(vector__)))
30306 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
30307 __attribute__((altivec(bool__))) (always followed by 'unsigned')
30308
30309 and may appear more than once (e.g., 'vector bool char') in a
30310 given declaration. */
30311
30312 static tree
30313 rs6000_handle_altivec_attribute (tree *node,
30314 tree name ATTRIBUTE_UNUSED,
30315 tree args,
30316 int flags ATTRIBUTE_UNUSED,
30317 bool *no_add_attrs)
30318 {
30319 tree type = *node, result = NULL_TREE;
30320 machine_mode mode;
30321 int unsigned_p;
30322 char altivec_type
30323 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
30324 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
30325 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
30326 : '?');
30327
30328 while (POINTER_TYPE_P (type)
30329 || TREE_CODE (type) == FUNCTION_TYPE
30330 || TREE_CODE (type) == METHOD_TYPE
30331 || TREE_CODE (type) == ARRAY_TYPE)
30332 type = TREE_TYPE (type);
30333
30334 mode = TYPE_MODE (type);
30335
30336 /* Check for invalid AltiVec type qualifiers. */
30337 if (type == long_double_type_node)
30338 error ("use of %<long double%> in AltiVec types is invalid");
30339 else if (type == boolean_type_node)
30340 error ("use of boolean types in AltiVec types is invalid");
30341 else if (TREE_CODE (type) == COMPLEX_TYPE)
30342 error ("use of %<complex%> in AltiVec types is invalid");
30343 else if (DECIMAL_FLOAT_MODE_P (mode))
30344 error ("use of decimal floating point types in AltiVec types is invalid");
30345 else if (!TARGET_VSX)
30346 {
30347 if (type == long_unsigned_type_node || type == long_integer_type_node)
30348 {
30349 if (TARGET_64BIT)
30350 error ("use of %<long%> in AltiVec types is invalid for "
30351 "64-bit code without -mvsx");
30352 else if (rs6000_warn_altivec_long)
30353 warning (0, "use of %<long%> in AltiVec types is deprecated; "
30354 "use %<int%>");
30355 }
30356 else if (type == long_long_unsigned_type_node
30357 || type == long_long_integer_type_node)
30358 error ("use of %<long long%> in AltiVec types is invalid without "
30359 "-mvsx");
30360 else if (type == double_type_node)
30361 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
30362 }
30363
30364 switch (altivec_type)
30365 {
30366 case 'v':
30367 unsigned_p = TYPE_UNSIGNED (type);
30368 switch (mode)
30369 {
30370 case TImode:
30371 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
30372 break;
30373 case DImode:
30374 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
30375 break;
30376 case SImode:
30377 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
30378 break;
30379 case HImode:
30380 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
30381 break;
30382 case QImode:
30383 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
30384 break;
30385 case SFmode: result = V4SF_type_node; break;
30386 case DFmode: result = V2DF_type_node; break;
30387 /* If the user says 'vector int bool', we may be handed the 'bool'
30388 attribute _before_ the 'vector' attribute, and so select the
30389 proper type in the 'b' case below. */
30390 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
30391 case V2DImode: case V2DFmode:
30392 result = type; break;
30393 default: break;
30394 }
30395 break;
30396 case 'b':
30397 switch (mode)
30398 {
30399 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
30400 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
30401 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
30402 case QImode: case V16QImode: result = bool_V16QI_type_node; break;
30403 default: break;
30404 }
30405 break;
30406 case 'p':
30407 switch (mode)
30408 {
30409 case V8HImode: result = pixel_V8HI_type_node; break;
30410 default: break;
30411 }
30412 default: break;
30413 }
30414
30415 /* Propagate qualifiers attached to the element type
30416 onto the vector type. */
30417 if (result && result != type && TYPE_QUALS (type))
30418 result = build_qualified_type (result, TYPE_QUALS (type));
30419
30420 *no_add_attrs = true; /* No need to hang on to the attribute. */
30421
30422 if (result)
30423 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
30424
30425 return NULL_TREE;
30426 }
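/* Usage sketch (illustrative; the typedef names are hypothetical --
   this mirrors how the 'vector' keyword is conventionally expanded):

     typedef int v4si __attribute__ ((altivec (vector__)));

   gives a type in V4SImode (V4SI_type_node via the 'v' case), and

     typedef unsigned int vbi __attribute__ ((altivec (bool__)));

   selects bool_V4SI_type_node via the 'b' case.  */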
30427
30428 /* AltiVec defines five built-in scalar types that serve as vector
30429 elements; we must teach the compiler how to mangle them. */
30430
30431 static const char *
30432 rs6000_mangle_type (const_tree type)
30433 {
30434 type = TYPE_MAIN_VARIANT (type);
30435
30436 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30437 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30438 return NULL;
30439
30440 if (type == bool_char_type_node) return "U6__boolc";
30441 if (type == bool_short_type_node) return "U6__bools";
30442 if (type == pixel_type_node) return "u7__pixel";
30443 if (type == bool_int_type_node) return "U6__booli";
30444 if (type == bool_long_type_node) return "U6__booll";
30445
30446 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
30447 "g" for IBM extended double, no matter whether it is long double (using
30448 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
30449 if (TARGET_FLOAT128)
30450 {
30451 if (type == ieee128_float_type_node)
30452 return "U10__float128";
30453
30454 if (type == ibm128_float_type_node)
30455 return "g";
30456
30457 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
30458 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
30459 }
30460
30461 /* Mangle IBM extended float long double as `g' (__float128) on
30462 powerpc*-linux, where 64-bit long double was previously the default. */
30463 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
30464 && TARGET_ELF
30465 && TARGET_LONG_DOUBLE_128
30466 && !TARGET_IEEEQUAD)
30467 return "g";
30468
30469 /* For all other types, use normal C++ mangling. */
30470 return NULL;
30471 }
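/* For example, the element type of 'vector bool int'
   (bool_int_type_node) mangles as "U6__booli" per the table above;
   the vector wrapper itself is mangled by the generic C++ code.  */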
30472
30473 /* Handle a "longcall" or "shortcall" attribute; arguments as in
30474 struct attribute_spec.handler. */
30475
30476 static tree
30477 rs6000_handle_longcall_attribute (tree *node, tree name,
30478 tree args ATTRIBUTE_UNUSED,
30479 int flags ATTRIBUTE_UNUSED,
30480 bool *no_add_attrs)
30481 {
30482 if (TREE_CODE (*node) != FUNCTION_TYPE
30483 && TREE_CODE (*node) != FIELD_DECL
30484 && TREE_CODE (*node) != TYPE_DECL)
30485 {
30486 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30487 name);
30488 *no_add_attrs = true;
30489 }
30490
30491 return NULL_TREE;
30492 }
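/* Usage sketch (per the GCC attribute syntax; the function name is
   hypothetical):

     void far_away (void) __attribute__ ((longcall));

   Calls to far_away are then made through a register, using
   rs6000_longcall_ref below.  */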
30493
30494 /* Set longcall attributes on all functions declared when
30495 rs6000_default_long_calls is true. */
30496 static void
30497 rs6000_set_default_type_attributes (tree type)
30498 {
30499 if (rs6000_default_long_calls
30500 && (TREE_CODE (type) == FUNCTION_TYPE
30501 || TREE_CODE (type) == METHOD_TYPE))
30502 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
30503 NULL_TREE,
30504 TYPE_ATTRIBUTES (type));
30505
30506 #if TARGET_MACHO
30507 darwin_set_default_type_attributes (type);
30508 #endif
30509 }
30510
30511 /* Return a reference suitable for calling a function with the
30512 longcall attribute. */
30513
30514 rtx
30515 rs6000_longcall_ref (rtx call_ref)
30516 {
30517 const char *call_name;
30518 tree node;
30519
30520 if (GET_CODE (call_ref) != SYMBOL_REF)
30521 return call_ref;
30522
30523 /* System V adds '.' to the internal name, so skip all leading periods. */
30524 call_name = XSTR (call_ref, 0);
30525 if (*call_name == '.')
30526 {
30527 while (*call_name == '.')
30528 call_name++;
30529
30530 node = get_identifier (call_name);
30531 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
30532 }
30533
30534 return force_reg (Pmode, call_ref);
30535 }
30536 \f
30537 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
30538 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
30539 #endif
30540
30541 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
30542 struct attribute_spec.handler. */
30543 static tree
30544 rs6000_handle_struct_attribute (tree *node, tree name,
30545 tree args ATTRIBUTE_UNUSED,
30546 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30547 {
30548 tree *type = NULL;
30549 if (DECL_P (*node))
30550 {
30551 if (TREE_CODE (*node) == TYPE_DECL)
30552 type = &TREE_TYPE (*node);
30553 }
30554 else
30555 type = node;
30556
30557 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
30558 || TREE_CODE (*type) == UNION_TYPE)))
30559 {
30560 warning (OPT_Wattributes, "%qE attribute ignored", name);
30561 *no_add_attrs = true;
30562 }
30563
30564 else if ((is_attribute_p ("ms_struct", name)
30565 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
30566 || ((is_attribute_p ("gcc_struct", name)
30567 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
30568 {
30569 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
30570 name);
30571 *no_add_attrs = true;
30572 }
30573
30574 return NULL_TREE;
30575 }
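/* Usage sketch (hypothetical field layout):

     struct S
     {
       char a : 4;
       int b : 17;
     } __attribute__ ((ms_struct));

   lays out the bitfields with the Microsoft rules when
   TARGET_USE_MS_BITFIELD_LAYOUT allows it; gcc_struct forces the
   native GCC layout back.  */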
30576
30577 static bool
30578 rs6000_ms_bitfield_layout_p (const_tree record_type)
30579 {
30580 return ((TARGET_USE_MS_BITFIELD_LAYOUT
30581 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
30582 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
30583 }
30584 \f
30585 #ifdef USING_ELFOS_H
30586
30587 /* A get_unnamed_section callback, used for switching to toc_section. */
30588
30589 static void
30590 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
30591 {
30592 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30593 && TARGET_MINIMAL_TOC
30594 && !TARGET_RELOCATABLE)
30595 {
30596 if (!toc_initialized)
30597 {
30598 toc_initialized = 1;
30599 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
30600 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
30601 fprintf (asm_out_file, "\t.tc ");
30602 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
30603 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
30604 fprintf (asm_out_file, "\n");
30605
30606 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
30607 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
30608 fprintf (asm_out_file, " = .+32768\n");
30609 }
30610 else
30611 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
30612 }
30613 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30614 && !TARGET_RELOCATABLE)
30615 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
30616 else
30617 {
30618 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
30619 if (!toc_initialized)
30620 {
30621 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
30622 fprintf (asm_out_file, " = .+32768\n");
30623 toc_initialized = 1;
30624 }
30625 }
30626 }
30627
30628 /* Implement TARGET_ASM_INIT_SECTIONS. */
30629
30630 static void
30631 rs6000_elf_asm_init_sections (void)
30632 {
30633 toc_section
30634 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
30635
30636 sdata2_section
30637 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
30638 SDATA2_SECTION_ASM_OP);
30639 }
30640
30641 /* Implement TARGET_SELECT_RTX_SECTION. */
30642
30643 static section *
30644 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
30645 unsigned HOST_WIDE_INT align)
30646 {
30647 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
30648 return toc_section;
30649 else
30650 return default_elf_select_rtx_section (mode, x, align);
30651 }
30652 \f
30653 /* For a SYMBOL_REF, set generic flags and then perform some
30654 target-specific processing.
30655
30656 When the AIX ABI is requested on a non-AIX system, replace the
30657 function name with the real name (with a leading .) rather than the
30658 function descriptor name. This saves a lot of overriding code to
30659 read the prefixes. */
30660
30661 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
30662 static void
30663 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
30664 {
30665 default_encode_section_info (decl, rtl, first);
30666
30667 if (first
30668 && TREE_CODE (decl) == FUNCTION_DECL
30669 && !TARGET_AIX
30670 && DEFAULT_ABI == ABI_AIX)
30671 {
30672 rtx sym_ref = XEXP (rtl, 0);
30673 size_t len = strlen (XSTR (sym_ref, 0));
30674 char *str = XALLOCAVEC (char, len + 2);
30675 str[0] = '.';
30676 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
30677 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
30678 }
30679 }
30680
30681 static inline bool
30682 compare_section_name (const char *section, const char *templ)
30683 {
30684 int len;
30685
30686 len = strlen (templ);
30687 return (strncmp (section, templ, len) == 0
30688 && (section[len] == 0 || section[len] == '.'));
30689 }
30690
30691 bool
30692 rs6000_elf_in_small_data_p (const_tree decl)
30693 {
30694 if (rs6000_sdata == SDATA_NONE)
30695 return false;
30696
30697 /* We want to merge strings, so we never consider them small data. */
30698 if (TREE_CODE (decl) == STRING_CST)
30699 return false;
30700
30701 /* Functions are never in the small data area. */
30702 if (TREE_CODE (decl) == FUNCTION_DECL)
30703 return false;
30704
30705 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
30706 {
30707 const char *section = DECL_SECTION_NAME (decl);
30708 if (compare_section_name (section, ".sdata")
30709 || compare_section_name (section, ".sdata2")
30710 || compare_section_name (section, ".gnu.linkonce.s")
30711 || compare_section_name (section, ".sbss")
30712 || compare_section_name (section, ".sbss2")
30713 || compare_section_name (section, ".gnu.linkonce.sb")
30714 || strcmp (section, ".PPC.EMB.sdata0") == 0
30715 || strcmp (section, ".PPC.EMB.sbss0") == 0)
30716 return true;
30717 }
30718 else
30719 {
30720 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
30721
30722 if (size > 0
30723 && size <= g_switch_value
30724 /* If it's not public, and we're not going to reference it there,
30725 there's no need to put it in the small data section. */
30726 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
30727 return true;
30728 }
30729
30730 return false;
30731 }
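/* For example, with -G 8 (so g_switch_value == 8) and a small-data
   ABI selected, a file-scope 'int x[2];' (8 bytes) is treated as
   small data, while 'int y[3];' (12 bytes) is not.  */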
30732
30733 #endif /* USING_ELFOS_H */
30734 \f
30735 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
30736
30737 static bool
30738 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
30739 {
30740 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
30741 }
30742
30743 /* Do not place thread-local symbols refs in the object blocks. */
30744
30745 static bool
30746 rs6000_use_blocks_for_decl_p (const_tree decl)
30747 {
30748 return !DECL_THREAD_LOCAL_P (decl);
30749 }
30750 \f
30751 /* Return a REG that occurs in ADDR with coefficient 1.
30752 ADDR can be effectively incremented by incrementing REG.
30753
30754 r0 is special and we must not select it as an address
30755 register by this routine since our caller will try to
30756 increment the returned register via an "la" instruction. */
30757
30758 rtx
30759 find_addr_reg (rtx addr)
30760 {
30761 while (GET_CODE (addr) == PLUS)
30762 {
30763 if (GET_CODE (XEXP (addr, 0)) == REG
30764 && REGNO (XEXP (addr, 0)) != 0)
30765 addr = XEXP (addr, 0);
30766 else if (GET_CODE (XEXP (addr, 1)) == REG
30767 && REGNO (XEXP (addr, 1)) != 0)
30768 addr = XEXP (addr, 1);
30769 else if (CONSTANT_P (XEXP (addr, 0)))
30770 addr = XEXP (addr, 1);
30771 else if (CONSTANT_P (XEXP (addr, 1)))
30772 addr = XEXP (addr, 0);
30773 else
30774 gcc_unreachable ();
30775 }
30776 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
30777 return addr;
30778 }
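/* For example, for ADDR == (plus (reg 9) (const_int 8)) this returns
   (reg 9), which the caller can then bump with an 'la' instruction.  */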
30779
30780 void
30781 rs6000_fatal_bad_address (rtx op)
30782 {
30783 fatal_insn ("bad address", op);
30784 }
30785
30786 #if TARGET_MACHO
30787
30788 typedef struct branch_island_d {
30789 tree function_name;
30790 tree label_name;
30791 int line_number;
30792 } branch_island;
30793
30794
30795 static vec<branch_island, va_gc> *branch_islands;
30796
30797 /* Remember to generate a branch island for far calls to the given
30798 function. */
30799
30800 static void
30801 add_compiler_branch_island (tree label_name, tree function_name,
30802 int line_number)
30803 {
30804 branch_island bi = {function_name, label_name, line_number};
30805 vec_safe_push (branch_islands, bi);
30806 }
30807
30808 /* Generate far-jump branch islands for everything recorded in
30809 branch_islands. Invoked immediately after the last instruction of
30810 the epilogue has been emitted; the branch islands must be appended
30811 to, and contiguous with, the function body. Mach-O stubs are
30812 generated in machopic_output_stub(). */
30813
30814 static void
30815 macho_branch_islands (void)
30816 {
30817 char tmp_buf[512];
30818
30819 while (!vec_safe_is_empty (branch_islands))
30820 {
30821 branch_island *bi = &branch_islands->last ();
30822 const char *label = IDENTIFIER_POINTER (bi->label_name);
30823 const char *name = IDENTIFIER_POINTER (bi->function_name);
30824 char name_buf[512];
30825 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
30826 if (name[0] == '*' || name[0] == '&')
30827 strcpy (name_buf, name+1);
30828 else
30829 {
30830 name_buf[0] = '_';
30831 strcpy (name_buf+1, name);
30832 }
30833 strcpy (tmp_buf, "\n");
30834 strcat (tmp_buf, label);
30835 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
30836 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
30837 dbxout_stabd (N_SLINE, bi->line_number);
30838 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
30839 if (flag_pic)
30840 {
30841 if (TARGET_LINK_STACK)
30842 {
30843 char name[32];
30844 get_ppc476_thunk_name (name);
30845 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
30846 strcat (tmp_buf, name);
30847 strcat (tmp_buf, "\n");
30848 strcat (tmp_buf, label);
30849 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
30850 }
30851 else
30852 {
30853 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
30854 strcat (tmp_buf, label);
30855 strcat (tmp_buf, "_pic\n");
30856 strcat (tmp_buf, label);
30857 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
30858 }
30859
30860 strcat (tmp_buf, "\taddis r11,r11,ha16(");
30861 strcat (tmp_buf, name_buf);
30862 strcat (tmp_buf, " - ");
30863 strcat (tmp_buf, label);
30864 strcat (tmp_buf, "_pic)\n");
30865
30866 strcat (tmp_buf, "\tmtlr r0\n");
30867
30868 strcat (tmp_buf, "\taddi r12,r11,lo16(");
30869 strcat (tmp_buf, name_buf);
30870 strcat (tmp_buf, " - ");
30871 strcat (tmp_buf, label);
30872 strcat (tmp_buf, "_pic)\n");
30873
30874 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
30875 }
30876 else
30877 {
30878 strcat (tmp_buf, ":\nlis r12,hi16(");
30879 strcat (tmp_buf, name_buf);
30880 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
30881 strcat (tmp_buf, name_buf);
30882 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
30883 }
30884 output_asm_insn (tmp_buf, 0);
30885 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
30886 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
30887 dbxout_stabd (N_SLINE, bi->line_number);
30888 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
30889 branch_islands->pop ();
30890 }
30891 }
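/* Illustrative non-PIC island emitted by the code above for a call to
   a function 'foo' through island label 'L42' (both hypothetical):

   L42:
	lis r12,hi16(_foo)
	ori r12,r12,lo16(_foo)
	mtctr r12
	bctr
*/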
30892
30893 /* NO_PREVIOUS_DEF checks whether the function name is already in the
30894 list of branch islands. */
30895
30896 static int
30897 no_previous_def (tree function_name)
30898 {
30899 branch_island *bi;
30900 unsigned ix;
30901
30902 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
30903 if (function_name == bi->function_name)
30904 return 0;
30905 return 1;
30906 }
30907
30908 /* GET_PREV_LABEL gets the label name from the previous definition of
30909 the function. */
30910
30911 static tree
30912 get_prev_label (tree function_name)
30913 {
30914 branch_island *bi;
30915 unsigned ix;
30916
30917 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
30918 if (function_name == bi->function_name)
30919 return bi->label_name;
30920 return NULL_TREE;
30921 }
30922
30923 /* INSN is a function call. OPERANDS[DEST_OPERAND_NUMBER] is the
30924 routine being called, and OPERANDS[COOKIE_OPERAND_NUMBER] carries the
30925 call cookie flags (CALL_LONG is the one checked below). */
30927
30928 char *
30929 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
30930 int cookie_operand_number)
30931 {
30932 static char buf[256];
30933 if (darwin_emit_branch_islands
30934 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
30935 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
30936 {
30937 tree labelname;
30938 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
30939
30940 if (no_previous_def (funname))
30941 {
30942 rtx label_rtx = gen_label_rtx ();
30943 char *label_buf, temp_buf[256];
30944 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
30945 CODE_LABEL_NUMBER (label_rtx));
30946 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
30947 labelname = get_identifier (label_buf);
30948 add_compiler_branch_island (labelname, funname, insn_line (insn));
30949 }
30950 else
30951 labelname = get_prev_label (funname);
30952
30953 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
30954 instruction will reach 'foo', otherwise link as 'bl L42'".
30955 "L42" should be a 'branch island', that will do a far jump to
30956 'foo'. Branch islands are generated in
30957 macho_branch_islands(). */
30958 sprintf (buf, "jbsr %%z%d,%.246s",
30959 dest_operand_number, IDENTIFIER_POINTER (labelname));
30960 }
30961 else
30962 sprintf (buf, "bl %%z%d", dest_operand_number);
30963 return buf;
30964 }
30965
30966 /* Generate PIC and indirect symbol stubs. */
30967
30968 void
30969 machopic_output_stub (FILE *file, const char *symb, const char *stub)
30970 {
30971 unsigned int length;
30972 char *symbol_name, *lazy_ptr_name;
30973 char *local_label_0;
30974 static int label = 0;
30975
30976 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
30977 symb = (*targetm.strip_name_encoding) (symb);
30978
30979
30980 length = strlen (symb);
30981 symbol_name = XALLOCAVEC (char, length + 32);
30982 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
30983
30984 lazy_ptr_name = XALLOCAVEC (char, length + 32);
30985 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
30986
30987 if (flag_pic == 2)
30988 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
30989 else
30990 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
30991
30992 if (flag_pic == 2)
30993 {
30994 fprintf (file, "\t.align 5\n");
30995
30996 fprintf (file, "%s:\n", stub);
30997 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30998
30999 label++;
31000 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
31001 sprintf (local_label_0, "\"L%011d$spb\"", label);
31002
31003 fprintf (file, "\tmflr r0\n");
31004 if (TARGET_LINK_STACK)
31005 {
31006 char name[32];
31007 get_ppc476_thunk_name (name);
31008 fprintf (file, "\tbl %s\n", name);
31009 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
31010 }
31011 else
31012 {
31013 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
31014 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
31015 }
31016 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
31017 lazy_ptr_name, local_label_0);
31018 fprintf (file, "\tmtlr r0\n");
31019 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
31020 (TARGET_64BIT ? "ldu" : "lwzu"),
31021 lazy_ptr_name, local_label_0);
31022 fprintf (file, "\tmtctr r12\n");
31023 fprintf (file, "\tbctr\n");
31024 }
31025 else
31026 {
31027 fprintf (file, "\t.align 4\n");
31028
31029 fprintf (file, "%s:\n", stub);
31030 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31031
31032 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
31033 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
31034 (TARGET_64BIT ? "ldu" : "lwzu"),
31035 lazy_ptr_name);
31036 fprintf (file, "\tmtctr r12\n");
31037 fprintf (file, "\tbctr\n");
31038 }
31039
31040 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
31041 fprintf (file, "%s:\n", lazy_ptr_name);
31042 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31043 fprintf (file, "%sdyld_stub_binding_helper\n",
31044 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
31045 }
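/* Illustrative non-PIC (flag_pic != 2) stub emitted above, 32-bit,
   for a symbol '_foo' (the stub and lazy-pointer names are
   hypothetical but follow the usual Darwin naming):

   _foo$stub:
	.indirect_symbol _foo
	lis r11,ha16(_foo$lazy_ptr)
	lwzu r12,lo16(_foo$lazy_ptr)(r11)
	mtctr r12
	bctr
   _foo$lazy_ptr:
	.indirect_symbol _foo
	.long dyld_stub_binding_helper
*/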
31046
31047 /* Legitimize PIC addresses. If the address is already
31048 position-independent, we return ORIG. Newly generated
31049 position-independent addresses go into a reg. This is REG if
31050 nonzero; otherwise we allocate register(s) as necessary. */
31051
31052 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
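/* SMALL_INT tests whether X fits in a signed 16-bit immediate: e.g.
   X == -0x8000 gives 0 < 0x10000 (true), while X == 0x8000 gives
   0x10000 < 0x10000 (false).  */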
31053
31054 rtx
31055 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
31056 rtx reg)
31057 {
31058 rtx base, offset;
31059
31060 if (reg == NULL && ! reload_in_progress && ! reload_completed)
31061 reg = gen_reg_rtx (Pmode);
31062
31063 if (GET_CODE (orig) == CONST)
31064 {
31065 rtx reg_temp;
31066
31067 if (GET_CODE (XEXP (orig, 0)) == PLUS
31068 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
31069 return orig;
31070
31071 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
31072
31073 /* Use a different reg for the intermediate value, as
31074 it will be marked UNCHANGING. */
31075 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
31076 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
31077 Pmode, reg_temp);
31078 offset =
31079 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
31080 Pmode, reg);
31081
31082 if (GET_CODE (offset) == CONST_INT)
31083 {
31084 if (SMALL_INT (offset))
31085 return plus_constant (Pmode, base, INTVAL (offset));
31086 else if (! reload_in_progress && ! reload_completed)
31087 offset = force_reg (Pmode, offset);
31088 else
31089 {
31090 rtx mem = force_const_mem (Pmode, orig);
31091 return machopic_legitimize_pic_address (mem, Pmode, reg);
31092 }
31093 }
31094 return gen_rtx_PLUS (Pmode, base, offset);
31095 }
31096
31097 /* Fall back on generic machopic code. */
31098 return machopic_legitimize_pic_address (orig, mode, reg);
31099 }
31100
31101 /* Output a .machine directive for the Darwin assembler, and call
31102 the generic start_file routine. */
31103
31104 static void
31105 rs6000_darwin_file_start (void)
31106 {
31107 static const struct
31108 {
31109 const char *arg;
31110 const char *name;
31111 HOST_WIDE_INT if_set;
31112 } mapping[] = {
31113 { "ppc64", "ppc64", MASK_64BIT },
31114 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
31115 { "power4", "ppc970", 0 },
31116 { "G5", "ppc970", 0 },
31117 { "7450", "ppc7450", 0 },
31118 { "7400", "ppc7400", MASK_ALTIVEC },
31119 { "G4", "ppc7400", 0 },
31120 { "750", "ppc750", 0 },
31121 { "740", "ppc750", 0 },
31122 { "G3", "ppc750", 0 },
31123 { "604e", "ppc604e", 0 },
31124 { "604", "ppc604", 0 },
31125 { "603e", "ppc603", 0 },
31126 { "603", "ppc603", 0 },
31127 { "601", "ppc601", 0 },
31128 { NULL, "ppc", 0 } };
31129 const char *cpu_id = "";
31130 size_t i;
31131
31132 rs6000_file_start ();
31133 darwin_file_start ();
31134
31135 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
31136
31137 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
31138 cpu_id = rs6000_default_cpu;
31139
31140 if (global_options_set.x_rs6000_cpu_index)
31141 cpu_id = processor_target_table[rs6000_cpu_index].name;
31142
31143 /* Look through the mapping array. Pick the first name that either
31144 matches the argument, has a bit set in IF_SET that is also set
31145 in the target flags, or has a NULL name. */
31146
31147 i = 0;
31148 while (mapping[i].arg != NULL
31149 && strcmp (mapping[i].arg, cpu_id) != 0
31150 && (mapping[i].if_set & rs6000_isa_flags) == 0)
31151 i++;
31152
31153 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
31154 }
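/* For example, -mcpu=G5 matches the { "G5", "ppc970", 0 } entry and
   emits "\t.machine ppc970"; with no match at all, the terminating
   { NULL, "ppc", 0 } entry emits ".machine ppc".  */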
31155
31156 #endif /* TARGET_MACHO */
31157
31158 #if TARGET_ELF
31159 static int
31160 rs6000_elf_reloc_rw_mask (void)
31161 {
31162 if (flag_pic)
31163 return 3;
31164 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31165 return 2;
31166 else
31167 return 0;
31168 }
31169
31170 /* Record an element in the table of global constructors. SYMBOL is
31171 a SYMBOL_REF of the function to be called; PRIORITY is a number
31172 between 0 and MAX_INIT_PRIORITY.
31173
31174 This differs from default_named_section_asm_out_constructor in
31175 that we have special handling for -mrelocatable. */
31176
31177 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
31178 static void
31179 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
31180 {
31181 const char *section = ".ctors";
31182 char buf[16];
31183
31184 if (priority != DEFAULT_INIT_PRIORITY)
31185 {
31186 sprintf (buf, ".ctors.%.5u",
31187 /* Invert the numbering so the linker puts us in the proper
31188 order; constructors are run from right to left, and the
31189 linker sorts in increasing order. */
31190 MAX_INIT_PRIORITY - priority);
31191 section = buf;
31192 }
31193
31194 switch_to_section (get_section (section, SECTION_WRITE, NULL));
31195 assemble_align (POINTER_SIZE);
31196
31197 if (TARGET_RELOCATABLE)
31198 {
31199 fputs ("\t.long (", asm_out_file);
31200 output_addr_const (asm_out_file, symbol);
31201 fputs (")@fixup\n", asm_out_file);
31202 }
31203 else
31204 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
31205 }
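/* For example, with MAX_INIT_PRIORITY == 65535, a constructor with
   priority 65500 is placed in section ".ctors.00035"; the inverted
   numbering makes the linker's increasing sort agree with the
   right-to-left execution order noted above.  */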
31206
31207 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
31208 static void
31209 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
31210 {
31211 const char *section = ".dtors";
31212 char buf[16];
31213
31214 if (priority != DEFAULT_INIT_PRIORITY)
31215 {
31216 sprintf (buf, ".dtors.%.5u",
31217 /* Invert the numbering so the linker puts us in the proper
31218 order; constructors are run from right to left, and the
31219 linker sorts in increasing order. */
31220 MAX_INIT_PRIORITY - priority);
31221 section = buf;
31222 }
31223
31224 switch_to_section (get_section (section, SECTION_WRITE, NULL));
31225 assemble_align (POINTER_SIZE);
31226
31227 if (TARGET_RELOCATABLE)
31228 {
31229 fputs ("\t.long (", asm_out_file);
31230 output_addr_const (asm_out_file, symbol);
31231 fputs (")@fixup\n", asm_out_file);
31232 }
31233 else
31234 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
31235 }
31236
31237 void
31238 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
31239 {
31240 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
31241 {
31242 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
31243 ASM_OUTPUT_LABEL (file, name);
31244 fputs (DOUBLE_INT_ASM_OP, file);
31245 rs6000_output_function_entry (file, name);
31246 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
31247 if (DOT_SYMBOLS)
31248 {
31249 fputs ("\t.size\t", file);
31250 assemble_name (file, name);
31251 fputs (",24\n\t.type\t.", file);
31252 assemble_name (file, name);
31253 fputs (",@function\n", file);
31254 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
31255 {
31256 fputs ("\t.globl\t.", file);
31257 assemble_name (file, name);
31258 putc ('\n', file);
31259 }
31260 }
31261 else
31262 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
31263 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
31264 rs6000_output_function_entry (file, name);
31265 fputs (":\n", file);
31266 return;
31267 }
31268
31269 if (TARGET_RELOCATABLE
31270 && !TARGET_SECURE_PLT
31271 && (get_pool_size () != 0 || crtl->profile)
31272 && uses_TOC ())
31273 {
31274 char buf[256];
31275
31276 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
31277
31278 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
31279 fprintf (file, "\t.long ");
31280 assemble_name (file, buf);
31281 putc ('-', file);
31282 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
31283 assemble_name (file, buf);
31284 putc ('\n', file);
31285 }
31286
31287 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
31288 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
31289
31290 if (DEFAULT_ABI == ABI_AIX)
31291 {
31292 const char *desc_name, *orig_name;
31293
31294 orig_name = (*targetm.strip_name_encoding) (name);
31295 desc_name = orig_name;
31296 while (*desc_name == '.')
31297 desc_name++;
31298
31299 if (TREE_PUBLIC (decl))
31300 fprintf (file, "\t.globl %s\n", desc_name);
31301
31302 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
31303 fprintf (file, "%s:\n", desc_name);
31304 fprintf (file, "\t.long %s\n", orig_name);
31305 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
31306 fputs ("\t.long 0\n", file);
31307 fprintf (file, "\t.previous\n");
31308 }
31309 ASM_OUTPUT_LABEL (file, name);
31310 }
31311
31312 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
31313 static void
31314 rs6000_elf_file_end (void)
31315 {
31316 #ifdef HAVE_AS_GNU_ATTRIBUTE
31317 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
31318 {
31319 if (rs6000_passes_float)
31320 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
31321 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
31322 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
31323 : 2));
31324 if (rs6000_passes_vector)
31325 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
31326 (TARGET_ALTIVEC_ABI ? 2
31327 : TARGET_SPE_ABI ? 3
31328 : 1));
31329 if (rs6000_returns_struct)
31330 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
31331 aix_struct_return ? 2 : 1);
31332 }
31333 #endif
31334 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
31335 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
31336 file_end_indicate_exec_stack ();
31337 #endif
31338
31339 if (flag_split_stack)
31340 file_end_indicate_split_stack ();
31341 }
31342 #endif
31343
31344 #if TARGET_XCOFF
31345
31346 #ifndef HAVE_XCOFF_DWARF_EXTRAS
31347 #define HAVE_XCOFF_DWARF_EXTRAS 0
31348 #endif
31349
31350 static enum unwind_info_type
31351 rs6000_xcoff_debug_unwind_info (void)
31352 {
31353 return UI_NONE;
31354 }
31355
31356 static void
31357 rs6000_xcoff_asm_output_anchor (rtx symbol)
31358 {
31359 char buffer[100];
31360
31361 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
31362 SYMBOL_REF_BLOCK_OFFSET (symbol));
31363 fprintf (asm_out_file, "%s", SET_ASM_OP);
31364 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
31365 fprintf (asm_out_file, ",");
31366 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
31367 fprintf (asm_out_file, "\n");
31368 }
31369
31370 static void
31371 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
31372 {
31373 fputs (GLOBAL_ASM_OP, stream);
31374 RS6000_OUTPUT_BASENAME (stream, name);
31375 putc ('\n', stream);
31376 }
31377
31378 /* A get_unnamed_decl callback, used for read-only sections. DIRECTIVE
31379 points to the section string variable. */
31380
31381 static void
31382 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
31383 {
31384 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
31385 *(const char *const *) directive,
31386 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
31387 }
31388
31389 /* Likewise for read-write sections. */
31390
31391 static void
31392 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
31393 {
31394 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
31395 *(const char *const *) directive,
31396 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
31397 }
31398
31399 static void
31400 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
31401 {
31402 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
31403 *(const char *const *) directive,
31404 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
31405 }
31406
31407 /* A get_unnamed_section callback, used for switching to toc_section. */
31408
31409 static void
31410 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
31411 {
31412 if (TARGET_MINIMAL_TOC)
31413 {
31414 /* toc_section is always selected at least once from
31415 rs6000_xcoff_file_start, so this is guaranteed to
31416 always be defined once and only once in each file. */
31417 if (!toc_initialized)
31418 {
31419 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
31420 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
31421 toc_initialized = 1;
31422 }
31423 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
31424 (TARGET_32BIT ? "" : ",3"));
31425 }
31426 else
31427 fputs ("\t.toc\n", asm_out_file);
31428 }
31429
31430 /* Implement TARGET_ASM_INIT_SECTIONS. */
31431
31432 static void
31433 rs6000_xcoff_asm_init_sections (void)
31434 {
31435 read_only_data_section
31436 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
31437 &xcoff_read_only_section_name);
31438
31439 private_data_section
31440 = get_unnamed_section (SECTION_WRITE,
31441 rs6000_xcoff_output_readwrite_section_asm_op,
31442 &xcoff_private_data_section_name);
31443
31444 tls_data_section
31445 = get_unnamed_section (SECTION_TLS,
31446 rs6000_xcoff_output_tls_section_asm_op,
31447 &xcoff_tls_data_section_name);
31448
31449 tls_private_data_section
31450 = get_unnamed_section (SECTION_TLS,
31451 rs6000_xcoff_output_tls_section_asm_op,
31452 &xcoff_private_data_section_name);
31453
31454 read_only_private_data_section
31455 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
31456 &xcoff_private_data_section_name);
31457
31458 toc_section
31459 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
31460
31461 readonly_data_section = read_only_data_section;
31462 }
31463
31464 static int
31465 rs6000_xcoff_reloc_rw_mask (void)
31466 {
31467 return 3;
31468 }
31469
31470 static void
31471 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
31472 tree decl ATTRIBUTE_UNUSED)
31473 {
31474 int smclass;
31475 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
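  /* Annotation: smclass indexes the XCOFF storage-mapping-class
     suffixes above: 0 = PR for SECTION_CODE, 1 = RO for read-only data
     (the default), 2 = RW for SECTION_WRITE, 3 = TL for SECTION_TLS,
     and 4 = XO for SECTION_EXCLUDE.  */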
31476
31477 if (flags & SECTION_EXCLUDE)
31478 smclass = 4;
31479 else if (flags & SECTION_DEBUG)
31480 {
31481 fprintf (asm_out_file, "\t.dwsect %s\n", name);
31482 return;
31483 }
31484 else if (flags & SECTION_CODE)
31485 smclass = 0;
31486 else if (flags & SECTION_TLS)
31487 smclass = 3;
31488 else if (flags & SECTION_WRITE)
31489 smclass = 2;
31490 else
31491 smclass = 1;
31492
31493 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
31494 (flags & SECTION_CODE) ? "." : "",
31495 name, suffix[smclass], flags & SECTION_ENTSIZE);
31496 }
31497
31498 #define IN_NAMED_SECTION(DECL) \
31499 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
31500 && DECL_SECTION_NAME (DECL) != NULL)
31501
31502 static section *
31503 rs6000_xcoff_select_section (tree decl, int reloc,
31504 unsigned HOST_WIDE_INT align)
31505 {
31506 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
31507 a named section. */
31508 if (align > BIGGEST_ALIGNMENT)
31509 {
31510 resolve_unique_section (decl, reloc, true);
31511 if (IN_NAMED_SECTION (decl))
31512 return get_named_section (decl, NULL, reloc);
31513 }
31514
31515 if (decl_readonly_section (decl, reloc))
31516 {
31517 if (TREE_PUBLIC (decl))
31518 return read_only_data_section;
31519 else
31520 return read_only_private_data_section;
31521 }
31522 else
31523 {
31524 #if HAVE_AS_TLS
31525 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
31526 {
31527 if (TREE_PUBLIC (decl))
31528 return tls_data_section;
31529 else if (bss_initializer_p (decl))
31530 {
31531 /* Convert to COMMON to emit in BSS. */
31532 DECL_COMMON (decl) = 1;
31533 return tls_comm_section;
31534 }
31535 else
31536 return tls_private_data_section;
31537 }
31538 else
31539 #endif
31540 if (TREE_PUBLIC (decl))
31541 return data_section;
31542 else
31543 return private_data_section;
31544 }
31545 }
31546
31547 static void
31548 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
31549 {
31550 const char *name;
31551
31552 /* Use select_section for private data and uninitialized data with
31553 alignment <= BIGGEST_ALIGNMENT. */
31554 if (!TREE_PUBLIC (decl)
31555 || DECL_COMMON (decl)
31556 || (DECL_INITIAL (decl) == NULL_TREE
31557 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
31558 || DECL_INITIAL (decl) == error_mark_node
31559 || (flag_zero_initialized_in_bss
31560 && initializer_zerop (DECL_INITIAL (decl))))
31561 return;
31562
31563 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31564 name = (*targetm.strip_name_encoding) (name);
31565 set_decl_section_name (decl, name);
31566 }
31567
31568 /* Select section for constant in constant pool.
31569
31570 On RS/6000, all constants are in the private read-only data area.
31571 However, if this is being placed in the TOC it must be output as a
31572 toc entry. */
31573
31574 static section *
31575 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
31576 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
31577 {
31578 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
31579 return toc_section;
31580 else
31581 return read_only_private_data_section;
31582 }
31583
31584 /* Remove any trailing [DS] or the like from the symbol name. */
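/* Annotation: the bracketed suffix is assumed to be exactly four
   characters, so e.g. "foo[DS]" and "bar[RW]" are both stripped to
   "foo" and "bar".  */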
31585
31586 static const char *
31587 rs6000_xcoff_strip_name_encoding (const char *name)
31588 {
31589 size_t len;
31590 if (*name == '*')
31591 name++;
31592 len = strlen (name);
31593 if (name[len - 1] == ']')
31594 return ggc_alloc_string (name, len - 4);
31595 else
31596 return name;
31597 }
31598
31599 /* Section attributes. AIX is always PIC. */
31600
31601 static unsigned int
31602 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
31603 {
31604 unsigned int align;
31605 unsigned int flags = default_section_type_flags (decl, name, reloc);
31606
31607 /* Align to at least UNIT size. */
31608 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
31609 align = MIN_UNITS_PER_WORD;
31610 else
31611 /* Increase alignment of large objects if not already stricter. */
31612 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
31613 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
31614 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
31615
31616 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
31617 }
31618
31619 /* Output at beginning of assembler file.
31620
31621 Initialize the section names for the RS/6000 at this point.
31622
31623 Specify filename, including full path, to assembler.
31624
31625 We want to go into the TOC section so at least one .toc will be emitted.
31626 Also, in order to output proper .bs/.es pairs, we need at least one static
31627 [RW] section emitted.
31628
31629 Finally, declare mcount when profiling to make the assembler happy. */
31630
31631 static void
31632 rs6000_xcoff_file_start (void)
31633 {
31634 rs6000_gen_section_name (&xcoff_bss_section_name,
31635 main_input_filename, ".bss_");
31636 rs6000_gen_section_name (&xcoff_private_data_section_name,
31637 main_input_filename, ".rw_");
31638 rs6000_gen_section_name (&xcoff_read_only_section_name,
31639 main_input_filename, ".ro_");
31640 rs6000_gen_section_name (&xcoff_tls_data_section_name,
31641 main_input_filename, ".tls_");
31642 rs6000_gen_section_name (&xcoff_tbss_section_name,
31643 main_input_filename, ".tbss_[UL]");
31644
31645 fputs ("\t.file\t", asm_out_file);
31646 output_quoted_string (asm_out_file, main_input_filename);
31647 fputc ('\n', asm_out_file);
31648 if (write_symbols != NO_DEBUG)
31649 switch_to_section (private_data_section);
31650 switch_to_section (text_section);
31651 if (profile_flag)
31652 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
31653 rs6000_file_start ();
31654 }
31655
31656 /* Output at end of assembler file.
31657 On the RS/6000, referencing data should automatically pull in text. */
31658
31659 static void
31660 rs6000_xcoff_file_end (void)
31661 {
31662 switch_to_section (text_section);
31663 fputs ("_section_.text:\n", asm_out_file);
31664 switch_to_section (data_section);
31665 fputs (TARGET_32BIT
31666 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
31667 asm_out_file);
31668 }
31669
31670 struct declare_alias_data
31671 {
31672 FILE *file;
31673 bool function_descriptor;
31674 };
31675
31676 /* Declare alias N. A helper callback for call_for_symbol_and_aliases. */
31677
31678 static bool
31679 rs6000_declare_alias (struct symtab_node *n, void *d)
31680 {
31681 struct declare_alias_data *data = (struct declare_alias_data *)d;
31682 /* The main symbol is output specially, because the varasm machinery does
31683 part of the job for us; we do not need to emit .globl/.lglobl for it. */
31684 if (!n->alias || n->weakref)
31685 return false;
31686
31687 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
31688 return false;
31689
31690 /* Prevent assemble_alias from trying to use .set pseudo operation
31691 that does not behave as expected by the middle-end. */
31692 TREE_ASM_WRITTEN (n->decl) = true;
31693
31694 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
31695 char *buffer = (char *) alloca (strlen (name) + 2);
31696 char *p;
31697 int dollar_inside = 0;
31698
31699 strcpy (buffer, name);
31700 p = strchr (buffer, '$');
31701 while (p) {
31702 *p = '_';
31703 dollar_inside++;
31704 p = strchr (p + 1, '$');
31705 }
31706 if (TREE_PUBLIC (n->decl))
31707 {
31708 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
31709 {
31710 if (dollar_inside) {
31711 if (data->function_descriptor)
31712 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
31713 else
31714 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
31715 }
31716 if (data->function_descriptor)
31717 fputs ("\t.globl .", data->file);
31718 else
31719 fputs ("\t.globl ", data->file);
31720 RS6000_OUTPUT_BASENAME (data->file, buffer);
31721 putc ('\n', data->file);
31722 }
31723 #ifdef ASM_WEAKEN_DECL
31724 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
31725 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
31726 #endif
31727 }
31728 else
31729 {
31730 if (dollar_inside)
31731 {
31732 if (data->function_descriptor)
31733 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
31734 else
31735 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
31736 }
31737 if (data->function_descriptor)
31738 fputs ("\t.lglobl .", data->file);
31739 else
31740 fputs ("\t.lglobl ", data->file);
31741 RS6000_OUTPUT_BASENAME (data->file, buffer);
31742 putc ('\n', data->file);
31743 }
31744 if (data->function_descriptor)
31745 fputs (".", data->file);
31746 RS6000_OUTPUT_BASENAME (data->file, buffer);
31747 fputs (":\n", data->file);
31748 return false;
31749 }
31750
31751 /* This macro produces the initial definition of a function name.
31752 On the RS/6000, we need to place an extra '.' in the function name and
31753 output the function descriptor.
31754 Dollar signs are converted to underscores.
31755
31756 The csect for the function will have already been created when
31757 text_section was selected. We do have to go back to that csect, however.
31758
31759 The third and fourth parameters to the .function pseudo-op (16 and 044)
31760 are placeholders which no longer have any use.
31761
31762 Because AIX assembler's .set command has unexpected semantics, we output
31763 all aliases as alternative labels in front of the definition. */
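/* Rough illustration (not from the original source): for a public
   64-bit function "foo" with no aliases and no '$' in its name, the
   code below emits approximately

	.globl .foo
	.csect foo[DS],3
   foo:
	.llong .foo, TOC[tc0], 0
	(switch to the function's [PR] text csect)
   .foo:

   so "foo" names the descriptor csect and ".foo" the code entry.  */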
31764
31765 void
31766 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
31767 {
31768 char *buffer = (char *) alloca (strlen (name) + 1);
31769 char *p;
31770 int dollar_inside = 0;
31771 struct declare_alias_data data = {file, false};
31772
31773 strcpy (buffer, name);
31774 p = strchr (buffer, '$');
31775 while (p) {
31776 *p = '_';
31777 dollar_inside++;
31778 p = strchr (p + 1, '$');
31779 }
31780 if (TREE_PUBLIC (decl))
31781 {
31782 if (!RS6000_WEAK || !DECL_WEAK (decl))
31783 {
31784 if (dollar_inside) {
31785 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
31786 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
31787 }
31788 fputs ("\t.globl .", file);
31789 RS6000_OUTPUT_BASENAME (file, buffer);
31790 putc ('\n', file);
31791 }
31792 }
31793 else
31794 {
31795 if (dollar_inside) {
31796 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
31797 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
31798 }
31799 fputs ("\t.lglobl .", file);
31800 RS6000_OUTPUT_BASENAME (file, buffer);
31801 putc ('\n', file);
31802 }
31803 fputs ("\t.csect ", file);
31804 RS6000_OUTPUT_BASENAME (file, buffer);
31805 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
31806 RS6000_OUTPUT_BASENAME (file, buffer);
31807 fputs (":\n", file);
31808 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
31809 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
31810 RS6000_OUTPUT_BASENAME (file, buffer);
31811 fputs (", TOC[tc0], 0\n", file);
31812 in_section = NULL;
31813 switch_to_section (function_section (decl));
31814 putc ('.', file);
31815 RS6000_OUTPUT_BASENAME (file, buffer);
31816 fputs (":\n", file);
31817 data.function_descriptor = true;
31818 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
31819 if (!DECL_IGNORED_P (decl))
31820 {
31821 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
31822 xcoffout_declare_function (file, decl, buffer);
31823 else if (write_symbols == DWARF2_DEBUG)
31824 {
31825 name = (*targetm.strip_name_encoding) (name);
31826 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
31827 }
31828 }
31829 return;
31830 }
31831
31832 /* This macro produces the initial definition of an object (variable) name.
31833 Because AIX assembler's .set command has unexpected semantics, we output
31834 all aliases as alternative labels in front of the definition. */
31835
31836 void
31837 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
31838 {
31839 struct declare_alias_data data = {file, false};
31840 RS6000_OUTPUT_BASENAME (file, name);
31841 fputs (":\n", file);
31842 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
31843 }
31844
31845 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
31846
31847 void
31848 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
31849 {
31850 fputs (integer_asm_op (size, FALSE), file);
31851 assemble_name (file, label);
31852 fputs ("-$", file);
31853 }
31854
31855 /* Output a symbol offset relative to the dbase for the current object.
31856 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
31857 signed offsets.
31858
31859 __gcc_unwind_dbase is embedded in all executables/libraries through
31860 libgcc/config/rs6000/crtdbase.S. */
31861
31862 void
31863 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
31864 {
31865 fputs (integer_asm_op (size, FALSE), file);
31866 assemble_name (file, label);
31867 fputs("-__gcc_unwind_dbase", file);
31868 }
31869
31870 #ifdef HAVE_AS_TLS
31871 static void
31872 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
31873 {
31874 rtx symbol;
31875 int flags;
31876
31877 default_encode_section_info (decl, rtl, first);
31878
31879 /* Careful not to prod global register variables. */
31880 if (!MEM_P (rtl))
31881 return;
31882 symbol = XEXP (rtl, 0);
31883 if (GET_CODE (symbol) != SYMBOL_REF)
31884 return;
31885
31886 flags = SYMBOL_REF_FLAGS (symbol);
31887
31888 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
31889 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
31890
31891 SYMBOL_REF_FLAGS (symbol) = flags;
31892 }
31893 #endif /* HAVE_AS_TLS */
31894 #endif /* TARGET_XCOFF */
31895
31896 /* Return true if INSN should not be copied. */
31897
31898 static bool
31899 rs6000_cannot_copy_insn_p (rtx_insn *insn)
31900 {
31901 return recog_memoized (insn) >= 0
31902 && get_attr_cannot_copy (insn);
31903 }
31904
31905 /* Compute a (partial) cost for rtx X. Return true if the complete
31906 cost has been computed, and false if subexpressions should be
31907 scanned. In either case, *TOTAL contains the cost result. */
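/* Annotation: the costs below are on the COSTS_N_INSNS scale, where
   COSTS_N_INSNS (1) is the cost of one simple integer instruction.  */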
31908
31909 static bool
31910 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
31911 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
31912 {
31913 int code = GET_CODE (x);
31914
31915 switch (code)
31916 {
31917 /* On the RS/6000, if it is valid in the insn, it is free. */
31918 case CONST_INT:
31919 if (((outer_code == SET
31920 || outer_code == PLUS
31921 || outer_code == MINUS)
31922 && (satisfies_constraint_I (x)
31923 || satisfies_constraint_L (x)))
31924 || (outer_code == AND
31925 && (satisfies_constraint_K (x)
31926 || (mode == SImode
31927 ? satisfies_constraint_L (x)
31928 : satisfies_constraint_J (x))))
31929 || ((outer_code == IOR || outer_code == XOR)
31930 && (satisfies_constraint_K (x)
31931 || (mode == SImode
31932 ? satisfies_constraint_L (x)
31933 : satisfies_constraint_J (x))))
31934 || outer_code == ASHIFT
31935 || outer_code == ASHIFTRT
31936 || outer_code == LSHIFTRT
31937 || outer_code == ROTATE
31938 || outer_code == ROTATERT
31939 || outer_code == ZERO_EXTRACT
31940 || (outer_code == MULT
31941 && satisfies_constraint_I (x))
31942 || ((outer_code == DIV || outer_code == UDIV
31943 || outer_code == MOD || outer_code == UMOD)
31944 && exact_log2 (INTVAL (x)) >= 0)
31945 || (outer_code == COMPARE
31946 && (satisfies_constraint_I (x)
31947 || satisfies_constraint_K (x)))
31948 || ((outer_code == EQ || outer_code == NE)
31949 && (satisfies_constraint_I (x)
31950 || satisfies_constraint_K (x)
31951 || (mode == SImode
31952 ? satisfies_constraint_L (x)
31953 : satisfies_constraint_J (x))))
31954 || (outer_code == GTU
31955 && satisfies_constraint_I (x))
31956 || (outer_code == LTU
31957 && satisfies_constraint_P (x)))
31958 {
31959 *total = 0;
31960 return true;
31961 }
31962 else if ((outer_code == PLUS
31963 && reg_or_add_cint_operand (x, VOIDmode))
31964 || (outer_code == MINUS
31965 && reg_or_sub_cint_operand (x, VOIDmode))
31966 || ((outer_code == SET
31967 || outer_code == IOR
31968 || outer_code == XOR)
31969 && (INTVAL (x)
31970 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
31971 {
31972 *total = COSTS_N_INSNS (1);
31973 return true;
31974 }
31975 /* FALLTHRU */
31976
31977 case CONST_DOUBLE:
31978 case CONST_WIDE_INT:
31979 case CONST:
31980 case HIGH:
31981 case SYMBOL_REF:
31982 case MEM:
31983 /* When optimizing for size, MEM should be slightly more expensive
31984 than generating address, e.g., (plus (reg) (const)).
31985 L1 cache latency is about two instructions. */
31986 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
31987 return true;
31988
31989 case LABEL_REF:
31990 *total = 0;
31991 return true;
31992
31993 case PLUS:
31994 case MINUS:
31995 if (FLOAT_MODE_P (mode))
31996 *total = rs6000_cost->fp;
31997 else
31998 *total = COSTS_N_INSNS (1);
31999 return false;
32000
32001 case MULT:
32002 if (GET_CODE (XEXP (x, 1)) == CONST_INT
32003 && satisfies_constraint_I (XEXP (x, 1)))
32004 {
32005 if (INTVAL (XEXP (x, 1)) >= -256
32006 && INTVAL (XEXP (x, 1)) <= 255)
32007 *total = rs6000_cost->mulsi_const9;
32008 else
32009 *total = rs6000_cost->mulsi_const;
32010 }
32011 else if (mode == SFmode)
32012 *total = rs6000_cost->fp;
32013 else if (FLOAT_MODE_P (mode))
32014 *total = rs6000_cost->dmul;
32015 else if (mode == DImode)
32016 *total = rs6000_cost->muldi;
32017 else
32018 *total = rs6000_cost->mulsi;
32019 return false;
32020
32021 case FMA:
32022 if (mode == SFmode)
32023 *total = rs6000_cost->fp;
32024 else
32025 *total = rs6000_cost->dmul;
32026 break;
32027
32028 case DIV:
32029 case MOD:
32030 if (FLOAT_MODE_P (mode))
32031 {
32032 *total = mode == DFmode ? rs6000_cost->ddiv
32033 : rs6000_cost->sdiv;
32034 return false;
32035 }
32036 /* FALLTHRU */
32037
32038 case UDIV:
32039 case UMOD:
32040 if (GET_CODE (XEXP (x, 1)) == CONST_INT
32041 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
32042 {
32043 if (code == DIV || code == MOD)
32044 /* Shift, addze */
32045 *total = COSTS_N_INSNS (2);
32046 else
32047 /* Shift */
32048 *total = COSTS_N_INSNS (1);
32049 }
32050 else
32051 {
32052 if (GET_MODE (XEXP (x, 1)) == DImode)
32053 *total = rs6000_cost->divdi;
32054 else
32055 *total = rs6000_cost->divsi;
32056 }
32057 /* Add in shift and subtract for MOD unless we have a mod instruction. */
32058 if (!TARGET_MODULO && (code == MOD || code == UMOD))
32059 *total += COSTS_N_INSNS (2);
32060 return false;
32061
32062 case CTZ:
32063 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
32064 return false;
32065
32066 case FFS:
32067 *total = COSTS_N_INSNS (4);
32068 return false;
32069
32070 case POPCOUNT:
32071 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
32072 return false;
32073
32074 case PARITY:
32075 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
32076 return false;
32077
32078 case NOT:
32079 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
32080 *total = 0;
32081 else
32082 *total = COSTS_N_INSNS (1);
32083 return false;
32084
32085 case AND:
32086 if (CONST_INT_P (XEXP (x, 1)))
32087 {
32088 rtx left = XEXP (x, 0);
32089 rtx_code left_code = GET_CODE (left);
32090
32091 /* rotate-and-mask: 1 insn. */
32092 if ((left_code == ROTATE
32093 || left_code == ASHIFT
32094 || left_code == LSHIFTRT)
32095 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
32096 {
32097 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
32098 if (!CONST_INT_P (XEXP (left, 1)))
32099 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
32100 *total += COSTS_N_INSNS (1);
32101 return true;
32102 }
32103
32104 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
32105 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
32106 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
32107 || (val & 0xffff) == val
32108 || (val & 0xffff0000) == val
32109 || ((val & 0xffff) == 0 && mode == SImode))
32110 {
32111 *total = rtx_cost (left, mode, AND, 0, speed);
32112 *total += COSTS_N_INSNS (1);
32113 return true;
32114 }
32115
32116 /* 2 insns. */
32117 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
32118 {
32119 *total = rtx_cost (left, mode, AND, 0, speed);
32120 *total += COSTS_N_INSNS (2);
32121 return true;
32122 }
32123 }
32124
32125 *total = COSTS_N_INSNS (1);
32126 return false;
32127
32128 case IOR:
32129 /* FIXME */
32130 *total = COSTS_N_INSNS (1);
32131 return true;
32132
32133 case CLZ:
32134 case XOR:
32135 case ZERO_EXTRACT:
32136 *total = COSTS_N_INSNS (1);
32137 return false;
32138
32139 case ASHIFT:
32140 /* The EXTSWSLI instruction is a combined sign-extend-and-shift. Don't
32141 count the sign extend and the shift separately within the insn. */
32142 if (TARGET_EXTSWSLI && mode == DImode
32143 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
32144 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
32145 {
32146 *total = 0;
32147 return false;
32148 }
32149 /* fall through */
32150
32151 case ASHIFTRT:
32152 case LSHIFTRT:
32153 case ROTATE:
32154 case ROTATERT:
32155 /* Handle mul_highpart. */
32156 if (outer_code == TRUNCATE
32157 && GET_CODE (XEXP (x, 0)) == MULT)
32158 {
32159 if (mode == DImode)
32160 *total = rs6000_cost->muldi;
32161 else
32162 *total = rs6000_cost->mulsi;
32163 return true;
32164 }
32165 else if (outer_code == AND)
32166 *total = 0;
32167 else
32168 *total = COSTS_N_INSNS (1);
32169 return false;
32170
32171 case SIGN_EXTEND:
32172 case ZERO_EXTEND:
32173 if (GET_CODE (XEXP (x, 0)) == MEM)
32174 *total = 0;
32175 else
32176 *total = COSTS_N_INSNS (1);
32177 return false;
32178
32179 case COMPARE:
32180 case NEG:
32181 case ABS:
32182 if (!FLOAT_MODE_P (mode))
32183 {
32184 *total = COSTS_N_INSNS (1);
32185 return false;
32186 }
32187 /* FALLTHRU */
32188
32189 case FLOAT:
32190 case UNSIGNED_FLOAT:
32191 case FIX:
32192 case UNSIGNED_FIX:
32193 case FLOAT_TRUNCATE:
32194 *total = rs6000_cost->fp;
32195 return false;
32196
32197 case FLOAT_EXTEND:
32198 if (mode == DFmode)
32199 *total = rs6000_cost->sfdf_convert;
32200 else
32201 *total = rs6000_cost->fp;
32202 return false;
32203
32204 case UNSPEC:
32205 switch (XINT (x, 1))
32206 {
32207 case UNSPEC_FRSP:
32208 *total = rs6000_cost->fp;
32209 return true;
32210
32211 default:
32212 break;
32213 }
32214 break;
32215
32216 case CALL:
32217 case IF_THEN_ELSE:
32218 if (!speed)
32219 {
32220 *total = COSTS_N_INSNS (1);
32221 return true;
32222 }
32223 else if (FLOAT_MODE_P (mode)
32224 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
32225 {
32226 *total = rs6000_cost->fp;
32227 return false;
32228 }
32229 break;
32230
32231 case NE:
32232 case EQ:
32233 case GTU:
32234 case LTU:
32235 /* Carry bit requires mode == Pmode.
32236 NEG or PLUS already counted so only add one. */
32237 if (mode == Pmode
32238 && (outer_code == NEG || outer_code == PLUS))
32239 {
32240 *total = COSTS_N_INSNS (1);
32241 return true;
32242 }
32243 if (outer_code == SET)
32244 {
32245 if (XEXP (x, 1) == const0_rtx)
32246 {
32247 if (TARGET_ISEL && !TARGET_MFCRF)
32248 *total = COSTS_N_INSNS (8);
32249 else
32250 *total = COSTS_N_INSNS (2);
32251 return true;
32252 }
32253 else
32254 {
32255 *total = COSTS_N_INSNS (3);
32256 return false;
32257 }
32258 }
32259 /* FALLTHRU */
32260
32261 case GT:
32262 case LT:
32263 case UNORDERED:
32264 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
32265 {
32266 if (TARGET_ISEL && !TARGET_MFCRF)
32267 *total = COSTS_N_INSNS (8);
32268 else
32269 *total = COSTS_N_INSNS (2);
32270 return true;
32271 }
32272 /* CC COMPARE. */
32273 if (outer_code == COMPARE)
32274 {
32275 *total = 0;
32276 return true;
32277 }
32278 break;
32279
32280 default:
32281 break;
32282 }
32283
32284 return false;
32285 }
32286
32287 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
32288
32289 static bool
32290 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
32291 int opno, int *total, bool speed)
32292 {
32293 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
32294
32295 fprintf (stderr,
32296 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
32297 "opno = %d, total = %d, speed = %s, x:\n",
32298 ret ? "complete" : "scan inner",
32299 GET_MODE_NAME (mode),
32300 GET_RTX_NAME (outer_code),
32301 opno,
32302 *total,
32303 speed ? "true" : "false");
32304
32305 debug_rtx (x);
32306
32307 return ret;
32308 }
32309
32310 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
32311
32312 static int
32313 rs6000_debug_address_cost (rtx x, machine_mode mode,
32314 addr_space_t as, bool speed)
32315 {
32316 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
32317
32318 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
32319 ret, speed ? "true" : "false");
32320 debug_rtx (x);
32321
32322 return ret;
32323 }
32324
32325
32326 /* A C expression returning the cost of moving data from a register of class
32327 CLASS1 to one of CLASS2. */
32328
32329 static int
32330 rs6000_register_move_cost (machine_mode mode,
32331 reg_class_t from, reg_class_t to)
32332 {
32333 int ret;
32334
32335 if (TARGET_DEBUG_COST)
32336 dbg_cost_ctrl++;
32337
32338 /* Moves from/to GENERAL_REGS. */
32339 if (reg_classes_intersect_p (to, GENERAL_REGS)
32340 || reg_classes_intersect_p (from, GENERAL_REGS))
32341 {
32342 reg_class_t rclass = from;
32343
32344 if (! reg_classes_intersect_p (to, GENERAL_REGS))
32345 rclass = to;
32346
32347 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
32348 ret = (rs6000_memory_move_cost (mode, rclass, false)
32349 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
32350
32351 /* It's more expensive to move CR_REGS than CR0_REGS because of the
32352 shift. */
32353 else if (rclass == CR_REGS)
32354 ret = 4;
32355
32356 /* For those processors that have slow LR/CTR moves, make them more
32357 expensive than memory in order to bias spills to memory. */
32358 else if ((rs6000_cpu == PROCESSOR_POWER6
32359 || rs6000_cpu == PROCESSOR_POWER7
32360 || rs6000_cpu == PROCESSOR_POWER8
32361 || rs6000_cpu == PROCESSOR_POWER9)
32362 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
32363 ret = 6 * hard_regno_nregs[0][mode];
32364
32365 else
32366 /* A move will cost one instruction per GPR moved. */
32367 ret = 2 * hard_regno_nregs[0][mode];
32368 }
32369
32370 /* If we have VSX, we can easily move between FPR or Altivec registers. */
32371 else if (VECTOR_MEM_VSX_P (mode)
32372 && reg_classes_intersect_p (to, VSX_REGS)
32373 && reg_classes_intersect_p (from, VSX_REGS))
32374 ret = 2 * hard_regno_nregs[32][mode];
32375
32376 /* Moving between two similar registers is just one instruction. */
32377 else if (reg_classes_intersect_p (to, from))
32378 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
32379
32380 /* Everything else has to go through GENERAL_REGS. */
32381 else
32382 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
32383 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
32384
32385 if (TARGET_DEBUG_COST)
32386 {
32387 if (dbg_cost_ctrl == 1)
32388 fprintf (stderr,
32389 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
32390 ret, GET_MODE_NAME (mode), reg_class_names[from],
32391 reg_class_names[to]);
32392 dbg_cost_ctrl--;
32393 }
32394
32395 return ret;
32396 }
32397
32398 /* A C expression returning the cost of moving data of MODE from a register
32399 to or from memory. */
32400
32401 static int
32402 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
32403 bool in ATTRIBUTE_UNUSED)
32404 {
32405 int ret;
32406
32407 if (TARGET_DEBUG_COST)
32408 dbg_cost_ctrl++;
32409
32410 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
32411 ret = 4 * hard_regno_nregs[0][mode];
32412 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
32413 || reg_classes_intersect_p (rclass, VSX_REGS)))
32414 ret = 4 * hard_regno_nregs[32][mode];
32415 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
32416 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
32417 else
32418 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
32419
32420 if (TARGET_DEBUG_COST)
32421 {
32422 if (dbg_cost_ctrl == 1)
32423 fprintf (stderr,
32424 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
32425 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
32426 dbg_cost_ctrl--;
32427 }
32428
32429 return ret;
32430 }
32431
32432 /* Returns a code for a target-specific builtin that implements the
32433 reciprocal of the function, or NULL_TREE if not available. */
32434
32435 static tree
32436 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
32437 bool sqrt ATTRIBUTE_UNUSED)
32438 {
32439 if (optimize_insn_for_size_p ())
32440 return NULL_TREE;
32441
32442 if (md_fn)
32443 switch (fn)
32444 {
32445 case VSX_BUILTIN_XVSQRTDP:
32446 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
32447 return NULL_TREE;
32448
32449 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
32450
32451 case VSX_BUILTIN_XVSQRTSP:
32452 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
32453 return NULL_TREE;
32454
32455 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
32456
32457 default:
32458 return NULL_TREE;
32459 }
32460
32461 else
32462 switch (fn)
32463 {
32464 case BUILT_IN_SQRT:
32465 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
32466 return NULL_TREE;
32467
32468 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
32469
32470 case BUILT_IN_SQRTF:
32471 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
32472 return NULL_TREE;
32473
32474 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
32475
32476 default:
32477 return NULL_TREE;
32478 }
32479 }
32480
32481 /* Load up a constant. If the mode is a vector mode, splat the value across
32482 all of the vector elements. */
32483
32484 static rtx
32485 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
32486 {
32487 rtx reg;
32488
32489 if (mode == SFmode || mode == DFmode)
32490 {
32491 rtx d = const_double_from_real_value (dconst, mode);
32492 reg = force_reg (mode, d);
32493 }
32494 else if (mode == V4SFmode)
32495 {
32496 rtx d = const_double_from_real_value (dconst, SFmode);
32497 rtvec v = gen_rtvec (4, d, d, d, d);
32498 reg = gen_reg_rtx (mode);
32499 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
32500 }
32501 else if (mode == V2DFmode)
32502 {
32503 rtx d = const_double_from_real_value (dconst, DFmode);
32504 rtvec v = gen_rtvec (2, d, d);
32505 reg = gen_reg_rtx (mode);
32506 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
32507 }
32508 else
32509 gcc_unreachable ();
32510
32511 return reg;
32512 }
32513
32514 /* Generate an FMA instruction. */
32515
32516 static void
32517 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
32518 {
32519 machine_mode mode = GET_MODE (target);
32520 rtx dst;
32521
32522 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
32523 gcc_assert (dst != NULL);
32524
32525 if (dst != target)
32526 emit_move_insn (target, dst);
32527 }
32528
32529 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
32530
32531 static void
32532 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
32533 {
32534 machine_mode mode = GET_MODE (target);
32535 rtx dst;
32536
32537 /* Altivec does not support fms directly;
32538 generate in terms of fma in that case. */
32539 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
32540 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
32541 else
32542 {
32543 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
32544 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
32545 }
32546 gcc_assert (dst != NULL);
32547
32548 if (dst != target)
32549 emit_move_insn (target, dst);
32550 }
32551
32552 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
32553
32554 static void
32555 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
32556 {
32557 machine_mode mode = GET_MODE (dst);
32558 rtx r;
32559
32560 /* This is a tad more complicated, since the fnma_optab is for
32561 a different expression: fma(-m1, m2, a), which is the same
32562 thing except in the case of signed zeros.
32563
32564 Fortunately we know that if FMA is supported that FNMSUB is
32565 also supported in the ISA. Just expand it directly. */
32566
32567 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
32568
32569 r = gen_rtx_NEG (mode, a);
32570 r = gen_rtx_FMA (mode, m1, m2, r);
32571 r = gen_rtx_NEG (mode, r);
32572 emit_insn (gen_rtx_SET (dst, r));
32573 }
32574
32575 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
32576 add a reg_note saying that this was a division. Support both scalar and
32577 vector divide. Assumes no trapping math and finite arguments. */
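/* Annotation (not in the original source): writing e_i = 1 - d * x_i,
   the update x_(i+1) = x_i + e_i * x_i gives

     1 - d * x_(i+1) = 1 - (1 - e_i) * (1 + e_i) = e_i * e_i,

   so the error squares on every pass; this is why the loop below can
   refine the error term with a single multiply (enext = eprev * eprev)
   instead of recomputing 1 - d * x each time.  */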
32578
32579 void
32580 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
32581 {
32582 machine_mode mode = GET_MODE (dst);
32583 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
32584 int i;
32585
32586 /* Low precision estimates guarantee 5 bits of accuracy. High
32587 precision estimates guarantee 14 bits of accuracy. SFmode
32588 requires 23 bits of accuracy. DFmode requires 52 bits of
32589 accuracy. Each pass at least doubles the accuracy, leading
32590 to the following. */
32591 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
32592 if (mode == DFmode || mode == V2DFmode)
32593 passes++;
32594
32595 enum insn_code code = optab_handler (smul_optab, mode);
32596 insn_gen_fn gen_mul = GEN_FCN (code);
32597
32598 gcc_assert (code != CODE_FOR_nothing);
32599
32600 one = rs6000_load_constant_and_splat (mode, dconst1);
32601
32602 /* x0 = 1./d estimate */
32603 x0 = gen_reg_rtx (mode);
32604 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
32605 UNSPEC_FRES)));
32606
32607 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
32608 if (passes > 1) {
32609
32610 /* e0 = 1. - d * x0 */
32611 e0 = gen_reg_rtx (mode);
32612 rs6000_emit_nmsub (e0, d, x0, one);
32613
32614 /* x1 = x0 + e0 * x0 */
32615 x1 = gen_reg_rtx (mode);
32616 rs6000_emit_madd (x1, e0, x0, x0);
32617
32618 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
32619 ++i, xprev = xnext, eprev = enext) {
32620
32621 /* enext = eprev * eprev */
32622 enext = gen_reg_rtx (mode);
32623 emit_insn (gen_mul (enext, eprev, eprev));
32624
32625 /* xnext = xprev + enext * xprev */
32626 xnext = gen_reg_rtx (mode);
32627 rs6000_emit_madd (xnext, enext, xprev, xprev);
32628 }
32629
32630 } else
32631 xprev = x0;
32632
32633 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
32634
32635 /* u = n * xprev */
32636 u = gen_reg_rtx (mode);
32637 emit_insn (gen_mul (u, n, xprev));
32638
32639 /* v = n - (d * u) */
32640 v = gen_reg_rtx (mode);
32641 rs6000_emit_nmsub (v, d, u, n);
32642
32643 /* dst = (v * xprev) + u */
32644 rs6000_emit_madd (dst, v, xprev, u);
32645
32646 if (note_p)
32647 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
32648 }
32649
32650 /* Newton-Raphson approximation of single/double-precision floating point
32651 rsqrt. Assumes no trapping math and finite arguments. */
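/* Annotation: this is the Newton step for f(x) = 1/(x*x) - a, namely
   x' = x * (1.5 - 0.5 * a * x * x).  The 0.5 * a factor is folded into
   Y once up front, so each pass costs two multiplies plus one fused
   negative multiply-subtract.  */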
32652
32653 void
32654 rs6000_emit_swrsqrt (rtx dst, rtx src)
32655 {
32656 machine_mode mode = GET_MODE (src);
32657 rtx x0 = gen_reg_rtx (mode);
32658 rtx y = gen_reg_rtx (mode);
32659
32660 /* Low precision estimates guarantee 5 bits of accuracy. High
32661 precision estimates guarantee 14 bits of accuracy. SFmode
32662 requires 23 bits of accuracy. DFmode requires 52 bits of
32663 accuracy. Each pass at least doubles the accuracy, leading
32664 to the following. */
32665 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
32666 if (mode == DFmode || mode == V2DFmode)
32667 passes++;
32668
32669 REAL_VALUE_TYPE dconst3_2;
32670 int i;
32671 rtx halfthree;
32672 enum insn_code code = optab_handler (smul_optab, mode);
32673 insn_gen_fn gen_mul = GEN_FCN (code);
32674
32675 gcc_assert (code != CODE_FOR_nothing);
32676
32677 /* Load up the constant 1.5 either as a scalar, or as a vector. */
32678 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
32679 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
32680
32681 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
32682
32683 /* x0 = rsqrt estimate */
32684 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
32685 UNSPEC_RSQRT)));
32686
32687 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
32688 rs6000_emit_msub (y, src, halfthree, src);
32689
32690 for (i = 0; i < passes; i++)
32691 {
32692 rtx x1 = gen_reg_rtx (mode);
32693 rtx u = gen_reg_rtx (mode);
32694 rtx v = gen_reg_rtx (mode);
32695
32696 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
32697 emit_insn (gen_mul (u, x0, x0));
32698 rs6000_emit_nmsub (v, y, u, halfthree);
32699 emit_insn (gen_mul (x1, x0, v));
32700 x0 = x1;
32701 }
32702
32703 emit_move_insn (dst, x0);
32704 return;
32705 }
32706
32707 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
32708 (Power7) targets. DST is the target, and SRC is the argument operand. */
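/* Annotation: in the popcntb fallback below, popcntb leaves a per-byte
   population count in each byte; multiplying by 0x01010101 (or its
   64-bit analogue) accumulates those byte counts into the most
   significant byte, which the final shift extracts.  E.g. for SImode
   0x01030000 (three set bits), popcntb yields 0x01020000, the multiply
   yields 0x03020000 (mod 2^32), and the shift right by 24 leaves 3.  */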
32709
32710 void
32711 rs6000_emit_popcount (rtx dst, rtx src)
32712 {
32713 machine_mode mode = GET_MODE (dst);
32714 rtx tmp1, tmp2;
32715
32716 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
32717 if (TARGET_POPCNTD)
32718 {
32719 if (mode == SImode)
32720 emit_insn (gen_popcntdsi2 (dst, src));
32721 else
32722 emit_insn (gen_popcntddi2 (dst, src));
32723 return;
32724 }
32725
32726 tmp1 = gen_reg_rtx (mode);
32727
32728 if (mode == SImode)
32729 {
32730 emit_insn (gen_popcntbsi2 (tmp1, src));
32731 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
32732 NULL_RTX, 0);
32733 tmp2 = force_reg (SImode, tmp2);
32734 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
32735 }
32736 else
32737 {
32738 emit_insn (gen_popcntbdi2 (tmp1, src));
32739 tmp2 = expand_mult (DImode, tmp1,
32740 GEN_INT ((HOST_WIDE_INT)
32741 0x01010101 << 32 | 0x01010101),
32742 NULL_RTX, 0);
32743 tmp2 = force_reg (DImode, tmp2);
32744 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
32745 }
32746 }
32747
32748
32749 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
32750 target, and SRC is the argument operand. */
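/* Annotation: parity is the low bit of the population count, so the
   shift+xor fallback below folds the popcntb byte counts together
   (halving the width each step) and masks the result with 1; the cost
   comparisons choose it only when the multiply-based popcount
   expansion would be at least as expensive.  */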
32751
32752 void
32753 rs6000_emit_parity (rtx dst, rtx src)
32754 {
32755 machine_mode mode = GET_MODE (dst);
32756 rtx tmp;
32757
32758 tmp = gen_reg_rtx (mode);
32759
32760 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
32761 if (TARGET_CMPB)
32762 {
32763 if (mode == SImode)
32764 {
32765 emit_insn (gen_popcntbsi2 (tmp, src));
32766 emit_insn (gen_paritysi2_cmpb (dst, tmp));
32767 }
32768 else
32769 {
32770 emit_insn (gen_popcntbdi2 (tmp, src));
32771 emit_insn (gen_paritydi2_cmpb (dst, tmp));
32772 }
32773 return;
32774 }
32775
32776 if (mode == SImode)
32777 {
32778 /* Is mult+shift >= shift+xor+shift+xor? */
32779 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
32780 {
32781 rtx tmp1, tmp2, tmp3, tmp4;
32782
32783 tmp1 = gen_reg_rtx (SImode);
32784 emit_insn (gen_popcntbsi2 (tmp1, src));
32785
32786 tmp2 = gen_reg_rtx (SImode);
32787 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
32788 tmp3 = gen_reg_rtx (SImode);
32789 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
32790
32791 tmp4 = gen_reg_rtx (SImode);
32792 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
32793 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
32794 }
32795 else
32796 rs6000_emit_popcount (tmp, src);
32797 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
32798 }
32799 else
32800 {
32801 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
32802 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
32803 {
32804 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
32805
32806 tmp1 = gen_reg_rtx (DImode);
32807 emit_insn (gen_popcntbdi2 (tmp1, src));
32808
32809 tmp2 = gen_reg_rtx (DImode);
32810 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
32811 tmp3 = gen_reg_rtx (DImode);
32812 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
32813
32814 tmp4 = gen_reg_rtx (DImode);
32815 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
32816 tmp5 = gen_reg_rtx (DImode);
32817 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
32818
32819 tmp6 = gen_reg_rtx (DImode);
32820 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
32821 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
32822 }
32823 else
32824 rs6000_emit_popcount (tmp, src);
32825 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
32826 }
32827 }
32828
32829 /* Expand an Altivec constant permutation for little endian mode.
32830 There are two issues: First, the two input operands must be
32831 swapped so that together they form a double-wide array in LE
32832 order. Second, the vperm instruction has surprising behavior
32833 in LE mode: it interprets the elements of the source vectors
32834 in BE mode ("left to right") and interprets the elements of
32835 the destination vector in LE mode ("right to left"). To
32836 correct for this, we must subtract each element of the permute
32837 control vector from 31.
32838
32839 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
32840 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
32841 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
32842 serve as the permute control vector. Then, in BE mode,
32843
32844 vperm 9,10,11,12
32845
32846 places the desired result in vr9. However, in LE mode the
32847 vector contents will be
32848
32849 vr10 = 00000003 00000002 00000001 00000000
32850 vr11 = 00000007 00000006 00000005 00000004
32851
32852 The result of the vperm using the same permute control vector is
32853
32854 vr9 = 05000000 07000000 01000000 03000000
32855
32856 That is, the leftmost 4 bytes of vr10 are interpreted as the
32857 source for the rightmost 4 bytes of vr9, and so on.
32858
32859 If we change the permute control vector to
32860
32861 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
32862
32863 and issue
32864
32865 vperm 9,11,10,12
32866
32867 we get the desired
32868
32869 vr9 = 00000006 00000004 00000002 00000000. */
32870
32871 void
32872 altivec_expand_vec_perm_const_le (rtx operands[4])
32873 {
32874 unsigned int i;
32875 rtx perm[16];
32876 rtx constv, unspec;
32877 rtx target = operands[0];
32878 rtx op0 = operands[1];
32879 rtx op1 = operands[2];
32880 rtx sel = operands[3];
32881
32882 /* Unpack and adjust the constant selector. */
32883 for (i = 0; i < 16; ++i)
32884 {
32885 rtx e = XVECEXP (sel, 0, i);
32886 unsigned int elt = 31 - (INTVAL (e) & 31);
32887 perm[i] = GEN_INT (elt);
32888 }
32889
32890 /* Expand to a permute, swapping the inputs and using the
32891 adjusted selector. */
32892 if (!REG_P (op0))
32893 op0 = force_reg (V16QImode, op0);
32894 if (!REG_P (op1))
32895 op1 = force_reg (V16QImode, op1);
32896
32897 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
32898 constv = force_reg (V16QImode, constv);
32899 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
32900 UNSPEC_VPERM);
32901 if (!REG_P (target))
32902 {
32903 rtx tmp = gen_reg_rtx (V16QImode);
32904 emit_move_insn (tmp, unspec);
32905 unspec = tmp;
32906 }
32907
32908 emit_move_insn (target, unspec);
32909 }
32910
32911 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
32912 permute control vector. But here it's not a constant, so we must
32913 generate a vector NAND or NOR to do the adjustment. */
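/* Annotation: a NAND/NOR of the selector with itself computes its
   bitwise complement, and for the 5-bit indices vperm uses,
   (~e & 31) == 31 - e, so one vector complement performs the same
   subtract-from-31 adjustment as in the constant case.  */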
32914
32915 void
32916 altivec_expand_vec_perm_le (rtx operands[4])
32917 {
32918 rtx notx, iorx, unspec;
32919 rtx target = operands[0];
32920 rtx op0 = operands[1];
32921 rtx op1 = operands[2];
32922 rtx sel = operands[3];
32923 rtx tmp = target;
32924 rtx norreg = gen_reg_rtx (V16QImode);
32925 machine_mode mode = GET_MODE (target);
32926
32927 /* Get everything in regs so the pattern matches. */
32928 if (!REG_P (op0))
32929 op0 = force_reg (mode, op0);
32930 if (!REG_P (op1))
32931 op1 = force_reg (mode, op1);
32932 if (!REG_P (sel))
32933 sel = force_reg (V16QImode, sel);
32934 if (!REG_P (target))
32935 tmp = gen_reg_rtx (mode);
32936
32937 /* Invert the selector with a VNAND if available, else a VNOR.
32938 The VNAND is preferred for future fusion opportunities. */
32939 notx = gen_rtx_NOT (V16QImode, sel);
32940 iorx = (TARGET_P8_VECTOR
32941 ? gen_rtx_IOR (V16QImode, notx, notx)
32942 : gen_rtx_AND (V16QImode, notx, notx));
32943 emit_insn (gen_rtx_SET (norreg, iorx));
32944
32945 /* Permute with operands reversed and adjusted selector. */
32946 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
32947 UNSPEC_VPERM);
32948
32949 /* Copy into target, possibly by way of a register. */
32950 if (!REG_P (target))
32951 {
32952 emit_move_insn (tmp, unspec);
32953 unspec = tmp;
32954 }
32955
32956 emit_move_insn (target, unspec);
32957 }
32958
32959 /* Expand an Altivec constant permutation. Return true if we match
32960 an efficient implementation; false to fall back to VPERM. */
32961
32962 bool
32963 altivec_expand_vec_perm_const (rtx operands[4])
32964 {
32965 struct altivec_perm_insn {
32966 HOST_WIDE_INT mask;
32967 enum insn_code impl;
32968 unsigned char perm[16];
32969 };
32970 static const struct altivec_perm_insn patterns[] = {
32971 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
32972 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
32973 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
32974 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
32975 { OPTION_MASK_ALTIVEC,
32976 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
32977 : CODE_FOR_altivec_vmrglb_direct),
32978 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
32979 { OPTION_MASK_ALTIVEC,
32980 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
32981 : CODE_FOR_altivec_vmrglh_direct),
32982 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
32983 { OPTION_MASK_ALTIVEC,
32984 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
32985 : CODE_FOR_altivec_vmrglw_direct),
32986 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
32987 { OPTION_MASK_ALTIVEC,
32988 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
32989 : CODE_FOR_altivec_vmrghb_direct),
32990 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
32991 { OPTION_MASK_ALTIVEC,
32992 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
32993 : CODE_FOR_altivec_vmrghh_direct),
32994 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
32995 { OPTION_MASK_ALTIVEC,
32996 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
32997 : CODE_FOR_altivec_vmrghw_direct),
32998 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
32999 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
33000 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
33001 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
33002 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
33003 };
33004
33005 unsigned int i, j, elt, which;
33006 unsigned char perm[16];
33007 rtx target, op0, op1, sel, x;
33008 bool one_vec;
33009
33010 target = operands[0];
33011 op0 = operands[1];
33012 op1 = operands[2];
33013 sel = operands[3];
33014
33015 /* Unpack the constant selector. */
33016 for (i = which = 0; i < 16; ++i)
33017 {
33018 rtx e = XVECEXP (sel, 0, i);
33019 elt = INTVAL (e) & 31;
33020 which |= (elt < 16 ? 1 : 2);
33021 perm[i] = elt;
33022 }
33023
33024 /* Simplify the constant selector based on operands. */
33025 switch (which)
33026 {
33027 default:
33028 gcc_unreachable ();
33029
33030 case 3:
33031 one_vec = false;
33032 if (!rtx_equal_p (op0, op1))
33033 break;
33034 /* FALLTHRU */
33035
33036 case 2:
33037 for (i = 0; i < 16; ++i)
33038 perm[i] &= 15;
33039 op0 = op1;
33040 one_vec = true;
33041 break;
33042
33043 case 1:
33044 op1 = op0;
33045 one_vec = true;
33046 break;
33047 }
33048
33049 /* Look for splat patterns. */
33050 if (one_vec)
33051 {
33052 elt = perm[0];
33053
33054 for (i = 0; i < 16; ++i)
33055 if (perm[i] != elt)
33056 break;
33057 if (i == 16)
33058 {
33059 if (!BYTES_BIG_ENDIAN)
33060 elt = 15 - elt;
33061 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
33062 return true;
33063 }
33064
33065 if (elt % 2 == 0)
33066 {
33067 for (i = 0; i < 16; i += 2)
33068 if (perm[i] != elt || perm[i + 1] != elt + 1)
33069 break;
33070 if (i == 16)
33071 {
33072 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
33073 x = gen_reg_rtx (V8HImode);
33074 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
33075 GEN_INT (field)));
33076 emit_move_insn (target, gen_lowpart (V16QImode, x));
33077 return true;
33078 }
33079 }
33080
33081 if (elt % 4 == 0)
33082 {
33083 for (i = 0; i < 16; i += 4)
33084 if (perm[i] != elt
33085 || perm[i + 1] != elt + 1
33086 || perm[i + 2] != elt + 2
33087 || perm[i + 3] != elt + 3)
33088 break;
33089 if (i == 16)
33090 {
33091 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
33092 x = gen_reg_rtx (V4SImode);
33093 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
33094 GEN_INT (field)));
33095 emit_move_insn (target, gen_lowpart (V16QImode, x));
33096 return true;
33097 }
33098 }
33099 }
33100
33101 /* Look for merge and pack patterns. */
33102 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
33103 {
33104 bool swapped;
33105
33106 if ((patterns[j].mask & rs6000_isa_flags) == 0)
33107 continue;
33108
33109 elt = patterns[j].perm[0];
33110 if (perm[0] == elt)
33111 swapped = false;
33112 else if (perm[0] == elt + 16)
33113 swapped = true;
33114 else
33115 continue;
33116 for (i = 1; i < 16; ++i)
33117 {
33118 elt = patterns[j].perm[i];
33119 if (swapped)
33120 elt = (elt >= 16 ? elt - 16 : elt + 16);
33121 else if (one_vec && elt >= 16)
33122 elt -= 16;
33123 if (perm[i] != elt)
33124 break;
33125 }
33126 if (i == 16)
33127 {
33128 enum insn_code icode = patterns[j].impl;
33129 machine_mode omode = insn_data[icode].operand[0].mode;
33130 machine_mode imode = insn_data[icode].operand[1].mode;
33131
33132 /* For little-endian, don't use vpkuwum and vpkuhum if the
33133 underlying vector type is not V4SI and V8HI, respectively.
33134 For example, using vpkuwum with a V8HI picks up the even
33135 halfwords (BE numbering) when the even halfwords (LE
33136 numbering) are what we need. */
33137 if (!BYTES_BIG_ENDIAN
33138 && icode == CODE_FOR_altivec_vpkuwum_direct
33139 && ((GET_CODE (op0) == REG
33140 && GET_MODE (op0) != V4SImode)
33141 || (GET_CODE (op0) == SUBREG
33142 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
33143 continue;
33144 if (!BYTES_BIG_ENDIAN
33145 && icode == CODE_FOR_altivec_vpkuhum_direct
33146 && ((GET_CODE (op0) == REG
33147 && GET_MODE (op0) != V8HImode)
33148 || (GET_CODE (op0) == SUBREG
33149 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
33150 continue;
33151
33152 /* For little-endian, the two input operands must be swapped
33153 (or swapped back) to ensure proper right-to-left numbering
33154 from 0 to 2N-1. */
33155 if (swapped ^ !BYTES_BIG_ENDIAN)
33156 std::swap (op0, op1);
33157 if (imode != V16QImode)
33158 {
33159 op0 = gen_lowpart (imode, op0);
33160 op1 = gen_lowpart (imode, op1);
33161 }
33162 if (omode == V16QImode)
33163 x = target;
33164 else
33165 x = gen_reg_rtx (omode);
33166 emit_insn (GEN_FCN (icode) (x, op0, op1));
33167 if (omode != V16QImode)
33168 emit_move_insn (target, gen_lowpart (V16QImode, x));
33169 return true;
33170 }
33171 }
33172
33173 if (!BYTES_BIG_ENDIAN)
33174 {
33175 altivec_expand_vec_perm_const_le (operands);
33176 return true;
33177 }
33178
33179 return false;
33180 }
33181
33182 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
33183 Return true if we match an efficient implementation. */
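/* PERM0 and PERM1 index the concatenation of OP0 and OP1: values 0..1
   select an element of OP0 and values 2..3 select an element of OP1.
   For example, (perm0, perm1) = (1, 2) picks element 1 of OP0 followed
   by element 0 of OP1.  */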
33184
33185 static bool
33186 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
33187 unsigned char perm0, unsigned char perm1)
33188 {
33189 rtx x;
33190
33191 /* If both selectors come from the same operand, fold to single op. */
33192 if ((perm0 & 2) == (perm1 & 2))
33193 {
33194 if (perm0 & 2)
33195 op0 = op1;
33196 else
33197 op1 = op0;
33198 }
33199 /* If both operands are equal, fold to simpler permutation. */
33200 if (rtx_equal_p (op0, op1))
33201 {
33202 perm0 = perm0 & 1;
33203 perm1 = (perm1 & 1) + 2;
33204 }
33205 /* If the first selector comes from the second operand, swap. */
33206 else if (perm0 & 2)
33207 {
33208 if (perm1 & 2)
33209 return false;
33210 perm0 -= 2;
33211 perm1 += 2;
33212 std::swap (op0, op1);
33213 }
33214 /* If the second selector does not come from the second operand, fail. */
33215 else if ((perm1 & 2) == 0)
33216 return false;
33217
33218 /* Success! */
33219 if (target != NULL)
33220 {
33221 machine_mode vmode, dmode;
33222 rtvec v;
33223
33224 vmode = GET_MODE (target);
33225 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
33226 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
33227 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
33228 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
33229 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
33230 emit_insn (gen_rtx_SET (target, x));
33231 }
33232 return true;
33233 }
33234
33235 bool
33236 rs6000_expand_vec_perm_const (rtx operands[4])
33237 {
33238 rtx target, op0, op1, sel;
33239 unsigned char perm0, perm1;
33240
33241 target = operands[0];
33242 op0 = operands[1];
33243 op1 = operands[2];
33244 sel = operands[3];
33245
33246 /* Unpack the constant selector. */
33247 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
33248 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
33249
33250 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
33251 }
33252
33253 /* Test whether a constant permutation is supported. */
33254
33255 static bool
33256 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
33257 const unsigned char *sel)
33258 {
33259 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
33260 if (TARGET_ALTIVEC)
33261 return true;
33262
33263 /* Check for ps_merge* or evmerge* insns. */
33264 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
33265 || (TARGET_SPE && vmode == V2SImode))
33266 {
33267 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
33268 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
33269 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
33270 }
33271
33272 return false;
33273 }
33274
33275 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
33276
33277 static void
33278 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
33279 machine_mode vmode, unsigned nelt, rtx perm[])
33280 {
33281 machine_mode imode;
33282 rtx x;
33283
33284 imode = vmode;
33285 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
33286 {
33287 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
33288 imode = mode_for_vector (imode, nelt);
33289 }
33290
33291 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
33292 x = expand_vec_perm (vmode, op0, op1, x, target);
33293 if (x != target)
33294 emit_move_insn (target, x);
33295 }
33296
33297 /* Expand an extract even operation. */
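/* For example, with V4SI operands the selector built below is {0,2,4,6}:
   elements 0 and 2 of OP0 followed by elements 0 and 2 of OP1 (numbered
   4 and 6 in the concatenated vector).  */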
33298
33299 void
33300 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
33301 {
33302 machine_mode vmode = GET_MODE (target);
33303 unsigned i, nelt = GET_MODE_NUNITS (vmode);
33304 rtx perm[16];
33305
33306 for (i = 0; i < nelt; i++)
33307 perm[i] = GEN_INT (i * 2);
33308
33309 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
33310 }
33311
33312 /* Expand a vector interleave operation. */
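/* For example, with V4SI operands and HIGHP true the selector built below
   is {0,4,1,5}; with HIGHP false it is {2,6,3,7}.  */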
33313
33314 void
33315 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
33316 {
33317 machine_mode vmode = GET_MODE (target);
33318 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
33319 rtx perm[16];
33320
33321 high = (highp ? 0 : nelt / 2);
33322 for (i = 0; i < nelt / 2; i++)
33323 {
33324 perm[i * 2] = GEN_INT (i + high);
33325 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
33326 }
33327
33328 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
33329 }
33330
33331 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
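/* For example, SCALE = 4 builds the constant vector {16.0, 16.0} and emits
   a single V2DF multiply of SRC by it.  */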
33332 void
33333 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
33334 {
33335 HOST_WIDE_INT hwi_scale (scale);
33336 REAL_VALUE_TYPE r_pow;
33337 rtvec v = rtvec_alloc (2);
33338 rtx elt;
33339 rtx scale_vec = gen_reg_rtx (V2DFmode);
33340 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
33341 elt = const_double_from_real_value (r_pow, DFmode);
33342 RTVEC_ELT (v, 0) = elt;
33343 RTVEC_ELT (v, 1) = elt;
33344 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
33345 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
33346 }
33347
33348 /* Return an RTX representing where to find the function value of a
33349 function returning MODE. */
33350 static rtx
33351 rs6000_complex_function_value (machine_mode mode)
33352 {
33353 unsigned int regno;
33354 rtx r1, r2;
33355 machine_mode inner = GET_MODE_INNER (mode);
33356 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
33357
33358 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
33359 regno = FP_ARG_RETURN;
33360 else
33361 {
33362 regno = GP_ARG_RETURN;
33363
33364 /* 32-bit is OK since it'll go in r3/r4. */
33365 if (TARGET_32BIT && inner_bytes >= 4)
33366 return gen_rtx_REG (mode, regno);
33367 }
33368
33369 if (inner_bytes >= 8)
33370 return gen_rtx_REG (mode, regno);
33371
33372 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
33373 const0_rtx);
33374 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
33375 GEN_INT (inner_bytes));
33376 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
33377 }
33378
33379 /* Return an rtx describing a return value of MODE as a PARALLEL
33380 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
33381 stride REG_STRIDE. */
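/* For example, splitting a DImode return value in 32-bit mode with
   -mpowerpc64 uses N_ELTS = 2, ELT_MODE = SImode, REGNO = GP_ARG_RETURN
   and REG_STRIDE = 1, yielding r3 and r4 at byte offsets 0 and 4.  */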
33382
33383 static rtx
33384 rs6000_parallel_return (machine_mode mode,
33385 int n_elts, machine_mode elt_mode,
33386 unsigned int regno, unsigned int reg_stride)
33387 {
33388 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
33389
33390 int i;
33391 for (i = 0; i < n_elts; i++)
33392 {
33393 rtx r = gen_rtx_REG (elt_mode, regno);
33394 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
33395 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
33396 regno += reg_stride;
33397 }
33398
33399 return par;
33400 }
33401
33402 /* Target hook for TARGET_FUNCTION_VALUE.
33403
33404 On the SPE, both FPs and vectors are returned in r3.
33405
33406 On RS/6000 an integer value is in r3 and a floating-point value is in
33407 fp1, unless -msoft-float. */
33408
33409 static rtx
33410 rs6000_function_value (const_tree valtype,
33411 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
33412 bool outgoing ATTRIBUTE_UNUSED)
33413 {
33414 machine_mode mode;
33415 unsigned int regno;
33416 machine_mode elt_mode;
33417 int n_elts;
33418
33419 /* Special handling for structs in darwin64. */
33420 if (TARGET_MACHO
33421 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
33422 {
33423 CUMULATIVE_ARGS valcum;
33424 rtx valret;
33425
33426 valcum.words = 0;
33427 valcum.fregno = FP_ARG_MIN_REG;
33428 valcum.vregno = ALTIVEC_ARG_MIN_REG;
33429 /* Do a trial code generation as if this were going to be passed as
33430 an argument; if any part goes in memory, we return NULL. */
33431 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
33432 if (valret)
33433 return valret;
33434 /* Otherwise fall through to standard ABI rules. */
33435 }
33436
33437 mode = TYPE_MODE (valtype);
33438
33439 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
33440 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
33441 {
33442 int first_reg, n_regs;
33443
33444 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
33445 {
33446 /* _Decimal128 must use even/odd register pairs. */
33447 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
33448 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
33449 }
33450 else
33451 {
33452 first_reg = ALTIVEC_ARG_RETURN;
33453 n_regs = 1;
33454 }
33455
33456 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
33457 }
33458
33459 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64. */
33460 if (TARGET_32BIT && TARGET_POWERPC64)
33461 switch (mode)
33462 {
33463 default:
33464 break;
33465 case DImode:
33466 case SCmode:
33467 case DCmode:
33468 case TCmode:
33469 int count = GET_MODE_SIZE (mode) / 4;
33470 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
33471 }
33472
33473 if ((INTEGRAL_TYPE_P (valtype)
33474 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
33475 || POINTER_TYPE_P (valtype))
33476 mode = TARGET_32BIT ? SImode : DImode;
33477
33478 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
33479 /* _Decimal128 must use an even/odd register pair. */
33480 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
33481 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS
33482 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
33483 regno = FP_ARG_RETURN;
33484 else if (TREE_CODE (valtype) == COMPLEX_TYPE
33485 && targetm.calls.split_complex_arg)
33486 return rs6000_complex_function_value (mode);
33487 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
33488 return register is used in both cases, and we won't see V2DImode/V2DFmode
33489 for pure altivec, combine the two cases. */
33490 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
33491 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
33492 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
33493 regno = ALTIVEC_ARG_RETURN;
33494 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
33495 && (mode == DFmode || mode == DCmode
33496 || FLOAT128_IBM_P (mode) || mode == TCmode))
33497 return spe_build_register_parallel (mode, GP_ARG_RETURN);
33498 else
33499 regno = GP_ARG_RETURN;
33500
33501 return gen_rtx_REG (mode, regno);
33502 }
33503
33504 /* Define how to find the value returned by a library function
33505 assuming the value has mode MODE. */
33506 rtx
33507 rs6000_libcall_value (machine_mode mode)
33508 {
33509 unsigned int regno;
33510
33511 /* Long long return values need to be split in the 32-bit ABI with -mpowerpc64. */
33512 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
33513 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
33514
33515 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
33516 /* _Decimal128 must use an even/odd register pair. */
33517 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
33518 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
33519 && TARGET_HARD_FLOAT && TARGET_FPRS
33520 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
33521 regno = FP_ARG_RETURN;
33522 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
33523 return register is used in both cases, and we won't see V2DImode/V2DFmode
33524 for pure altivec, combine the two cases. */
33525 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
33526 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
33527 regno = ALTIVEC_ARG_RETURN;
33528 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
33529 return rs6000_complex_function_value (mode);
33530 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
33531 && (mode == DFmode || mode == DCmode
33532 || FLOAT128_IBM_P (mode) || mode == TCmode))
33533 return spe_build_register_parallel (mode, GP_ARG_RETURN);
33534 else
33535 regno = GP_ARG_RETURN;
33536
33537 return gen_rtx_REG (mode, regno);
33538 }
33539
33540
33541 /* Return true if we use LRA instead of the reload pass. */
33542 static bool
33543 rs6000_lra_p (void)
33544 {
33545 return rs6000_lra_flag;
33546 }
33547
33548 /* Given FROM and TO register numbers, say whether this elimination is allowed.
33549 Frame pointer elimination is automatically handled.
33550
33551 For the RS/6000, if frame pointer elimination is being done, we would like
33552 to convert ap into fp, not sp.
33553
33554 We need r30 if -mminimal-toc was specified, and there are constant pool
33555 references. */
33556
33557 static bool
33558 rs6000_can_eliminate (const int from, const int to)
33559 {
33560 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
33561 ? ! frame_pointer_needed
33562 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
33563 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
33564 : true);
33565 }
33566
33567 /* Define the offset between two registers, FROM to be eliminated and its
33568 replacement TO, at the start of a routine. */
33569 HOST_WIDE_INT
33570 rs6000_initial_elimination_offset (int from, int to)
33571 {
33572 rs6000_stack_t *info = rs6000_stack_info ();
33573 HOST_WIDE_INT offset;
33574
33575 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
33576 offset = info->push_p ? 0 : -info->total_size;
33577 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
33578 {
33579 offset = info->push_p ? 0 : -info->total_size;
33580 if (FRAME_GROWS_DOWNWARD)
33581 offset += info->fixed_size + info->vars_size + info->parm_size;
33582 }
33583 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
33584 offset = FRAME_GROWS_DOWNWARD
33585 ? info->fixed_size + info->vars_size + info->parm_size
33586 : 0;
33587 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
33588 offset = info->total_size;
33589 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
33590 offset = info->push_p ? info->total_size : 0;
33591 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
33592 offset = 0;
33593 else
33594 gcc_unreachable ();
33595
33596 return offset;
33597 }
33598
33599 static rtx
33600 rs6000_dwarf_register_span (rtx reg)
33601 {
33602 rtx parts[8];
33603 int i, words;
33604 unsigned regno = REGNO (reg);
33605 machine_mode mode = GET_MODE (reg);
33606
33607 if (TARGET_SPE
33608 && regno < 32
33609 && (SPE_VECTOR_MODE (GET_MODE (reg))
33610 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
33611 && mode != SFmode && mode != SDmode && mode != SCmode)))
33612 ;
33613 else
33614 return NULL_RTX;
33615
33618 /* The duality of the SPE register size wreaks all kinds of havoc.
33619 This is a way of distinguishing r0 in 32-bits from r0 in
33620 64-bits. */
33621 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
33622 gcc_assert (words <= 4);
33623 for (i = 0; i < words; i++, regno++)
33624 {
33625 if (BYTES_BIG_ENDIAN)
33626 {
33627 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
33628 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
33629 }
33630 else
33631 {
33632 parts[2 * i] = gen_rtx_REG (SImode, regno);
33633 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
33634 }
33635 }
33636
33637 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
33638 }
33639
33640 /* Fill in sizes for SPE register high parts in table used by unwinder. */
33641
33642 static void
33643 rs6000_init_dwarf_reg_sizes_extra (tree address)
33644 {
33645 if (TARGET_SPE)
33646 {
33647 int i;
33648 machine_mode mode = TYPE_MODE (char_type_node);
33649 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
33650 rtx mem = gen_rtx_MEM (BLKmode, addr);
33651 rtx value = gen_int_mode (4, mode);
33652
33653 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
33654 {
33655 int column = DWARF_REG_TO_UNWIND_COLUMN
33656 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
33657 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
33658
33659 emit_move_insn (adjust_address (mem, mode, offset), value);
33660 }
33661 }
33662
33663 if (TARGET_MACHO && ! TARGET_ALTIVEC)
33664 {
33665 int i;
33666 machine_mode mode = TYPE_MODE (char_type_node);
33667 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
33668 rtx mem = gen_rtx_MEM (BLKmode, addr);
33669 rtx value = gen_int_mode (16, mode);
33670
33671 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
33672 The unwinder still needs to know the size of Altivec registers. */
33673
33674 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
33675 {
33676 int column = DWARF_REG_TO_UNWIND_COLUMN
33677 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
33678 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
33679
33680 emit_move_insn (adjust_address (mem, mode, offset), value);
33681 }
33682 }
33683 }
33684
33685 /* Map internal gcc register numbers to debug format register numbers.
33686 FORMAT specifies the type of debug register number to use:
33687 0 -- debug information, except for frame-related sections
33688 1 -- DWARF .debug_frame section
33689 2 -- DWARF .eh_frame section */
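/* For example, on a platform defining RS6000_USE_DWARF_NUMBERING, LR maps
   to 108 and CTR to 109 for the DWARF sections, while a request for the
   .eh_frame encoding (FORMAT == 2) returns the GCC-internal number
   unchanged.  */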
33690
33691 unsigned int
33692 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
33693 {
33694 /* We never use the GCC internal number for SPE high registers.
33695 Those are mapped to the 1200..1231 range for all debug formats. */
33696 if (SPE_HIGH_REGNO_P (regno))
33697 return regno - FIRST_SPE_HIGH_REGNO + 1200;
33698
33699 /* Except for the above, we use the internal number for non-DWARF
33700 debug information, and also for .eh_frame. */
33701 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
33702 return regno;
33703
33704 /* On some platforms, we use the standard DWARF register
33705 numbering for .debug_info and .debug_frame. */
33706 #ifdef RS6000_USE_DWARF_NUMBERING
33707 if (regno <= 63)
33708 return regno;
33709 if (regno == LR_REGNO)
33710 return 108;
33711 if (regno == CTR_REGNO)
33712 return 109;
33713 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
33714 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
33715 The actual code emitted saves the whole of CR, so we map CR2_REGNO
33716 to the DWARF reg for CR. */
33717 if (format == 1 && regno == CR2_REGNO)
33718 return 64;
33719 if (CR_REGNO_P (regno))
33720 return regno - CR0_REGNO + 86;
33721 if (regno == CA_REGNO)
33722 return 101; /* XER */
33723 if (ALTIVEC_REGNO_P (regno))
33724 return regno - FIRST_ALTIVEC_REGNO + 1124;
33725 if (regno == VRSAVE_REGNO)
33726 return 356;
33727 if (regno == VSCR_REGNO)
33728 return 67;
33729 if (regno == SPE_ACC_REGNO)
33730 return 99;
33731 if (regno == SPEFSCR_REGNO)
33732 return 612;
33733 #endif
33734 return regno;
33735 }
33736
33737 /* target hook eh_return_filter_mode */
33738 static machine_mode
33739 rs6000_eh_return_filter_mode (void)
33740 {
33741 return TARGET_32BIT ? SImode : word_mode;
33742 }
33743
33744 /* Target hook for scalar_mode_supported_p. */
33745 static bool
33746 rs6000_scalar_mode_supported_p (machine_mode mode)
33747 {
33748 /* -m32 does not support TImode. This is the default, from
33749 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
33750 same ABI as for -m32. But default_scalar_mode_supported_p allows
33751 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
33752 for -mpowerpc64. */
33753 if (TARGET_32BIT && mode == TImode)
33754 return false;
33755
33756 if (DECIMAL_FLOAT_MODE_P (mode))
33757 return default_decimal_float_supported_p ();
33758 else if (TARGET_FLOAT128 && (mode == KFmode || mode == IFmode))
33759 return true;
33760 else
33761 return default_scalar_mode_supported_p (mode);
33762 }
33763
33764 /* Target hook for vector_mode_supported_p. */
33765 static bool
33766 rs6000_vector_mode_supported_p (machine_mode mode)
33767 {
33769 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
33770 return true;
33771
33772 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
33773 return true;
33774
33775 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
33776 128-bit, the compiler might try to widen IEEE 128-bit to IBM
33777 double-double. */
33778 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
33779 return true;
33780
33781 else
33782 return false;
33783 }
33784
33785 /* Target hook for c_mode_for_suffix. */
33786 static machine_mode
33787 rs6000_c_mode_for_suffix (char suffix)
33788 {
33789 if (TARGET_FLOAT128)
33790 {
33791 if (suffix == 'q' || suffix == 'Q')
33792 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
33793
33794 /* At the moment, we are not defining a suffix for IBM extended double.
33795 If/when the default for -mabi=ieeelongdouble is changed, and we want
33796 to support __ibm128 constants in legacy library code, we may need to
33797 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
33798 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
33799 __float80 constants. */
33800 }
33801
33802 return VOIDmode;
33803 }
33804
33805 /* Target hook for invalid_arg_for_unprototyped_fn. */
33806 static const char *
33807 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
33808 {
33809 return (!rs6000_darwin64_abi
33810 && typelist == 0
33811 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
33812 && (funcdecl == NULL_TREE
33813 || (TREE_CODE (funcdecl) == FUNCTION_DECL
33814 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
33815 ? N_("AltiVec argument passed to unprototyped function")
33816 : NULL;
33817 }
33818
33819 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
33820 setup by using __stack_chk_fail_local hidden function instead of
33821 calling __stack_chk_fail directly. Otherwise it is better to call
33822 __stack_chk_fail directly. */
33823
33824 static tree ATTRIBUTE_UNUSED
33825 rs6000_stack_protect_fail (void)
33826 {
33827 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
33828 ? default_hidden_stack_protect_fail ()
33829 : default_external_stack_protect_fail ();
33830 }
33831
33832 void
33833 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
33834 int num_operands ATTRIBUTE_UNUSED)
33835 {
33836 if (rs6000_warn_cell_microcode)
33837 {
33838 const char *temp;
33839 int insn_code_number = recog_memoized (insn);
33840 location_t location = INSN_LOCATION (insn);
33841
33842 /* Punt on insns we cannot recognize. */
33843 if (insn_code_number < 0)
33844 return;
33845
33846 temp = get_insn_template (insn_code_number, insn);
33847
33848 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
33849 warning_at (location, OPT_mwarn_cell_microcode,
33850 "emitting microcode insn %s\t[%s] #%d",
33851 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
33852 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
33853 warning_at (location, OPT_mwarn_cell_microcode,
33854 "emitting conditional microcode insn %s\t[%s] #%d",
33855 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
33856 }
33857 }
33858
33859 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33860
33861 #if TARGET_ELF
33862 static unsigned HOST_WIDE_INT
33863 rs6000_asan_shadow_offset (void)
33864 {
33865 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
33866 }
33867 #endif
33868 \f
33869 /* Mask options that we want to support inside of attribute((target)) and
33870 #pragma GCC target operations. Note that we do not include things like
33871 64/32-bit, endianness, hard/soft floating point, etc. that would have
33872 different calling sequences. */
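/* For example, a declaration such as

     void foo (void) __attribute__((target("vsx,no-crypto")));

   sets OPTION_MASK_VSX (which also drags in OPTION_MASK_ALTIVEC) and
   clears OPTION_MASK_CRYPTO for foo via the tables below.  */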
33873
33874 struct rs6000_opt_mask {
33875 const char *name; /* option name */
33876 HOST_WIDE_INT mask; /* mask to set */
33877 bool invert; /* invert sense of mask */
33878 bool valid_target; /* option is a target option */
33879 };
33880
33881 static struct rs6000_opt_mask const rs6000_opt_masks[] =
33882 {
33883 { "altivec", OPTION_MASK_ALTIVEC, false, true },
33884 { "cmpb", OPTION_MASK_CMPB, false, true },
33885 { "crypto", OPTION_MASK_CRYPTO, false, true },
33886 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
33887 { "dlmzb", OPTION_MASK_DLMZB, false, true },
33888 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
33889 false, true },
33890 { "float128", OPTION_MASK_FLOAT128, false, true },
33891 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
33892 { "fprnd", OPTION_MASK_FPRND, false, true },
33893 { "hard-dfp", OPTION_MASK_DFP, false, true },
33894 { "htm", OPTION_MASK_HTM, false, true },
33895 { "isel", OPTION_MASK_ISEL, false, true },
33896 { "mfcrf", OPTION_MASK_MFCRF, false, true },
33897 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
33898 { "modulo", OPTION_MASK_MODULO, false, true },
33899 { "mulhw", OPTION_MASK_MULHW, false, true },
33900 { "multiple", OPTION_MASK_MULTIPLE, false, true },
33901 { "popcntb", OPTION_MASK_POPCNTB, false, true },
33902 { "popcntd", OPTION_MASK_POPCNTD, false, true },
33903 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
33904 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
33905 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
33906 { "power9-dform", OPTION_MASK_P9_DFORM, false, true },
33907 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
33908 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
33909 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
33910 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
33911 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
33912 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
33913 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
33914 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
33915 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
33916 { "string", OPTION_MASK_STRING, false, true },
33917 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
33918 { "update", OPTION_MASK_NO_UPDATE, true, true },
33919 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
33920 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
33921 { "vsx", OPTION_MASK_VSX, false, true },
33922 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
33923 #ifdef OPTION_MASK_64BIT
33924 #if TARGET_AIX_OS
33925 { "aix64", OPTION_MASK_64BIT, false, false },
33926 { "aix32", OPTION_MASK_64BIT, true, false },
33927 #else
33928 { "64", OPTION_MASK_64BIT, false, false },
33929 { "32", OPTION_MASK_64BIT, true, false },
33930 #endif
33931 #endif
33932 #ifdef OPTION_MASK_EABI
33933 { "eabi", OPTION_MASK_EABI, false, false },
33934 #endif
33935 #ifdef OPTION_MASK_LITTLE_ENDIAN
33936 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
33937 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
33938 #endif
33939 #ifdef OPTION_MASK_RELOCATABLE
33940 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
33941 #endif
33942 #ifdef OPTION_MASK_STRICT_ALIGN
33943 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
33944 #endif
33945 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
33946 { "string", OPTION_MASK_STRING, false, false },
33947 };
33948
33949 /* Builtin mask mapping for printing the flags. */
33950 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
33951 {
33952 { "altivec", RS6000_BTM_ALTIVEC, false, false },
33953 { "vsx", RS6000_BTM_VSX, false, false },
33954 { "spe", RS6000_BTM_SPE, false, false },
33955 { "paired", RS6000_BTM_PAIRED, false, false },
33956 { "fre", RS6000_BTM_FRE, false, false },
33957 { "fres", RS6000_BTM_FRES, false, false },
33958 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
33959 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
33960 { "popcntd", RS6000_BTM_POPCNTD, false, false },
33961 { "cell", RS6000_BTM_CELL, false, false },
33962 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
33963 { "crypto", RS6000_BTM_CRYPTO, false, false },
33964 { "htm", RS6000_BTM_HTM, false, false },
33965 { "hard-dfp", RS6000_BTM_DFP, false, false },
33966 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
33967 { "long-double-128", RS6000_BTM_LDBL128, false, false },
33968 };
33969
33970 /* Option variables that we want to support inside attribute((target)) and
33971 #pragma GCC target operations. */
33972
33973 struct rs6000_opt_var {
33974 const char *name; /* option name */
33975 size_t global_offset; /* offset of the option in global_options. */
33976 size_t target_offset; /* offset of the option in target options. */
33977 };
33978
33979 static struct rs6000_opt_var const rs6000_opt_vars[] =
33980 {
33981 { "friz",
33982 offsetof (struct gcc_options, x_TARGET_FRIZ),
33983 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
33984 { "avoid-indexed-addresses",
33985 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
33986 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
33987 { "paired",
33988 offsetof (struct gcc_options, x_rs6000_paired_float),
33989 offsetof (struct cl_target_option, x_rs6000_paired_float), },
33990 { "longcall",
33991 offsetof (struct gcc_options, x_rs6000_default_long_calls),
33992 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
33993 { "optimize-swaps",
33994 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
33995 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
33996 { "allow-movmisalign",
33997 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
33998 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
33999 { "allow-df-permute",
34000 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
34001 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
34002 { "sched-groups",
34003 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
34004 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
34005 { "always-hint",
34006 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
34007 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
34008 { "align-branch-targets",
34009 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
34010 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
34011 { "vectorize-builtins",
34012 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
34013 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
34014 { "tls-markers",
34015 offsetof (struct gcc_options, x_tls_markers),
34016 offsetof (struct cl_target_option, x_tls_markers), },
34017 { "sched-prolog",
34018 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
34019 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
34020 { "sched-epilog",
34021 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
34022 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
34023 { "gen-cell-microcode",
34024 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
34025 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
34026 { "warn-cell-microcode",
34027 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
34028 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
34029 };
34030
34031 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
34032 parsing. Return true if there were no errors. */
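/* For example, the string "cpu=power8,htm,no-string" sets rs6000_cpu_index
   from the processor table, turns on OPTION_MASK_HTM, and clears
   OPTION_MASK_STRING; an unrecognized name is diagnosed with wording that
   matches either the attribute or the pragma form.  */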
34033
34034 static bool
34035 rs6000_inner_target_options (tree args, bool attr_p)
34036 {
34037 bool ret = true;
34038
34039 if (args == NULL_TREE)
34040 ;
34041
34042 else if (TREE_CODE (args) == STRING_CST)
34043 {
34044 char *p = ASTRDUP (TREE_STRING_POINTER (args));
34045 char *q;
34046
34047 while ((q = strtok (p, ",")) != NULL)
34048 {
34049 bool error_p = false;
34050 bool not_valid_p = false;
34051 const char *cpu_opt = NULL;
34052
34053 p = NULL;
34054 if (strncmp (q, "cpu=", 4) == 0)
34055 {
34056 int cpu_index = rs6000_cpu_name_lookup (q+4);
34057 if (cpu_index >= 0)
34058 rs6000_cpu_index = cpu_index;
34059 else
34060 {
34061 error_p = true;
34062 cpu_opt = q+4;
34063 }
34064 }
34065 else if (strncmp (q, "tune=", 5) == 0)
34066 {
34067 int tune_index = rs6000_cpu_name_lookup (q+5);
34068 if (tune_index >= 0)
34069 rs6000_tune_index = tune_index;
34070 else
34071 {
34072 error_p = true;
34073 cpu_opt = q+5;
34074 }
34075 }
34076 else
34077 {
34078 size_t i;
34079 bool invert = false;
34080 char *r = q;
34081
34082 error_p = true;
34083 if (strncmp (r, "no-", 3) == 0)
34084 {
34085 invert = true;
34086 r += 3;
34087 }
34088
34089 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
34090 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
34091 {
34092 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
34093
34094 if (!rs6000_opt_masks[i].valid_target)
34095 not_valid_p = true;
34096 else
34097 {
34098 error_p = false;
34099 rs6000_isa_flags_explicit |= mask;
34100
34101 /* VSX needs altivec, so -mvsx automagically sets
34102 altivec and disables -mavoid-indexed-addresses. */
34103 if (!invert)
34104 {
34105 if (mask == OPTION_MASK_VSX)
34106 {
34107 mask |= OPTION_MASK_ALTIVEC;
34108 TARGET_AVOID_XFORM = 0;
34109 }
34110 }
34111
34112 if (rs6000_opt_masks[i].invert)
34113 invert = !invert;
34114
34115 if (invert)
34116 rs6000_isa_flags &= ~mask;
34117 else
34118 rs6000_isa_flags |= mask;
34119 }
34120 break;
34121 }
34122
34123 if (error_p && !not_valid_p)
34124 {
34125 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
34126 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
34127 {
34128 size_t j = rs6000_opt_vars[i].global_offset;
34129 *((int *) ((char *)&global_options + j)) = !invert;
34130 error_p = false;
34131 not_valid_p = false;
34132 break;
34133 }
34134 }
34135 }
34136
34137 if (error_p)
34138 {
34139 const char *eprefix, *esuffix;
34140
34141 ret = false;
34142 if (attr_p)
34143 {
34144 eprefix = "__attribute__((__target__(";
34145 esuffix = ")))";
34146 }
34147 else
34148 {
34149 eprefix = "#pragma GCC target ";
34150 esuffix = "";
34151 }
34152
34153 if (cpu_opt)
34154 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
34155 q, esuffix);
34156 else if (not_valid_p)
34157 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
34158 else
34159 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
34160 }
34161 }
34162 }
34163
34164 else if (TREE_CODE (args) == TREE_LIST)
34165 {
34166 do
34167 {
34168 tree value = TREE_VALUE (args);
34169 if (value)
34170 {
34171 bool ret2 = rs6000_inner_target_options (value, attr_p);
34172 if (!ret2)
34173 ret = false;
34174 }
34175 args = TREE_CHAIN (args);
34176 }
34177 while (args != NULL_TREE);
34178 }
34179
34180 else
34181 gcc_unreachable ();
34182
34183 return ret;
34184 }
34185
34186 /* Print out the target options as a list for -mdebug=target. */
34187
34188 static void
34189 rs6000_debug_target_options (tree args, const char *prefix)
34190 {
34191 if (args == NULL_TREE)
34192 fprintf (stderr, "%s<NULL>", prefix);
34193
34194 else if (TREE_CODE (args) == STRING_CST)
34195 {
34196 char *p = ASTRDUP (TREE_STRING_POINTER (args));
34197 char *q;
34198
34199 while ((q = strtok (p, ",")) != NULL)
34200 {
34201 p = NULL;
34202 fprintf (stderr, "%s\"%s\"", prefix, q);
34203 prefix = ", ";
34204 }
34205 }
34206
34207 else if (TREE_CODE (args) == TREE_LIST)
34208 {
34209 do
34210 {
34211 tree value = TREE_VALUE (args);
34212 if (value)
34213 {
34214 rs6000_debug_target_options (value, prefix);
34215 prefix = ", ";
34216 }
34217 args = TREE_CHAIN (args);
34218 }
34219 while (args != NULL_TREE);
34220 }
34221
34222 else
34223 gcc_unreachable ();
34224
34225 return;
34226 }
34227
34228 \f
34229 /* Hook to validate attribute((target("..."))). */
34230
34231 static bool
34232 rs6000_valid_attribute_p (tree fndecl,
34233 tree ARG_UNUSED (name),
34234 tree args,
34235 int flags)
34236 {
34237 struct cl_target_option cur_target;
34238 bool ret;
34239 tree old_optimize = build_optimization_node (&global_options);
34240 tree new_target, new_optimize;
34241 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
34242
34243 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
34244
34245 if (TARGET_DEBUG_TARGET)
34246 {
34247 tree tname = DECL_NAME (fndecl);
34248 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
34249 if (tname)
34250 fprintf (stderr, "function: %.*s\n",
34251 (int) IDENTIFIER_LENGTH (tname),
34252 IDENTIFIER_POINTER (tname));
34253 else
34254 fprintf (stderr, "function: unknown\n");
34255
34256 fprintf (stderr, "args:");
34257 rs6000_debug_target_options (args, " ");
34258 fprintf (stderr, "\n");
34259
34260 if (flags)
34261 fprintf (stderr, "flags: 0x%x\n", flags);
34262
34263 fprintf (stderr, "--------------------\n");
34264 }
34265
34268
34269 /* If the function changed the optimization levels as well as setting target
34270 options, start with the optimizations specified. */
34271 if (func_optimize && func_optimize != old_optimize)
34272 cl_optimization_restore (&global_options,
34273 TREE_OPTIMIZATION (func_optimize));
34274
34275 /* The target attributes may also change some optimization flags, so update
34276 the optimization options if necessary. */
34277 cl_target_option_save (&cur_target, &global_options);
34278 rs6000_cpu_index = rs6000_tune_index = -1;
34279 ret = rs6000_inner_target_options (args, true);
34280
34281 /* Set up any additional state. */
34282 if (ret)
34283 {
34284 ret = rs6000_option_override_internal (false);
34285 new_target = build_target_option_node (&global_options);
34286 }
34287 else
34288 new_target = NULL;
34289
34290 new_optimize = build_optimization_node (&global_options);
34291
34292 if (!new_target)
34293 ret = false;
34294
34295 else if (fndecl)
34296 {
34297 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
34298
34299 if (old_optimize != new_optimize)
34300 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
34301 }
34302
34303 cl_target_option_restore (&global_options, &cur_target);
34304
34305 if (old_optimize != new_optimize)
34306 cl_optimization_restore (&global_options,
34307 TREE_OPTIMIZATION (old_optimize));
34308
34309 return ret;
34310 }
34311
34312 \f
34313 /* Hook to validate the current #pragma GCC target and set the state, and
34314 update the macros based on what was changed. If ARGS is NULL, then
34315 POP_TARGET is used to reset the options. */
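/* For example:

     #pragma GCC push_options
     #pragma GCC target ("vsx")
     ... functions compiled as if with -mvsx ...
     #pragma GCC pop_options

   The pop arrives here with ARGS == NULL and the saved options in
   POP_TARGET.  */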
34316
34317 bool
34318 rs6000_pragma_target_parse (tree args, tree pop_target)
34319 {
34320 tree prev_tree = build_target_option_node (&global_options);
34321 tree cur_tree;
34322 struct cl_target_option *prev_opt, *cur_opt;
34323 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
34324 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
34325
34326 if (TARGET_DEBUG_TARGET)
34327 {
34328 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
34329 fprintf (stderr, "args:");
34330 rs6000_debug_target_options (args, " ");
34331 fprintf (stderr, "\n");
34332
34333 if (pop_target)
34334 {
34335 fprintf (stderr, "pop_target:\n");
34336 debug_tree (pop_target);
34337 }
34338 else
34339 fprintf (stderr, "pop_target: <NULL>\n");
34340
34341 fprintf (stderr, "--------------------\n");
34342 }
34343
34344 if (! args)
34345 {
34346 cur_tree = ((pop_target)
34347 ? pop_target
34348 : target_option_default_node);
34349 cl_target_option_restore (&global_options,
34350 TREE_TARGET_OPTION (cur_tree));
34351 }
34352 else
34353 {
34354 rs6000_cpu_index = rs6000_tune_index = -1;
34355 if (!rs6000_inner_target_options (args, false)
34356 || !rs6000_option_override_internal (false)
34357 || (cur_tree = build_target_option_node (&global_options))
34358 == NULL_TREE)
34359 {
34360 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
34361 fprintf (stderr, "invalid pragma\n");
34362
34363 return false;
34364 }
34365 }
34366
34367 target_option_current_node = cur_tree;
34368
34369 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
34370 change the macros that are defined. */
34371 if (rs6000_target_modify_macros_ptr)
34372 {
34373 prev_opt = TREE_TARGET_OPTION (prev_tree);
34374 prev_bumask = prev_opt->x_rs6000_builtin_mask;
34375 prev_flags = prev_opt->x_rs6000_isa_flags;
34376
34377 cur_opt = TREE_TARGET_OPTION (cur_tree);
34378 cur_flags = cur_opt->x_rs6000_isa_flags;
34379 cur_bumask = cur_opt->x_rs6000_builtin_mask;
34380
34381 diff_bumask = (prev_bumask ^ cur_bumask);
34382 diff_flags = (prev_flags ^ cur_flags);
34383
34384 if ((diff_flags != 0) || (diff_bumask != 0))
34385 {
34386 /* Delete old macros. */
34387 rs6000_target_modify_macros_ptr (false,
34388 prev_flags & diff_flags,
34389 prev_bumask & diff_bumask);
34390
34391 /* Define new macros. */
34392 rs6000_target_modify_macros_ptr (true,
34393 cur_flags & diff_flags,
34394 cur_bumask & diff_bumask);
34395 }
34396 }
34397
34398 return true;
34399 }
34400
34401 \f
34402 /* Remember the last target of rs6000_set_current_function. */
34403 static GTY(()) tree rs6000_previous_fndecl;
34404
34405 /* Establish appropriate back-end context for processing the function
34406 FNDECL. The argument might be NULL to indicate processing at top
34407 level, outside of any function scope. */
34408 static void
34409 rs6000_set_current_function (tree fndecl)
34410 {
34411 tree old_tree = (rs6000_previous_fndecl
34412 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
34413 : NULL_TREE);
34414
34415 tree new_tree = (fndecl
34416 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
34417 : NULL_TREE);
34418
34419 if (TARGET_DEBUG_TARGET)
34420 {
34421 bool print_final = false;
34422 fprintf (stderr, "\n==================== rs6000_set_current_function");
34423
34424 if (fndecl)
34425 fprintf (stderr, ", fndecl %s (%p)",
34426 (DECL_NAME (fndecl)
34427 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
34428 : "<unknown>"), (void *)fndecl);
34429
34430 if (rs6000_previous_fndecl)
34431 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
34432
34433 fprintf (stderr, "\n");
34434 if (new_tree)
34435 {
34436 fprintf (stderr, "\nnew fndecl target specific options:\n");
34437 debug_tree (new_tree);
34438 print_final = true;
34439 }
34440
34441 if (old_tree)
34442 {
34443 fprintf (stderr, "\nold fndecl target specific options:\n");
34444 debug_tree (old_tree);
34445 print_final = true;
34446 }
34447
34448 if (print_final)
34449 fprintf (stderr, "--------------------\n");
34450 }
34451
34452 /* Only change the context if the function changes. This hook is called
34453 several times in the course of compiling a function, and we don't want to
34454 slow things down too much or call target_reinit when it isn't safe. */
34455 if (fndecl && fndecl != rs6000_previous_fndecl)
34456 {
34457 rs6000_previous_fndecl = fndecl;
34458 if (old_tree == new_tree)
34459 ;
34460
34461 else if (new_tree && new_tree != target_option_default_node)
34462 {
34463 cl_target_option_restore (&global_options,
34464 TREE_TARGET_OPTION (new_tree));
34465 if (TREE_TARGET_GLOBALS (new_tree))
34466 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
34467 else
34468 TREE_TARGET_GLOBALS (new_tree)
34469 = save_target_globals_default_opts ();
34470 }
34471
34472 else if (old_tree && old_tree != target_option_default_node)
34473 {
34474 new_tree = target_option_current_node;
34475 cl_target_option_restore (&global_options,
34476 TREE_TARGET_OPTION (new_tree));
34477 if (TREE_TARGET_GLOBALS (new_tree))
34478 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
34479 else if (new_tree == target_option_default_node)
34480 restore_target_globals (&default_target_globals);
34481 else
34482 TREE_TARGET_GLOBALS (new_tree)
34483 = save_target_globals_default_opts ();
34484 }
34485 }
34486 }
34487
34488 \f
34489 /* Save the current options. */
34490
34491 static void
34492 rs6000_function_specific_save (struct cl_target_option *ptr,
34493 struct gcc_options *opts)
34494 {
34495 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
34496 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
34497 }
34498
34499 /* Restore the current options. */
34500
34501 static void
34502 rs6000_function_specific_restore (struct gcc_options *opts,
34503 struct cl_target_option *ptr)
34504
34505 {
34506 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
34507 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
34508 (void) rs6000_option_override_internal (false);
34509 }
34510
34511 /* Print the current options. */
34512
34513 static void
34514 rs6000_function_specific_print (FILE *file, int indent,
34515 struct cl_target_option *ptr)
34516 {
34517 rs6000_print_isa_options (file, indent, "Isa options set",
34518 ptr->x_rs6000_isa_flags);
34519
34520 rs6000_print_isa_options (file, indent, "Isa options explicit",
34521 ptr->x_rs6000_isa_flags_explicit);
34522 }
34523
34524 /* Helper function to print the current isa or misc options on a line. */
34525
34526 static void
34527 rs6000_print_options_internal (FILE *file,
34528 int indent,
34529 const char *string,
34530 HOST_WIDE_INT flags,
34531 const char *prefix,
34532 const struct rs6000_opt_mask *opts,
34533 size_t num_elements)
34534 {
34535 size_t i;
34536 size_t start_column = 0;
34537 size_t cur_column;
34538 size_t max_column = 76;
34539 const char *comma = "";
34540
34541 if (indent)
34542 start_column += fprintf (file, "%*s", indent, "");
34543
34544 if (!flags)
34545 {
34546 fprintf (file, DEBUG_FMT_S, string, "<none>");
34547 return;
34548 }
34549
34550 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
34551
34552 /* Print the various mask options. */
34553 cur_column = start_column;
34554 for (i = 0; i < num_elements; i++)
34555 {
34556 if ((flags & opts[i].mask) != 0)
34557 {
34558 const char *no_str = opts[i].invert ? "no-" : "";
34559 size_t len = (strlen (comma)
34560 + strlen (prefix)
34561 + strlen (no_str)
34562 + strlen (opts[i].name));
34563
34564 cur_column += len;
34565 if (cur_column > max_column)
34566 {
34567 fprintf (file, ", \\\n%*s", (int)start_column, "");
34568 cur_column = start_column + len;
34569 comma = "";
34570 }
34571
34572 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
34573 opts[i].name);
34574 flags &= ~ opts[i].mask;
34575 comma = ", ";
34576 }
34577 }
34578
34579 fputs ("\n", file);
34580 }
34581
34582 /* Helper function to print the current isa options on a line. */
34583
34584 static void
34585 rs6000_print_isa_options (FILE *file, int indent, const char *string,
34586 HOST_WIDE_INT flags)
34587 {
34588 rs6000_print_options_internal (file, indent, string, flags, "-m",
34589 &rs6000_opt_masks[0],
34590 ARRAY_SIZE (rs6000_opt_masks));
34591 }
34592
34593 static void
34594 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
34595 HOST_WIDE_INT flags)
34596 {
34597 rs6000_print_options_internal (file, indent, string, flags, "",
34598 &rs6000_builtin_mask_names[0],
34599 ARRAY_SIZE (rs6000_builtin_mask_names));
34600 }
34601
34602 \f
34603 /* Hook to determine if one function can safely inline another. */
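/* For example, a caller marked target("vsx") can inline a callee marked
   target("altivec"), because -mvsx also enables Altivec and the callee's
   flag set is therefore a subset of the caller's; the reverse inlining
   is rejected.  */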
34604
34605 static bool
34606 rs6000_can_inline_p (tree caller, tree callee)
34607 {
34608 bool ret = false;
34609 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
34610 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
34611
34612 /* If callee has no option attributes, then it is ok to inline. */
34613 if (!callee_tree)
34614 ret = true;
34615
34616 /* If the caller has no option attributes but the callee does, then it is
34617 not ok to inline. */
34618 else if (!caller_tree)
34619 ret = false;
34620
34621 else
34622 {
34623 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
34624 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
34625
34626 /* Callee's options must be a subset of the caller's, i.e. a vsx function
34627 can inline an altivec function, but a non-vsx function can't inline a
34628 vsx function. */
34629 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
34630 == callee_opts->x_rs6000_isa_flags)
34631 ret = true;
34632 }
34633
34634 if (TARGET_DEBUG_TARGET)
34635 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
34636 (DECL_NAME (caller)
34637 ? IDENTIFIER_POINTER (DECL_NAME (caller))
34638 : "<unknown>"),
34639 (DECL_NAME (callee)
34640 ? IDENTIFIER_POINTER (DECL_NAME (callee))
34641 : "<unknown>"),
34642 (ret ? "can" : "cannot"));
34643
34644 return ret;
34645 }
34646 \f
34647 /* Allocate a stack temp and fix up the address so it meets the particular
34648 memory requirements (either offsettable or REG+REG addressing). */
34649
34650 rtx
34651 rs6000_allocate_stack_temp (machine_mode mode,
34652 bool offsettable_p,
34653 bool reg_reg_p)
34654 {
34655 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
34656 rtx addr = XEXP (stack, 0);
34657 int strict_p = (reload_in_progress || reload_completed);
34658
34659 if (!legitimate_indirect_address_p (addr, strict_p))
34660 {
34661 if (offsettable_p
34662 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
34663 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
34664
34665 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
34666 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
34667 }
34668
34669 return stack;
34670 }
34671
34672 /* Given a memory reference, if it does not use reg or reg+reg addressing,
34673 convert it to such a form to deal with memory reference instructions like
34674 STFIWX that only take reg+reg addressing. */
34675
34676 rtx
34677 rs6000_address_for_fpconvert (rtx x)
34678 {
34679 int strict_p = (reload_in_progress || reload_completed);
34680 rtx addr;
34681
34682 gcc_assert (MEM_P (x));
34683 addr = XEXP (x, 0);
34684 if (! legitimate_indirect_address_p (addr, strict_p)
34685 && ! legitimate_indexed_address_p (addr, strict_p))
34686 {
34687 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
34688 {
34689 rtx reg = XEXP (addr, 0);
34690 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
34691 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
34692 gcc_assert (REG_P (reg));
34693 emit_insn (gen_add3_insn (reg, reg, size_rtx));
34694 addr = reg;
34695 }
34696 else if (GET_CODE (addr) == PRE_MODIFY)
34697 {
34698 rtx reg = XEXP (addr, 0);
34699 rtx expr = XEXP (addr, 1);
34700 gcc_assert (REG_P (reg));
34701 gcc_assert (GET_CODE (expr) == PLUS);
34702 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
34703 addr = reg;
34704 }
34705
34706 x = replace_equiv_address (x, copy_addr_to_reg (addr));
34707 }
34708
34709 return x;
34710 }
34711
34712 /* Given a memory reference, if it is not in the form for altivec memory
34713 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
34714 convert to the altivec format. */
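/* For example, a V16QI reference to (reg+8) is rewritten as a reference
   through (and (reg tmp) -16), where tmp holds the original address; the
   AND mirrors the way lvx/stvx ignore the low four address bits.  */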
34715
34716 rtx
34717 rs6000_address_for_altivec (rtx x)
34718 {
34719 gcc_assert (MEM_P (x));
34720 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
34721 {
34722 rtx addr = XEXP (x, 0);
34723 int strict_p = (reload_in_progress || reload_completed);
34724
34725 if (!legitimate_indexed_address_p (addr, strict_p)
34726 && !legitimate_indirect_address_p (addr, strict_p))
34727 addr = copy_to_mode_reg (Pmode, addr);
34728
34729 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
34730 x = change_address (x, GET_MODE (x), addr);
34731 }
34732
34733 return x;
34734 }
34735
34736 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
34737
34738 On the RS/6000, all integer constants are acceptable, most won't be valid
34739 for particular insns, though. Only easy FP constants are acceptable. */
34740
34741 static bool
34742 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
34743 {
34744 if (TARGET_ELF && tls_referenced_p (x))
34745 return false;
34746
34747 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
34748 || GET_MODE (x) == VOIDmode
34749 || (TARGET_POWERPC64 && mode == DImode)
34750 || easy_fp_constant (x, mode)
34751 || easy_vector_constant (x, mode));
34752 }
34753
34754 \f
34755 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
34756
34757 static bool
34758 chain_already_loaded (rtx_insn *last)
34759 {
34760 for (; last != NULL; last = PREV_INSN (last))
34761 {
34762 if (NONJUMP_INSN_P (last))
34763 {
34764 rtx patt = PATTERN (last);
34765
34766 if (GET_CODE (patt) == SET)
34767 {
34768 rtx lhs = XEXP (patt, 0);
34769
34770 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
34771 return true;
34772 }
34773 }
34774 }
34775 return false;
34776 }
34777
34778 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
34779
34780 void
34781 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
34782 {
34783 const bool direct_call_p
34784 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
34785 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
34786 rtx toc_load = NULL_RTX;
34787 rtx toc_restore = NULL_RTX;
34788 rtx func_addr;
34789 rtx abi_reg = NULL_RTX;
34790 rtx call[4];
34791 int n_call;
34792 rtx insn;
34793
34794 /* Handle longcall attributes. */
34795 if (INTVAL (cookie) & CALL_LONG)
34796 func_desc = rs6000_longcall_ref (func_desc);
34797
34798 /* Handle indirect calls. */
34799 if (GET_CODE (func_desc) != SYMBOL_REF
34800 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
34801 {
34802 /* Save the TOC into its reserved slot before the call,
34803 and prepare to restore it after the call. */
34804 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
34805 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
34806 rtx stack_toc_mem = gen_frame_mem (Pmode,
34807 gen_rtx_PLUS (Pmode, stack_ptr,
34808 stack_toc_offset));
34809 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
34810 gen_rtvec (1, stack_toc_offset),
34811 UNSPEC_TOCSLOT);
34812 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
34813
34814 /* Can we optimize saving the TOC in the prologue or
34815 do we need to do it at every call? */
34816 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
34817 cfun->machine->save_toc_in_prologue = true;
34818 else
34819 {
34820 MEM_VOLATILE_P (stack_toc_mem) = 1;
34821 emit_move_insn (stack_toc_mem, toc_reg);
34822 }
34823
34824 if (DEFAULT_ABI == ABI_ELFv2)
34825 {
34826 /* A function pointer in the ELFv2 ABI is just a plain address, but
34827 the ABI requires it to be loaded into r12 before the call. */
34828 func_addr = gen_rtx_REG (Pmode, 12);
34829 emit_move_insn (func_addr, func_desc);
34830 abi_reg = func_addr;
34831 }
34832 else
34833 {
34834 /* A function pointer under AIX is a pointer to a data area whose
34835 first word contains the actual address of the function, whose
34836 second word contains a pointer to its TOC, and whose third word
34837 contains a value to place in the static chain register (r11).
34838 Note that if we load the static chain, our "trampoline" need
34839 not have any executable code. */
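/* Layout of the descriptor assumed below (64-bit, Pmode == DImode):
     offset 0: entry-point address (loaded into FUNC_ADDR)
     offset 8: callee's TOC pointer (loaded immediately before the call)
     offset 16: static chain value for r11, if any.  */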
34840
34841 /* Load up address of the actual function. */
34842 func_desc = force_reg (Pmode, func_desc);
34843 func_addr = gen_reg_rtx (Pmode);
34844 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
34845
34846 /* Prepare to load the TOC of the called function. Note that the
34847 TOC load must happen immediately before the actual call so
34848 that unwinding the TOC registers works correctly. See the
34849 comment in frob_update_context. */
34850 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
34851 rtx func_toc_mem = gen_rtx_MEM (Pmode,
34852 gen_rtx_PLUS (Pmode, func_desc,
34853 func_toc_offset));
34854 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
34855
34856 /* If we have a static chain, load it up. But, if the call was
34857 originally direct, the 3rd word has not been written since no
34858 trampoline has been built, so we ought not to load it, lest we
34859 override a static chain value. */
34860 if (!direct_call_p
34861 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
34862 && !chain_already_loaded (get_current_sequence ()->next->last))
34863 {
34864 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
34865 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
34866 rtx func_sc_mem = gen_rtx_MEM (Pmode,
34867 gen_rtx_PLUS (Pmode, func_desc,
34868 func_sc_offset));
34869 emit_move_insn (sc_reg, func_sc_mem);
34870 abi_reg = sc_reg;
34871 }
34872 }
34873 }
34874 else
34875 {
34876 /* Direct calls use the TOC: for local calls, the callee will
34877 assume the TOC register is set; for non-local calls, the
34878 PLT stub needs the TOC register. */
34879 abi_reg = toc_reg;
34880 func_addr = func_desc;
34881 }
34882
34883 /* Create the call. */
34884 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
34885 if (value != NULL_RTX)
34886 call[0] = gen_rtx_SET (value, call[0]);
34887 n_call = 1;
34888
34889 if (toc_load)
34890 call[n_call++] = toc_load;
34891 if (toc_restore)
34892 call[n_call++] = toc_restore;
34893
34894 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
34895
34896 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
34897 insn = emit_call_insn (insn);
34898
34899 /* Mention all registers defined by the ABI to hold information
34900 as uses in CALL_INSN_FUNCTION_USAGE. */
34901 if (abi_reg)
34902 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
34903 }
34904
34905 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
34906
34907 void
34908 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
34909 {
34910 rtx call[2];
34911 rtx insn;
34912
34913 gcc_assert (INTVAL (cookie) == 0);
34914
34915 /* Create the call. */
34916 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
34917 if (value != NULL_RTX)
34918 call[0] = gen_rtx_SET (value, call[0]);
34919
34920 call[1] = simple_return_rtx;
34921
34922 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
34923 insn = emit_call_insn (insn);
34924
34925 /* Note use of the TOC register. */
34926 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
34927 /* We need to also mark a use of the link register since the function we
34928 sibling-call to will use it to return to our caller. */
34929 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
34930 }
34931
34932 /* Return whether we need to always update the saved TOC pointer when we update
34933 the stack pointer. */
34934
34935 static bool
34936 rs6000_save_toc_in_prologue_p (void)
34937 {
34938 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
34939 }
34940
34941 #ifdef HAVE_GAS_HIDDEN
34942 # define USE_HIDDEN_LINKONCE 1
34943 #else
34944 # define USE_HIDDEN_LINKONCE 0
34945 #endif
34946
34947 /* Fills in the label name that should be used for a 476 link stack thunk. */
34948
34949 void
34950 get_ppc476_thunk_name (char name[32])
34951 {
34952 gcc_assert (TARGET_LINK_STACK);
34953
34954 if (USE_HIDDEN_LINKONCE)
34955 sprintf (name, "__ppc476.get_thunk");
34956 else
34957 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
34958 }
34959
34960 /* This function emits the simple thunk routine that is used to preserve
34961 the link stack on the 476 CPU. */
34962
34963 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
34964 static void
34965 rs6000_code_end (void)
34966 {
34967 char name[32];
34968 tree decl;
34969
34970 if (!TARGET_LINK_STACK)
34971 return;
34972
34973 get_ppc476_thunk_name (name);
34974
34975 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
34976 build_function_type_list (void_type_node, NULL_TREE));
34977 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
34978 NULL_TREE, void_type_node);
34979 TREE_PUBLIC (decl) = 1;
34980 TREE_STATIC (decl) = 1;
34981
34982 #if RS6000_WEAK
34983 if (USE_HIDDEN_LINKONCE)
34984 {
34985 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
34986 targetm.asm_out.unique_section (decl, 0);
34987 switch_to_section (get_named_section (decl, NULL, 0));
34988 DECL_WEAK (decl) = 1;
34989 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
34990 targetm.asm_out.globalize_label (asm_out_file, name);
34991 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
34992 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
34993 }
34994 else
34995 #endif
34996 {
34997 switch_to_section (text_section);
34998 ASM_OUTPUT_LABEL (asm_out_file, name);
34999 }
35000
35001 DECL_INITIAL (decl) = make_node (BLOCK);
35002 current_function_decl = decl;
35003 allocate_struct_function (decl, false);
35004 init_function_start (decl);
35005 first_function_block_is_cold = false;
35006 /* Make sure unwind info is emitted for the thunk if needed. */
35007 final_start_function (emit_barrier (), asm_out_file, 1);
35008
35009 fputs ("\tblr\n", asm_out_file);
35010
35011 final_end_function ();
35012 init_insn_lengths ();
35013 free_after_compilation (cfun);
35014 set_cfun (NULL);
35015 current_function_decl = NULL;
35016 }
35017
35018 /* Add r30 to hard reg set if the prologue sets it up and it is not
35019 pic_offset_table_rtx. */
35020
35021 static void
35022 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
35023 {
35024 if (!TARGET_SINGLE_PIC_BASE
35025 && TARGET_TOC
35026 && TARGET_MINIMAL_TOC
35027 && get_pool_size () != 0)
35028 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
35029 if (cfun->machine->split_stack_argp_used)
35030 add_to_hard_reg_set (&set->set, Pmode, 12);
35031 }
35032
35033 \f
35034 /* Helper function for rs6000_split_logical to emit a logical instruction after
35035 splitting the operation into individual GPR registers.
35036
35037 DEST is the destination register.
35038 OP1 and OP2 are the input source registers.
35039 CODE is the base operation (AND, IOR, XOR, NOT).
35040 MODE is the machine mode.
35041 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35042 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35043 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35044
35045 static void
35046 rs6000_split_logical_inner (rtx dest,
35047 rtx op1,
35048 rtx op2,
35049 enum rtx_code code,
35050 machine_mode mode,
35051 bool complement_final_p,
35052 bool complement_op1_p,
35053 bool complement_op2_p)
35054 {
35055 rtx bool_rtx;
35056
35057 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
35058 if (op2 && GET_CODE (op2) == CONST_INT
35059 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
35060 && !complement_final_p && !complement_op1_p && !complement_op2_p)
35061 {
35062 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
35063 HOST_WIDE_INT value = INTVAL (op2) & mask;
35064
35065 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
35066 if (code == AND)
35067 {
35068 if (value == 0)
35069 {
35070 emit_insn (gen_rtx_SET (dest, const0_rtx));
35071 return;
35072 }
35073
35074 else if (value == mask)
35075 {
35076 if (!rtx_equal_p (dest, op1))
35077 emit_insn (gen_rtx_SET (dest, op1));
35078 return;
35079 }
35080 }
35081
35082 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
35083 into separate ORI/ORIS or XORI/XORIS instructions. */
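/* As a sketch, x | 0x12345678 in SImode would be emitted as two insns:
        oris tmp,x,0x1234
        ori  dst,tmp,0x5678  */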
35084 else if (code == IOR || code == XOR)
35085 {
35086 if (value == 0)
35087 {
35088 if (!rtx_equal_p (dest, op1))
35089 emit_insn (gen_rtx_SET (dest, op1));
35090 return;
35091 }
35092 }
35093 }
35094
35095 if (code == AND && mode == SImode
35096 && !complement_final_p && !complement_op1_p && !complement_op2_p)
35097 {
35098 emit_insn (gen_andsi3 (dest, op1, op2));
35099 return;
35100 }
35101
35102 if (complement_op1_p)
35103 op1 = gen_rtx_NOT (mode, op1);
35104
35105 if (complement_op2_p)
35106 op2 = gen_rtx_NOT (mode, op2);
35107
35108 /* For canonical RTL, if only one arm is inverted it is the first. */
35109 if (!complement_op1_p && complement_op2_p)
35110 std::swap (op1, op2);
35111
35112 bool_rtx = ((code == NOT)
35113 ? gen_rtx_NOT (mode, op1)
35114 : gen_rtx_fmt_ee (code, mode, op1, op2));
35115
35116 if (complement_final_p)
35117 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
35118
35119 emit_insn (gen_rtx_SET (dest, bool_rtx));
35120 }
35121
35122 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
35123 operations are split immediately during RTL generation to allow for more
35124 optimizations of the AND/IOR/XOR.
35125
35126 OPERANDS is an array containing the destination and two input operands.
35127 CODE is the base operation (AND, IOR, XOR, NOT).
35128 MODE is the machine mode.
35129 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35130 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35131 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35134
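/* Illustrative example: on a 32-bit target, (a & 0xffffffff00000000ULL)
   splits into "mr dst_hi,a_hi" for the high word (AND with -1 is a move)
   and "li dst_lo,0" for the low word (AND with 0 is a clear).  */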
35135 static void
35136 rs6000_split_logical_di (rtx operands[3],
35137 enum rtx_code code,
35138 bool complement_final_p,
35139 bool complement_op1_p,
35140 bool complement_op2_p)
35141 {
35142 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
35143 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
35144 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
35145 enum hi_lo { hi = 0, lo = 1 };
35146 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
35147 size_t i;
35148
35149 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
35150 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
35151 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
35152 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
35153
35154 if (code == NOT)
35155 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
35156 else
35157 {
35158 if (GET_CODE (operands[2]) != CONST_INT)
35159 {
35160 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
35161 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
35162 }
35163 else
35164 {
35165 HOST_WIDE_INT value = INTVAL (operands[2]);
35166 HOST_WIDE_INT value_hi_lo[2];
35167
35168 gcc_assert (!complement_final_p);
35169 gcc_assert (!complement_op1_p);
35170 gcc_assert (!complement_op2_p);
35171
35172 value_hi_lo[hi] = value >> 32;
35173 value_hi_lo[lo] = value & lower_32bits;
35174
35175 for (i = 0; i < 2; i++)
35176 {
35177 HOST_WIDE_INT sub_value = value_hi_lo[i];
35178
35179 if (sub_value & sign_bit)
35180 sub_value |= upper_32bits;
35181
35182 op2_hi_lo[i] = GEN_INT (sub_value);
35183
35184 /* If this is an AND instruction, check to see if we need to load
35185 the value in a register. */
35186 if (code == AND && sub_value != -1 && sub_value != 0
35187 && !and_operand (op2_hi_lo[i], SImode))
35188 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
35189 }
35190 }
35191 }
35192
35193 for (i = 0; i < 2; i++)
35194 {
35195 /* Split large IOR/XOR operations. */
35196 if ((code == IOR || code == XOR)
35197 && GET_CODE (op2_hi_lo[i]) == CONST_INT
35198 && !complement_final_p
35199 && !complement_op1_p
35200 && !complement_op2_p
35201 && !logical_const_operand (op2_hi_lo[i], SImode))
35202 {
35203 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
35204 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
35205 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
35206 rtx tmp = gen_reg_rtx (SImode);
35207
35208 /* Make sure the constant is sign extended. */
35209 if ((hi_16bits & sign_bit) != 0)
35210 hi_16bits |= upper_32bits;
35211
35212 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
35213 code, SImode, false, false, false);
35214
35215 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
35216 code, SImode, false, false, false);
35217 }
35218 else
35219 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
35220 code, SImode, complement_final_p,
35221 complement_op1_p, complement_op2_p);
35222 }
35223
35224 return;
35225 }
35226
35227 /* Split the insns that make up boolean operations operating on multiple GPR
35228 registers. The boolean MD patterns ensure that the inputs either are
35229 exactly the same as the output registers, or there is no overlap.
35230
35231 OPERANDS is an array containing the destination and two input operands.
35232 CODE is the base operation (AND, IOR, XOR, NOT).
35233 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35234 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35235 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35236
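/* For instance, a TImode XOR on a 64-bit target is emitted as two DImode
   XOR insns, one for each 64-bit half of the register pair (a sketch; the
   actual subwords come from simplify_subreg below).  */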
35237 void
35238 rs6000_split_logical (rtx operands[3],
35239 enum rtx_code code,
35240 bool complement_final_p,
35241 bool complement_op1_p,
35242 bool complement_op2_p)
35243 {
35244 machine_mode mode = GET_MODE (operands[0]);
35245 machine_mode sub_mode;
35246 rtx op0, op1, op2;
35247 int sub_size, regno0, regno1, nregs, i;
35248
35249 /* If this is DImode, use the specialized version that can run before
35250 register allocation. */
35251 if (mode == DImode && !TARGET_POWERPC64)
35252 {
35253 rs6000_split_logical_di (operands, code, complement_final_p,
35254 complement_op1_p, complement_op2_p);
35255 return;
35256 }
35257
35258 op0 = operands[0];
35259 op1 = operands[1];
35260 op2 = (code == NOT) ? NULL_RTX : operands[2];
35261 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
35262 sub_size = GET_MODE_SIZE (sub_mode);
35263 regno0 = REGNO (op0);
35264 regno1 = REGNO (op1);
35265
35266 gcc_assert (reload_completed);
35267 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
35268 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
35269
35270 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
35271 gcc_assert (nregs > 1);
35272
35273 if (op2 && REG_P (op2))
35274 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
35275
35276 for (i = 0; i < nregs; i++)
35277 {
35278 int offset = i * sub_size;
35279 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
35280 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
35281 rtx sub_op2 = ((code == NOT)
35282 ? NULL_RTX
35283 : simplify_subreg (sub_mode, op2, mode, offset));
35284
35285 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
35286 complement_final_p, complement_op1_p,
35287 complement_op2_p);
35288 }
35289
35290 return;
35291 }
35292
35293 \f
35294 /* Return true if the peephole2 can combine a load involving a combination of
35295 an addis instruction and a load with an offset that can be fused together on
35296 a power8. */
35297
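/* A fusible candidate looks roughly like (registers illustrative):
        addis r10,r2,sym@toc@ha
        lwz   r10,sym@toc@l(r10)
   i.e. the addis feeds the base of a D-form load and, after the
   peephole, both instructions target the same register.  */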
35298 bool
35299 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
35300 rtx addis_value, /* addis value. */
35301 rtx target, /* target register that is loaded. */
35302 rtx mem) /* bottom part of the memory addr. */
35303 {
35304 rtx addr;
35305 rtx base_reg;
35306
35307 /* Validate arguments. */
35308 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
35309 return false;
35310
35311 if (!base_reg_operand (target, GET_MODE (target)))
35312 return false;
35313
35314 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
35315 return false;
35316
35317 /* Allow sign/zero extension. */
35318 if (GET_CODE (mem) == ZERO_EXTEND
35319 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
35320 mem = XEXP (mem, 0);
35321
35322 if (!MEM_P (mem))
35323 return false;
35324
35325 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
35326 return false;
35327
35328 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
35329 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
35330 return false;
35331
35332 /* Validate that the register used to load the high value is either the
35333 register being loaded, or we can safely replace its use.
35334
35335 This function is only called from the peephole2 pass and we assume that
35336 there are 2 instructions in the peephole (addis and load), so we want to
35337 check if the target register was not used in the memory address and the
35338 register to hold the addis result is dead after the peephole. */
35339 if (REGNO (addis_reg) != REGNO (target))
35340 {
35341 if (reg_mentioned_p (target, mem))
35342 return false;
35343
35344 if (!peep2_reg_dead_p (2, addis_reg))
35345 return false;
35346
35347 /* If the target register being loaded is the stack pointer, we must
35348 avoid loading any other value into it, even temporarily. */
35349 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
35350 return false;
35351 }
35352
35353 base_reg = XEXP (addr, 0);
35354 return REGNO (addis_reg) == REGNO (base_reg);
35355 }
35356
35357 /* During the peephole2 pass, adjust and expand the insns for a load fusion
35358 sequence. We adjust the addis register to use the target register. If the
35359 load sign extends, we adjust the code to do the zero extending load, and an
35360 explicit sign extension later since the fusion only covers zero extending
35361 loads.
35362
35363 The operands are:
35364 operands[0] register set with addis (to be replaced with target)
35365 operands[1] value set via addis
35366 operands[2] target register being loaded
35367 operands[3] D-form memory reference using operands[0]. */
35368
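/* Sketch of the RTL this emits (SImode load case, names illustrative):
        (set (reg:SI target)
             (unspec:SI [(mem:SI (plus (reg addis_value) (offset)))]
                        UNSPEC_FUSION_GPR))
   followed by a separate sign-extending set when the original load was
   a sign extension.  */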
35369 void
35370 expand_fusion_gpr_load (rtx *operands)
35371 {
35372 rtx addis_value = operands[1];
35373 rtx target = operands[2];
35374 rtx orig_mem = operands[3];
35375 rtx new_addr, new_mem, orig_addr, offset;
35376 enum rtx_code plus_or_lo_sum;
35377 machine_mode target_mode = GET_MODE (target);
35378 machine_mode extend_mode = target_mode;
35379 machine_mode ptr_mode = Pmode;
35380 enum rtx_code extend = UNKNOWN;
35381
35382 if (GET_CODE (orig_mem) == ZERO_EXTEND
35383 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
35384 {
35385 extend = GET_CODE (orig_mem);
35386 orig_mem = XEXP (orig_mem, 0);
35387 target_mode = GET_MODE (orig_mem);
35388 }
35389
35390 gcc_assert (MEM_P (orig_mem));
35391
35392 orig_addr = XEXP (orig_mem, 0);
35393 plus_or_lo_sum = GET_CODE (orig_addr);
35394 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
35395
35396 offset = XEXP (orig_addr, 1);
35397 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
35398 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
35399
35400 if (extend != UNKNOWN)
35401 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
35402
35403 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
35404 UNSPEC_FUSION_GPR);
35405 emit_insn (gen_rtx_SET (target, new_mem));
35406
35407 if (extend == SIGN_EXTEND)
35408 {
35409 int sub_off = ((BYTES_BIG_ENDIAN)
35410 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
35411 : 0);
35412 rtx sign_reg
35413 = simplify_subreg (target_mode, target, extend_mode, sub_off);
35414
35415 emit_insn (gen_rtx_SET (target,
35416 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
35417 }
35418
35419 return;
35420 }
35421
35422 /* Emit the addis instruction that will be part of a fused instruction
35423 sequence. */
35424
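/* The output looks something like this on ELF targets (a sketch; the
   trailing comment comes from COMMENT and MODE_NAME):
        addis 10,2,.LC0@toc@ha          # gpr load fusion, type int  */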
35425 void
35426 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
35427 const char *mode_name)
35428 {
35429 rtx fuse_ops[10];
35430 char insn_template[80];
35431 const char *addis_str = NULL;
35432 const char *comment_str = ASM_COMMENT_START;
35433
35434 if (*comment_str == ' ')
35435 comment_str++;
35436
35437 /* Emit the addis instruction. */
35438 fuse_ops[0] = target;
35439 if (satisfies_constraint_L (addis_value))
35440 {
35441 fuse_ops[1] = addis_value;
35442 addis_str = "lis %0,%v1";
35443 }
35444
35445 else if (GET_CODE (addis_value) == PLUS)
35446 {
35447 rtx op0 = XEXP (addis_value, 0);
35448 rtx op1 = XEXP (addis_value, 1);
35449
35450 if (REG_P (op0) && CONST_INT_P (op1)
35451 && satisfies_constraint_L (op1))
35452 {
35453 fuse_ops[1] = op0;
35454 fuse_ops[2] = op1;
35455 addis_str = "addis %0,%1,%v2";
35456 }
35457 }
35458
35459 else if (GET_CODE (addis_value) == HIGH)
35460 {
35461 rtx value = XEXP (addis_value, 0);
35462 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
35463 {
35464 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
35465 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
35466 if (TARGET_ELF)
35467 addis_str = "addis %0,%2,%1@toc@ha";
35468
35469 else if (TARGET_XCOFF)
35470 addis_str = "addis %0,%1@u(%2)";
35471
35472 else
35473 gcc_unreachable ();
35474 }
35475
35476 else if (GET_CODE (value) == PLUS)
35477 {
35478 rtx op0 = XEXP (value, 0);
35479 rtx op1 = XEXP (value, 1);
35480
35481 if (GET_CODE (op0) == UNSPEC
35482 && XINT (op0, 1) == UNSPEC_TOCREL
35483 && CONST_INT_P (op1))
35484 {
35485 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
35486 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
35487 fuse_ops[3] = op1;
35488 if (TARGET_ELF)
35489 addis_str = "addis %0,%2,%1+%3@toc@ha";
35490
35491 else if (TARGET_XCOFF)
35492 addis_str = "addis %0,%1+%3@u(%2)";
35493
35494 else
35495 gcc_unreachable ();
35496 }
35497 }
35498
35499 else if (satisfies_constraint_L (value))
35500 {
35501 fuse_ops[1] = value;
35502 addis_str = "lis %0,%v1";
35503 }
35504
35505 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
35506 {
35507 fuse_ops[1] = value;
35508 addis_str = "lis %0,%1@ha";
35509 }
35510 }
35511
35512 if (!addis_str)
35513 fatal_insn ("Could not generate addis value for fusion", addis_value);
35514
35515 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
35516 comment, mode_name);
35517 output_asm_insn (insn_template, fuse_ops);
35518 }
35519
35520 /* Emit a D-form load or store instruction that is the second instruction
35521 of a fusion sequence. */
35522
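/* For example, with INSN_STR "lwz" and a TOC-relative offset on ELF this
   prints something like:
        lwz 9,.LC0@toc@l(10)  */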
35523 void
35524 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
35525 const char *insn_str)
35526 {
35527 rtx fuse_ops[10];
35528 char insn_template[80];
35529
35530 fuse_ops[0] = load_store_reg;
35531 fuse_ops[1] = addis_reg;
35532
35533 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
35534 {
35535 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
35536 fuse_ops[2] = offset;
35537 output_asm_insn (insn_template, fuse_ops);
35538 }
35539
35540 else if (GET_CODE (offset) == UNSPEC
35541 && XINT (offset, 1) == UNSPEC_TOCREL)
35542 {
35543 if (TARGET_ELF)
35544 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
35545
35546 else if (TARGET_XCOFF)
35547 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
35548
35549 else
35550 gcc_unreachable ();
35551
35552 fuse_ops[2] = XVECEXP (offset, 0, 0);
35553 output_asm_insn (insn_template, fuse_ops);
35554 }
35555
35556 else if (GET_CODE (offset) == PLUS
35557 && GET_CODE (XEXP (offset, 0)) == UNSPEC
35558 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
35559 && CONST_INT_P (XEXP (offset, 1)))
35560 {
35561 rtx tocrel_unspec = XEXP (offset, 0);
35562 if (TARGET_ELF)
35563 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
35564
35565 else if (TARGET_XCOFF)
35566 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
35567
35568 else
35569 gcc_unreachable ();
35570
35571 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
35572 fuse_ops[3] = XEXP (offset, 1);
35573 output_asm_insn (insn_template, fuse_ops);
35574 }
35575
35576 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
35577 {
35578 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
35579
35580 fuse_ops[2] = offset;
35581 output_asm_insn (insn_template, fuse_ops);
35582 }
35583
35584 else
35585 fatal_insn ("Unable to generate load/store offset for fusion", offset);
35586
35587 return;
35588 }
35589
35590 /* Wrap a TOC address that can be fused to indicate that special fusion
35591 processing is needed. */
35592
35593 rtx
35594 fusion_wrap_memory_address (rtx old_mem)
35595 {
35596 rtx old_addr = XEXP (old_mem, 0);
35597 rtvec v = gen_rtvec (1, old_addr);
35598 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
35599 return replace_equiv_address_nv (old_mem, new_addr, false);
35600 }
35601
35602 /* Given an address, convert it into the addis and load offset parts. Addresses
35603 created during the peephole2 process look like:
35604 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
35605 (unspec [(...)] UNSPEC_TOCREL))
35606
35607 Addresses created via toc fusion look like:
35608 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
35609
35610 static void
35611 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
35612 {
35613 rtx hi, lo;
35614
35615 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
35616 {
35617 lo = XVECEXP (addr, 0, 0);
35618 hi = gen_rtx_HIGH (Pmode, lo);
35619 }
35620 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
35621 {
35622 hi = XEXP (addr, 0);
35623 lo = XEXP (addr, 1);
35624 }
35625 else
35626 gcc_unreachable ();
35627
35628 *p_hi = hi;
35629 *p_lo = lo;
35630 }
35631
35632 /* Return a string to fuse an addis instruction with a gpr load into the same
35633 register that the addis instruction set. The address that is used
35634 is the logical address that was formed during peephole2:
35635 (lo_sum (high) (low-part))
35636
35637 Or the address is the TOC address that is wrapped before register allocation:
35638 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
35639
35640 The code is complicated, so we call output_asm_insn directly, and just
35641 return "". */
35642
35643 const char *
35644 emit_fusion_gpr_load (rtx target, rtx mem)
35645 {
35646 rtx addis_value;
35647 rtx addr;
35648 rtx load_offset;
35649 const char *load_str = NULL;
35650 const char *mode_name = NULL;
35651 machine_mode mode;
35652
35653 if (GET_CODE (mem) == ZERO_EXTEND)
35654 mem = XEXP (mem, 0);
35655
35656 gcc_assert (REG_P (target) && MEM_P (mem));
35657
35658 addr = XEXP (mem, 0);
35659 fusion_split_address (addr, &addis_value, &load_offset);
35660
35661 /* Now emit the load instruction to the same register. */
35662 mode = GET_MODE (mem);
35663 switch (mode)
35664 {
35665 case QImode:
35666 mode_name = "char";
35667 load_str = "lbz";
35668 break;
35669
35670 case HImode:
35671 mode_name = "short";
35672 load_str = "lhz";
35673 break;
35674
35675 case SImode:
35676 case SFmode:
35677 mode_name = (mode == SFmode) ? "float" : "int";
35678 load_str = "lwz";
35679 break;
35680
35681 case DImode:
35682 case DFmode:
35683 gcc_assert (TARGET_POWERPC64);
35684 mode_name = (mode == DFmode) ? "double" : "long";
35685 load_str = "ld";
35686 break;
35687
35688 default:
35689 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
35690 }
35691
35692 /* Emit the addis instruction. */
35693 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
35694
35695 /* Emit the D-form load instruction. */
35696 emit_fusion_load_store (target, target, load_offset, load_str);
35697
35698 return "";
35699 }
35700 \f
35701
35702 /* Return true if the peephole2 can combine a load/store involving a
35703 combination of an addis instruction and the memory operation. This was
35704 added to the ISA 3.0 (power9) hardware. */
35705
35706 bool
35707 fusion_p9_p (rtx addis_reg, /* register set via addis. */
35708 rtx addis_value, /* addis value. */
35709 rtx dest, /* destination (memory or register). */
35710 rtx src) /* source (register or memory). */
35711 {
35712 rtx addr, mem, offset;
35713 machine_mode mode = GET_MODE (src);
35714
35715 /* Validate arguments. */
35716 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
35717 return false;
35718
35719 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
35720 return false;
35721
35722 /* Ignore extend operations that are part of the load. */
35723 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
35724 src = XEXP (src, 0);
35725
35726 /* Test for memory<-register or register<-memory. */
35727 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
35728 {
35729 if (!MEM_P (dest))
35730 return false;
35731
35732 mem = dest;
35733 }
35734
35735 else if (MEM_P (src))
35736 {
35737 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
35738 return false;
35739
35740 mem = src;
35741 }
35742
35743 else
35744 return false;
35745
35746 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
35747 if (GET_CODE (addr) == PLUS)
35748 {
35749 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
35750 return false;
35751
35752 return satisfies_constraint_I (XEXP (addr, 1));
35753 }
35754
35755 else if (GET_CODE (addr) == LO_SUM)
35756 {
35757 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
35758 return false;
35759
35760 offset = XEXP (addr, 1);
35761 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
35762 return small_toc_ref (offset, GET_MODE (offset));
35763
35764 else if (TARGET_ELF && !TARGET_POWERPC64)
35765 return CONSTANT_P (offset);
35766 }
35767
35768 return false;
35769 }
35770
35771 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
35772 load sequence.
35773
35774 The operands are:
35775 operands[0] register set with addis
35776 operands[1] value set via addis
35777 operands[2] target register being loaded
35778 operands[3] D-form memory reference using operands[0].
35779
35780 This is similar to the fusion introduced with power8, except it generalizes
35781 to both loads and stores and does not require the result register to be the
35782 same as the base register. At the moment, we only do this if the register
35783 set by the addis instruction is dead. */
35784
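/* Sketch of the replacement RTL: a PARALLEL of the fused load and a
   clobber of the addis scratch register, roughly
        (parallel [(set (reg target)
                        (unspec [(mem)] UNSPEC_FUSION_P9))
                   (clobber (reg tmp))])  */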
35785 void
35786 expand_fusion_p9_load (rtx *operands)
35787 {
35788 rtx tmp_reg = operands[0];
35789 rtx addis_value = operands[1];
35790 rtx target = operands[2];
35791 rtx orig_mem = operands[3];
35792 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
35793 enum rtx_code plus_or_lo_sum;
35794 machine_mode target_mode = GET_MODE (target);
35795 machine_mode extend_mode = target_mode;
35796 machine_mode ptr_mode = Pmode;
35797 enum rtx_code extend = UNKNOWN;
35798
35799 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
35800 {
35801 extend = GET_CODE (orig_mem);
35802 orig_mem = XEXP (orig_mem, 0);
35803 target_mode = GET_MODE (orig_mem);
35804 }
35805
35806 gcc_assert (MEM_P (orig_mem));
35807
35808 orig_addr = XEXP (orig_mem, 0);
35809 plus_or_lo_sum = GET_CODE (orig_addr);
35810 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
35811
35812 offset = XEXP (orig_addr, 1);
35813 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
35814 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
35815
35816 if (extend != UNKNOWN)
35817 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
35818
35819 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
35820 UNSPEC_FUSION_P9);
35821
35822 set = gen_rtx_SET (target, new_mem);
35823 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
35824 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
35825 emit_insn (insn);
35826
35827 return;
35828 }
35829
35830 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
35831 store sequence.
35832
35833 The operands are:
35834 operands[0] register set with addis
35835 operands[1] value set via addis
35836 operands[2] target D-form memory being stored to
35837 operands[3] register being stored
35838
35839 This is similar to the fusion introduced with power8, except it generalizes
35840 to both loads and stores and does not require the result register to be the
35841 same as the base register. At the moment, we only do this if the register
35842 set by the addis instruction is dead. */
35843
35844 void
35845 expand_fusion_p9_store (rtx *operands)
35846 {
35847 rtx tmp_reg = operands[0];
35848 rtx addis_value = operands[1];
35849 rtx orig_mem = operands[2];
35850 rtx src = operands[3];
35851 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
35852 enum rtx_code plus_or_lo_sum;
35853 machine_mode target_mode = GET_MODE (orig_mem);
35854 machine_mode ptr_mode = Pmode;
35855
35856 gcc_assert (MEM_P (orig_mem));
35857
35858 orig_addr = XEXP (orig_mem, 0);
35859 plus_or_lo_sum = GET_CODE (orig_addr);
35860 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
35861
35862 offset = XEXP (orig_addr, 1);
35863 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
35864 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
35865
35866 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
35867 UNSPEC_FUSION_P9);
35868
35869 set = gen_rtx_SET (new_mem, new_src);
35870 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
35871 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
35872 emit_insn (insn);
35873
35874 return;
35875 }
35876
35877 /* Return a string to fuse an addis instruction with a load using extended
35878 fusion. The address that is used is the logical address that was formed
35879 during peephole2: (lo_sum (high) (low-part))
35880
35881 The code is complicated, so we call output_asm_insn directly, and just
35882 return "". */
35883
35884 const char *
35885 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
35886 {
35887 machine_mode mode = GET_MODE (reg);
35888 rtx hi;
35889 rtx lo;
35890 rtx addr;
35891 const char *load_string;
35892 int r;
35893
35894 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
35895 {
35896 mem = XEXP (mem, 0);
35897 mode = GET_MODE (mem);
35898 }
35899
35900 if (GET_CODE (reg) == SUBREG)
35901 {
35902 gcc_assert (SUBREG_BYTE (reg) == 0);
35903 reg = SUBREG_REG (reg);
35904 }
35905
35906 if (!REG_P (reg))
35907 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
35908
35909 r = REGNO (reg);
35910 if (FP_REGNO_P (r))
35911 {
35912 if (mode == SFmode)
35913 load_string = "lfs";
35914 else if (mode == DFmode || mode == DImode)
35915 load_string = "lfd";
35916 else
35917 gcc_unreachable ();
35918 }
35919 else if (INT_REGNO_P (r))
35920 {
35921 switch (mode)
35922 {
35923 case QImode:
35924 load_string = "lbz";
35925 break;
35926 case HImode:
35927 load_string = "lhz";
35928 break;
35929 case SImode:
35930 case SFmode:
35931 load_string = "lwz";
35932 break;
35933 case DImode:
35934 case DFmode:
35935 if (!TARGET_POWERPC64)
35936 gcc_unreachable ();
35937 load_string = "ld";
35938 break;
35939 default:
35940 gcc_unreachable ();
35941 }
35942 }
35943 else
35944 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
35945
35946 if (!MEM_P (mem))
35947 fatal_insn ("emit_fusion_p9_load not MEM", mem);
35948
35949 addr = XEXP (mem, 0);
35950 fusion_split_address (addr, &hi, &lo);
35951
35952 /* Emit the addis instruction. */
35953 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
35954
35955 /* Emit the D-form load instruction. */
35956 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
35957
35958 return "";
35959 }
35960
35961 /* Return a string to fuse an addis instruction with a store using extended
35962 fusion. The address that is used is the logical address that was formed
35963 during peephole2: (lo_sum (high) (low-part))
35964
35965 The code is complicated, so we call output_asm_insn directly, and just
35966 return "". */
35967
35968 const char *
35969 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
35970 {
35971 machine_mode mode = GET_MODE (reg);
35972 rtx hi;
35973 rtx lo;
35974 rtx addr;
35975 const char *store_string;
35976 int r;
35977
35978 if (GET_CODE (reg) == SUBREG)
35979 {
35980 gcc_assert (SUBREG_BYTE (reg) == 0);
35981 reg = SUBREG_REG (reg);
35982 }
35983
35984 if (!REG_P (reg))
35985 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
35986
35987 r = REGNO (reg);
35988 if (FP_REGNO_P (r))
35989 {
35990 if (mode == SFmode)
35991 store_string = "stfs";
35992 else if (mode == DFmode)
35993 store_string = "stfd";
35994 else
35995 gcc_unreachable ();
35996 }
35997 else if (INT_REGNO_P (r))
35998 {
35999 switch (mode)
36000 {
36001 case QImode:
36002 store_string = "stb";
36003 break;
36004 case HImode:
36005 store_string = "sth";
36006 break;
36007 case SImode:
36008 case SFmode:
36009 store_string = "stw";
36010 break;
36011 case DImode:
36012 case DFmode:
36013 if (!TARGET_POWERPC64)
36014 gcc_unreachable ();
36015 store_string = "std";
36016 break;
36017 default:
36018 gcc_unreachable ();
36019 }
36020 }
36021 else
36022 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
36023
36024 if (!MEM_P (mem))
36025 fatal_insn ("emit_fusion_p9_store not MEM", mem);
36026
36027 addr = XEXP (mem, 0);
36028 fusion_split_address (addr, &hi, &lo);
36029
36030 /* Emit the addis instruction. */
36031 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
36032
36033 /* Emit the D-form store instruction. */
36034 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
36035
36036 return "";
36037 }
36038
36039 \f
36040 /* Analyze vector computations and remove unnecessary doubleword
36041 swaps (xxswapdi instructions). This pass is performed only
36042 for little-endian VSX code generation.
36043
36044 For this specific case, loads and stores of 4x32 and 2x64 vectors
36045 are inefficient. These are implemented using the lxvd2x and
36046 stxvd2x instructions, which invert the order of doublewords in
36047 a vector register. Thus the code generation inserts an xxswapdi
36048 after each such load, and prior to each such store. (For spill
36049 code after register assignment, an additional xxswapdi is inserted
36050 following each store in order to return a hard register to its
36051 unpermuted value.)
36052
36053 The extra xxswapdi instructions reduce performance. This can be
36054 particularly bad for vectorized code. The purpose of this pass
36055 is to reduce the number of xxswapdi instructions required for
36056 correctness.
36057
36058 The primary insight is that much code that operates on vectors
36059 does not care about the relative order of elements in a register,
36060 so long as the correct memory order is preserved. If we have
36061 a computation where all input values are provided by lxvd2x/xxswapdi
36062 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
36063 and all intermediate computations are pure SIMD (independent of
36064 element order), then all the xxswapdi's associated with the loads
36065 and stores may be removed.
36066
36067 This pass uses some of the infrastructure and logical ideas from
36068 the "web" pass in web.c. We create maximal webs of computations
36069 fitting the description above using union-find. Each such web is
36070 then optimized by removing its unnecessary xxswapdi instructions.
36071
36072 The pass is placed prior to global optimization so that we can
36073 perform the optimization in the safest and simplest way possible;
36074 that is, by replacing each xxswapdi insn with a register copy insn.
36075 Subsequent forward propagation will remove copies where possible.
36076
36077 There are some operations sensitive to element order for which we
36078 can still allow the operation, provided we modify those operations.
36079 These include CONST_VECTORs, for which we must swap the first and
36080 second halves of the constant vector; and SUBREGs, for which we
36081 must adjust the byte offset to account for the swapped doublewords.
36082 A remaining opportunity would be non-immediate-form splats, for
36083 which we should adjust the selected lane of the input. We should
36084 also make code generation adjustments for sum-across operations,
36085 since this is a common vectorizer reduction.
36086
36087 Because we run prior to the first split, we can see loads and stores
36088 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
36089 vector loads and stores that have not yet been split into a permuting
36090 load/store and a swap. (One way this can happen is with a builtin
36091 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
36092 than deleting a swap, we convert the load/store into a permuting
36093 load/store (which effectively removes the swap). */
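/* As a simplified illustration, a little-endian V4SI copy through memory
   initially looks like
        lxvd2x 0,0,9
        xxswapdi 0,0,0
        ...
        xxswapdi 0,0,0
        stxvd2x 0,0,10
   and when every insn in the web is order-insensitive, both xxswapdi
   insns are turned into plain copies that later passes remove.  */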
36094
36095 /* Notes on Permutes
36096
36097 We do not currently handle computations that contain permutes. There
36098 is a general transformation that can be performed correctly, but it
36099 may introduce more expensive code than it replaces. To handle these
36100 would require a cost model to determine when to perform the optimization.
36101 This commentary records how this could be done if desired.
36102
36103 The most general permute is something like this (example for V16QI):
36104
36105 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
36106 (parallel [(const_int a0) (const_int a1)
36107 ...
36108 (const_int a14) (const_int a15)]))
36109
36110 where a0,...,a15 are in [0,31] and select elements from op1 and op2
36111 to produce in the result.
36112
36113 Regardless of mode, we can convert the PARALLEL to a mask of 16
36114 byte-element selectors. Let's call this M, with M[i] representing
36115 the ith byte-element selector value. Then if we swap doublewords
36116 throughout the computation, we can get correct behavior by replacing
36117 M with M' as follows:
36118
36119 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
36120 { ((M[i]+8)%16)+16 : M[i] in [16,31]
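   For example, M[i] = 3 becomes M'[i] = (3+8)%16 = 11, while
   M[i] = 20 becomes M'[i] = ((20+8)%16)+16 = 28.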
36121
36122 This seems promising at first, since we are just replacing one mask
36123 with another. But certain masks are preferable to others. If M
36124 is a mask that matches a vmrghh pattern, for example, M' certainly
36125 will not. Instead of a single vmrghh, we would generate a load of
36126 M' and a vperm. So we would need to know how many xxswapd's we can
36127 remove as a result of this transformation to determine if it's
36128 profitable; and preferably the logic would need to be aware of all
36129 the special preferable masks.
36130
36131 Another form of permute is an UNSPEC_VPERM, in which the mask is
36132 already in a register. In some cases, this mask may be a constant
36133 that we can discover with ud-chains, in which case the above
36134 transformation is ok. However, the common usage here is for the
36135 mask to be produced by an UNSPEC_LVSL, in which case the mask
36136 cannot be known at compile time. In such a case we would have to
36137 generate several instructions to compute M' as above at run time,
36138 and a cost model is needed again.
36139
36140 However, when the mask M for an UNSPEC_VPERM is loaded from the
36141 constant pool, we can replace M with M' as above at no cost
36142 beyond adding a constant pool entry. */
36143
36144 /* This is based on the union-find logic in web.c. web_entry_base is
36145 defined in df.h. */
36146 class swap_web_entry : public web_entry_base
36147 {
36148 public:
36149 /* Pointer to the insn. */
36150 rtx_insn *insn;
36151 /* Set if insn contains a mention of a vector register. All other
36152 fields are undefined if this field is unset. */
36153 unsigned int is_relevant : 1;
36154 /* Set if insn is a load. */
36155 unsigned int is_load : 1;
36156 /* Set if insn is a store. */
36157 unsigned int is_store : 1;
36158 /* Set if insn is a doubleword swap. This can either be a register swap
36159 or a permuting load or store (test is_load and is_store for this). */
36160 unsigned int is_swap : 1;
36161 /* Set if the insn has a live-in use of a parameter register. */
36162 unsigned int is_live_in : 1;
36163 /* Set if the insn has a live-out def of a return register. */
36164 unsigned int is_live_out : 1;
36165 /* Set if the insn contains a subreg reference of a vector register. */
36166 unsigned int contains_subreg : 1;
36167 /* Set if the insn contains a 128-bit integer operand. */
36168 unsigned int is_128_int : 1;
36169 /* Set if this is a call-insn. */
36170 unsigned int is_call : 1;
36171 /* Set if this insn does not perform a vector operation for which
36172 element order matters, or if we know how to fix it up if it does.
36173 Undefined if is_swap is set. */
36174 unsigned int is_swappable : 1;
36175 /* A nonzero value indicates what kind of special handling for this
36176 insn is required if doublewords are swapped. Undefined if
36177 is_swappable is not set. */
36178 unsigned int special_handling : 4;
36179 /* Set if the web represented by this entry cannot be optimized. */
36180 unsigned int web_not_optimizable : 1;
36181 /* Set if this insn should be deleted. */
36182 unsigned int will_delete : 1;
36183 };
36184
36185 enum special_handling_values {
36186 SH_NONE = 0,
36187 SH_CONST_VECTOR,
36188 SH_SUBREG,
36189 SH_NOSWAP_LD,
36190 SH_NOSWAP_ST,
36191 SH_EXTRACT,
36192 SH_SPLAT,
36193 SH_XXPERMDI,
36194 SH_CONCAT,
36195 SH_VPERM
36196 };
36197
36198 /* Union INSN with all insns containing definitions that reach USE.
36199 Detect whether USE is live-in to the current function. */
36200 static void
36201 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
36202 {
36203 struct df_link *link = DF_REF_CHAIN (use);
36204
36205 if (!link)
36206 insn_entry[INSN_UID (insn)].is_live_in = 1;
36207
36208 while (link)
36209 {
36210 if (DF_REF_IS_ARTIFICIAL (link->ref))
36211 insn_entry[INSN_UID (insn)].is_live_in = 1;
36212
36213 if (DF_REF_INSN_INFO (link->ref))
36214 {
36215 rtx def_insn = DF_REF_INSN (link->ref);
36216 (void)unionfind_union (insn_entry + INSN_UID (insn),
36217 insn_entry + INSN_UID (def_insn));
36218 }
36219
36220 link = link->next;
36221 }
36222 }
36223
36224 /* Union INSN with all insns containing uses reached from DEF.
36225 Detect whether DEF is live-out from the current function. */
36226 static void
36227 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
36228 {
36229 struct df_link *link = DF_REF_CHAIN (def);
36230
36231 if (!link)
36232 insn_entry[INSN_UID (insn)].is_live_out = 1;
36233
36234 while (link)
36235 {
36236 /* This could be an eh use or some other artificial use;
36237 we treat these all the same (killing the optimization). */
36238 if (DF_REF_IS_ARTIFICIAL (link->ref))
36239 insn_entry[INSN_UID (insn)].is_live_out = 1;
36240
36241 if (DF_REF_INSN_INFO (link->ref))
36242 {
36243 rtx use_insn = DF_REF_INSN (link->ref);
36244 (void)unionfind_union (insn_entry + INSN_UID (insn),
36245 insn_entry + INSN_UID (use_insn));
36246 }
36247
36248 link = link->next;
36249 }
36250 }
36251
36252 /* Return 1 iff INSN is a load insn, including permuting loads that
36253 represent an lxvd2x instruction; else return 0. */
36254 static unsigned int
36255 insn_is_load_p (rtx insn)
36256 {
36257 rtx body = PATTERN (insn);
36258
36259 if (GET_CODE (body) == SET)
36260 {
36261 if (GET_CODE (SET_SRC (body)) == MEM)
36262 return 1;
36263
36264 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
36265 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
36266 return 1;
36267
36268 return 0;
36269 }
36270
36271 if (GET_CODE (body) != PARALLEL)
36272 return 0;
36273
36274 rtx set = XVECEXP (body, 0, 0);
36275
36276 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
36277 return 1;
36278
36279 return 0;
36280 }
36281
36282 /* Return 1 iff INSN is a store insn, including permuting stores that
36283 represent an stxvd2x instruction; else return 0. */
36284 static unsigned int
36285 insn_is_store_p (rtx insn)
36286 {
36287 rtx body = PATTERN (insn);
36288 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
36289 return 1;
36290 if (GET_CODE (body) != PARALLEL)
36291 return 0;
36292 rtx set = XVECEXP (body, 0, 0);
36293 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
36294 return 1;
36295 return 0;
36296 }
36297
36298 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
36299 a permuting load, or a permuting store. */
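/* For V4SI, for example, such a swap has the shape (a sketch):
        (set (reg:V4SI x)
             (vec_select:V4SI (reg:V4SI y)
                              (parallel [(const_int 2) (const_int 3)
                                         (const_int 0) (const_int 1)])))  */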
36300 static unsigned int
36301 insn_is_swap_p (rtx insn)
36302 {
36303 rtx body = PATTERN (insn);
36304 if (GET_CODE (body) != SET)
36305 return 0;
36306 rtx rhs = SET_SRC (body);
36307 if (GET_CODE (rhs) != VEC_SELECT)
36308 return 0;
36309 rtx parallel = XEXP (rhs, 1);
36310 if (GET_CODE (parallel) != PARALLEL)
36311 return 0;
36312 unsigned int len = XVECLEN (parallel, 0);
36313 if (len != 2 && len != 4 && len != 8 && len != 16)
36314 return 0;
36315 for (unsigned int i = 0; i < len / 2; ++i)
36316 {
36317 rtx op = XVECEXP (parallel, 0, i);
36318 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
36319 return 0;
36320 }
36321 for (unsigned int i = len / 2; i < len; ++i)
36322 {
36323 rtx op = XVECEXP (parallel, 0, i);
36324 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
36325 return 0;
36326 }
36327 return 1;
36328 }
36329
36330 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
36331 static bool
36332 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
36333 {
36334 unsigned uid = INSN_UID (insn);
36335 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
36336 return false;
36337
36338 /* Find the unique use in the swap and locate its def. If the def
36339 isn't unique, punt. */
36340 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36341 df_ref use;
36342 FOR_EACH_INSN_INFO_USE (use, insn_info)
36343 {
36344 struct df_link *def_link = DF_REF_CHAIN (use);
36345 if (!def_link || def_link->next)
36346 return false;
36347
36348 rtx def_insn = DF_REF_INSN (def_link->ref);
36349 unsigned uid2 = INSN_UID (def_insn);
36350 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
36351 return false;
36352
36353 rtx body = PATTERN (def_insn);
36354 if (GET_CODE (body) != SET
36355 || GET_CODE (SET_SRC (body)) != VEC_SELECT
36356 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
36357 return false;
36358
36359 rtx mem = XEXP (SET_SRC (body), 0);
36360 rtx base_reg = XEXP (mem, 0);
36361
36362 df_ref base_use;
36363 insn_info = DF_INSN_INFO_GET (def_insn);
36364 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
36365 {
36366 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
36367 continue;
36368
36369 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
36370 if (!base_def_link || base_def_link->next)
36371 return false;
36372
36373 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
36374 rtx tocrel_body = PATTERN (tocrel_insn);
36375 rtx base, offset;
36376 if (GET_CODE (tocrel_body) != SET)
36377 return false;
36378 if (!toc_relative_expr_p (SET_SRC (tocrel_body), false))
36379 return false;
36380 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
36381 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
36382 return false;
36383 }
36384 }
36385 return true;
36386 }
36387
36388 /* Return 1 iff OP is an operand that will not be affected by having
36389 vector doublewords swapped in memory. */
36390 static unsigned int
36391 rtx_is_swappable_p (rtx op, unsigned int *special)
36392 {
36393 enum rtx_code code = GET_CODE (op);
36394 int i, j;
36395 rtx parallel;
36396
36397 switch (code)
36398 {
36399 case LABEL_REF:
36400 case SYMBOL_REF:
36401 case CLOBBER:
36402 case REG:
36403 return 1;
36404
36405 case VEC_CONCAT:
36406 case ASM_INPUT:
36407 case ASM_OPERANDS:
36408 return 0;
36409
36410 case CONST_VECTOR:
36411 {
36412 *special = SH_CONST_VECTOR;
36413 return 1;
36414 }
36415
36416 case VEC_DUPLICATE:
36417 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
36418 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
36419 it represents a vector splat for which we can do special
36420 handling. */
36421 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
36422 return 1;
36423 else if (GET_CODE (XEXP (op, 0)) == REG
36424 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
36425 /* This catches V2DF and V2DI splat, at a minimum. */
36426 return 1;
36427 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
36428 /* If the duplicated item is from a select, defer to the select
36429 processing to see if we can change the lane for the splat. */
36430 return rtx_is_swappable_p (XEXP (op, 0), special);
36431 else
36432 return 0;
36433
36434 case VEC_SELECT:
36435 /* A vec_extract operation is ok if we change the lane. */
36436 if (GET_CODE (XEXP (op, 0)) == REG
36437 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
36438 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
36439 && XVECLEN (parallel, 0) == 1
36440 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
36441 {
36442 *special = SH_EXTRACT;
36443 return 1;
36444 }
36445 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
36446 XXPERMDI is a swap operation, it will be identified by
36447 insn_is_swap_p and therefore we won't get here. */
36448 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
36449 && (GET_MODE (XEXP (op, 0)) == V4DFmode
36450 || GET_MODE (XEXP (op, 0)) == V4DImode)
36451 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
36452 && XVECLEN (parallel, 0) == 2
36453 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
36454 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
36455 {
36456 *special = SH_XXPERMDI;
36457 return 1;
36458 }
36459 else
36460 return 0;
36461
36462 case UNSPEC:
36463 {
36464 /* Various operations are unsafe for this optimization, at least
36465 without significant additional work. Permutes are obviously
36466 problematic, as both the permute control vector and the ordering
36467 of the target values are invalidated by doubleword swapping.
36468 Vector pack and unpack modify the number of vector lanes.
36469 Merge-high/low will not operate correctly on swapped operands.
36470 Vector shifts across element boundaries are clearly uncool,
36471 as are vector select and concatenate operations. Vector
36472 sum-across instructions define one operand with a specific
36473 order-dependent element, so additional fixup code would be
36474 needed to make those work. Vector set and non-immediate-form
36475 vector splat are element-order sensitive. A few of these
36476 cases might be workable with special handling if required.
36477 Adding cost modeling would be appropriate in some cases. */
36478 int val = XINT (op, 1);
36479 switch (val)
36480 {
36481 default:
36482 break;
36483 case UNSPEC_VMRGH_DIRECT:
36484 case UNSPEC_VMRGL_DIRECT:
36485 case UNSPEC_VPACK_SIGN_SIGN_SAT:
36486 case UNSPEC_VPACK_SIGN_UNS_SAT:
36487 case UNSPEC_VPACK_UNS_UNS_MOD:
36488 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
36489 case UNSPEC_VPACK_UNS_UNS_SAT:
36490 case UNSPEC_VPERM:
36491 case UNSPEC_VPERM_UNS:
36492 case UNSPEC_VPERMHI:
36493 case UNSPEC_VPERMSI:
36494 case UNSPEC_VPKPX:
36495 case UNSPEC_VSLDOI:
36496 case UNSPEC_VSLO:
36497 case UNSPEC_VSRO:
36498 case UNSPEC_VSUM2SWS:
36499 case UNSPEC_VSUM4S:
36500 case UNSPEC_VSUM4UBS:
36501 case UNSPEC_VSUMSWS:
36502 case UNSPEC_VSUMSWS_DIRECT:
36503 case UNSPEC_VSX_CONCAT:
36504 case UNSPEC_VSX_SET:
36505 case UNSPEC_VSX_SLDWI:
36506 case UNSPEC_VUNPACK_HI_SIGN:
36507 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
36508 case UNSPEC_VUNPACK_LO_SIGN:
36509 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
36510 case UNSPEC_VUPKHPX:
36511 case UNSPEC_VUPKHS_V4SF:
36512 case UNSPEC_VUPKHU_V4SF:
36513 case UNSPEC_VUPKLPX:
36514 case UNSPEC_VUPKLS_V4SF:
36515 case UNSPEC_VUPKLU_V4SF:
36516 case UNSPEC_VSX_CVDPSPN:
36517 case UNSPEC_VSX_CVSPDP:
36518 case UNSPEC_VSX_CVSPDPN:
36519 return 0;
36520 case UNSPEC_VSPLT_DIRECT:
36521 *special = SH_SPLAT;
36522 return 1;
36523 }
36524 }
36525
36526 default:
36527 break;
36528 }
36529
36530 const char *fmt = GET_RTX_FORMAT (code);
36531 int ok = 1;
36532
36533 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
36534 if (fmt[i] == 'e' || fmt[i] == 'u')
36535 {
36536 unsigned int special_op = SH_NONE;
36537 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
36538 if (special_op == SH_NONE)
36539 continue;
36540 /* Ensure we never have two kinds of special handling
36541 for the same insn. */
36542 if (*special != SH_NONE && *special != special_op)
36543 return 0;
36544 *special = special_op;
36545 }
36546 else if (fmt[i] == 'E')
36547 for (j = 0; j < XVECLEN (op, i); ++j)
36548 {
36549 unsigned int special_op = SH_NONE;
36550 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
36551 if (special_op == SH_NONE)
36552 continue;
36553 /* Ensure we never have two kinds of special handling
36554 for the same insn. */
36555 if (*special != SH_NONE && *special != special_op)
36556 return 0;
36557 *special = special_op;
36558 }
36559
36560 return ok;
36561 }
36562
36563 /* Return 1 iff INSN is an operand that will not be affected by
36564 having vector doublewords swapped in memory (in which case
36565 *SPECIAL is unchanged), or that can be modified to be correct
36566 if vector doublewords are swapped in memory (in which case
36567 *SPECIAL is changed to a value indicating how). */
36568 static unsigned int
36569 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
36570 unsigned int *special)
36571 {
36572 /* Calls are always bad. */
36573 if (GET_CODE (insn) == CALL_INSN)
36574 return 0;
36575
36576 /* Loads and stores seen here are not permuting, but we can still
36577 fix them up by converting them to permuting ones. Exceptions:
36578 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
36579 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
36580 for the SET source. */
36581 rtx body = PATTERN (insn);
36582 int i = INSN_UID (insn);
36583
36584 if (insn_entry[i].is_load)
36585 {
36586 if (GET_CODE (body) == SET)
36587 {
36588 *special = SH_NOSWAP_LD;
36589 return 1;
36590 }
36591 else
36592 return 0;
36593 }
36594
36595 if (insn_entry[i].is_store)
36596 {
36597 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
36598 {
36599 *special = SH_NOSWAP_ST;
36600 return 1;
36601 }
36602 else
36603 return 0;
36604 }
36605
36606 /* A convert to single precision can be left as is provided that
36607 all of its uses are in xxspltw instructions that splat BE element
36608 zero. */
36609 if (GET_CODE (body) == SET
36610 && GET_CODE (SET_SRC (body)) == UNSPEC
36611 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
36612 {
36613 df_ref def;
36614 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36615
36616 FOR_EACH_INSN_INFO_DEF (def, insn_info)
36617 {
36618 struct df_link *link = DF_REF_CHAIN (def);
36619 if (!link)
36620 return 0;
36621
36622 for (; link; link = link->next) {
36623 rtx use_insn = DF_REF_INSN (link->ref);
36624 rtx use_body = PATTERN (use_insn);
36625 if (GET_CODE (use_body) != SET
36626 || GET_CODE (SET_SRC (use_body)) != UNSPEC
36627 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
36628 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
36629 return 0;
36630 }
36631 }
36632
36633 return 1;
36634 }
36635
36636 /* A concatenation of two doublewords is ok if we reverse the
36637 order of the inputs. */
36638 if (GET_CODE (body) == SET
36639 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
36640 && (GET_MODE (SET_SRC (body)) == V2DFmode
36641 || GET_MODE (SET_SRC (body)) == V2DImode))
36642 {
36643 *special = SH_CONCAT;
36644 return 1;
36645 }
36646
36647 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
36648 constant pool. */
36649 if (GET_CODE (body) == SET
36650 && GET_CODE (SET_SRC (body)) == UNSPEC
36651 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
36652 && XVECLEN (SET_SRC (body), 0) == 3
36653 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
36654 {
36655 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
36656 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36657 df_ref use;
36658 FOR_EACH_INSN_INFO_USE (use, insn_info)
36659 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
36660 {
36661 struct df_link *def_link = DF_REF_CHAIN (use);
36662 /* Punt if multiple definitions for this reg. */
36663 if (def_link && !def_link->next
36664 && const_load_sequence_p (insn_entry,
36665 DF_REF_INSN (def_link->ref)))
36666 {
36667 *special = SH_VPERM;
36668 return 1;
36669 }
36670 }
36671 }
36672
36673 /* Otherwise check the operands for vector lane violations. */
36674 return rtx_is_swappable_p (body, special);
36675 }
36676
36677 enum chain_purpose { FOR_LOADS, FOR_STORES };
36678
36679 /* Return true if the UD or DU chain headed by LINK is non-empty,
36680 and every entry on the chain references an insn that is a
36681 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
36682 register swap must have only permuting loads as reaching defs.
36683 If PURPOSE is FOR_STORES, each such register swap must have only
36684 register swaps or permuting stores as reached uses. */
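/* A sketch of the shape being checked, under the FOR_LOADS purpose:

       permuting load --> register swap --> (arbitrary uses)

   Every insn reached through the chain must be a pure register swap,
   and each such swap's reaching definitions must all be permuting
   loads; the analogous check applies for FOR_STORES with the chain
   directions reversed.  */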
36685 static bool
36686 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
36687 enum chain_purpose purpose)
36688 {
36689 if (!link)
36690 return false;
36691
36692 for (; link; link = link->next)
36693 {
36694 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
36695 continue;
36696
36697 if (DF_REF_IS_ARTIFICIAL (link->ref))
36698 return false;
36699
36700 rtx reached_insn = DF_REF_INSN (link->ref);
36701 unsigned uid = INSN_UID (reached_insn);
36702 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
36703
36704 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
36705 || insn_entry[uid].is_store)
36706 return false;
36707
36708 if (purpose == FOR_LOADS)
36709 {
36710 df_ref use;
36711 FOR_EACH_INSN_INFO_USE (use, insn_info)
36712 {
36713 struct df_link *swap_link = DF_REF_CHAIN (use);
36714
36715 while (swap_link)
36716 {
36717 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
36718 return false;
36719
36720 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
36721 unsigned uid2 = INSN_UID (swap_def_insn);
36722
36723 /* Only permuting loads are allowed. */
36724 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
36725 return false;
36726
36727 swap_link = swap_link->next;
36728 }
36729 }
36730 }
36731 else if (purpose == FOR_STORES)
36732 {
36733 df_ref def;
36734 FOR_EACH_INSN_INFO_DEF (def, insn_info)
36735 {
36736 struct df_link *swap_link = DF_REF_CHAIN (def);
36737
36738 while (swap_link)
36739 {
36740 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
36741 return false;
36742
36743 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
36744 unsigned uid2 = INSN_UID (swap_use_insn);
36745
36746 /* Permuting stores or register swaps are allowed. */
36747 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
36748 return false;
36749
36750 swap_link = swap_link->next;
36751 }
36752 }
36753 }
36754 }
36755
36756 return true;
36757 }
36758
36759 /* Mark the xxswapdi instructions associated with permuting loads and
36760 stores for removal. Note that we only flag them for deletion here,
36761 as there is a possibility of a swap being reached from multiple
36762 loads, etc. */
36763 static void
36764 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
36765 {
36766 rtx insn = insn_entry[i].insn;
36767 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36768
36769 if (insn_entry[i].is_load)
36770 {
36771 df_ref def;
36772 FOR_EACH_INSN_INFO_DEF (def, insn_info)
36773 {
36774 struct df_link *link = DF_REF_CHAIN (def);
36775
36776 /* We know by now that these are swaps, so we can delete
36777 them confidently. */
36778 while (link)
36779 {
36780 rtx use_insn = DF_REF_INSN (link->ref);
36781 insn_entry[INSN_UID (use_insn)].will_delete = 1;
36782 link = link->next;
36783 }
36784 }
36785 }
36786 else if (insn_entry[i].is_store)
36787 {
36788 df_ref use;
36789 FOR_EACH_INSN_INFO_USE (use, insn_info)
36790 {
36791 /* Ignore uses for addressability. */
36792 machine_mode mode = GET_MODE (DF_REF_REG (use));
36793 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
36794 continue;
36795
36796 struct df_link *link = DF_REF_CHAIN (use);
36797
36798 /* We know by now that these are swaps, so we can delete
36799 them confidently. */
36800 while (link)
36801 {
36802 rtx def_insn = DF_REF_INSN (link->ref);
36803 insn_entry[INSN_UID (def_insn)].will_delete = 1;
36804 link = link->next;
36805 }
36806 }
36807 }
36808 }
36809
36810 /* OP is either a CONST_VECTOR or an expression containing one.
36811 Swap the first half of the vector with the second in the first
36812 case. Recurse to find it in the second. */
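/* For example (a sketch), a V4SI constant

       { 0, 1, 2, 3 }

   is rewritten in place as

       { 2, 3, 0, 1 }

   so that the value seen after a doubleword swap is the original.  */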
36813 static void
36814 swap_const_vector_halves (rtx op)
36815 {
36816 int i;
36817 enum rtx_code code = GET_CODE (op);
36818 if (GET_CODE (op) == CONST_VECTOR)
36819 {
36820 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
36821 for (i = 0; i < half_units; ++i)
36822 {
36823 rtx temp = CONST_VECTOR_ELT (op, i);
36824 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
36825 CONST_VECTOR_ELT (op, i + half_units) = temp;
36826 }
36827 }
36828 else
36829 {
36830 int j;
36831 const char *fmt = GET_RTX_FORMAT (code);
36832 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
36833 if (fmt[i] == 'e' || fmt[i] == 'u')
36834 swap_const_vector_halves (XEXP (op, i));
36835 else if (fmt[i] == 'E')
36836 for (j = 0; j < XVECLEN (op, i); ++j)
36837 swap_const_vector_halves (XVECEXP (op, i, j));
36838 }
36839 }
36840
36841 /* Find all subregs of a vector expression that perform a narrowing,
36842 and adjust the subreg index to account for doubleword swapping. */
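/* For example (a sketch), (subreg:DF (reg:V2DF 64) 0) becomes
   (subreg:DF (reg:V2DF 64) 8) and vice versa, since each doubleword
   of the vector register now lives in the opposite half.  */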
36843 static void
36844 adjust_subreg_index (rtx op)
36845 {
36846 enum rtx_code code = GET_CODE (op);
36847 if (code == SUBREG
36848 && (GET_MODE_SIZE (GET_MODE (op))
36849 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
36850 {
36851 unsigned int index = SUBREG_BYTE (op);
36852 if (index < 8)
36853 index += 8;
36854 else
36855 index -= 8;
36856 SUBREG_BYTE (op) = index;
36857 }
36858
36859 const char *fmt = GET_RTX_FORMAT (code);
36860 int i,j;
36861 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
36862 if (fmt[i] == 'e' || fmt[i] == 'u')
36863 adjust_subreg_index (XEXP (op, i));
36864 else if (fmt[i] == 'E')
36865 for (j = 0; j < XVECLEN (op, i); ++j)
36866 adjust_subreg_index (XVECEXP (op, i, j));
36867 }
36868
36869 /* Convert the non-permuting load INSN to a permuting one. */
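/* For example (a sketch for V2DI; register numbers arbitrary),

       (set (reg:V2DI 64) (mem:V2DI (reg:DI 3)))

   becomes

       (set (reg:V2DI 64)
            (vec_select:V2DI (mem:V2DI (reg:DI 3))
                             (parallel [(const_int 1) (const_int 0)])))

   which matches the little-endian permuting load patterns.  */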
36870 static void
36871 permute_load (rtx_insn *insn)
36872 {
36873 rtx body = PATTERN (insn);
36874 rtx mem_op = SET_SRC (body);
36875 rtx tgt_reg = SET_DEST (body);
36876 machine_mode mode = GET_MODE (tgt_reg);
36877 int n_elts = GET_MODE_NUNITS (mode);
36878 int half_elts = n_elts / 2;
36879 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
36880 int i, j;
36881 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
36882 XVECEXP (par, 0, i) = GEN_INT (j);
36883 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
36884 XVECEXP (par, 0, i) = GEN_INT (j);
36885 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
36886 SET_SRC (body) = sel;
36887 INSN_CODE (insn) = -1; /* Force re-recognition. */
36888 df_insn_rescan (insn);
36889
36890 if (dump_file)
36891 fprintf (dump_file, "Replacing load %d with permuted load\n",
36892 INSN_UID (insn));
36893 }
36894
36895 /* Convert the non-permuting store INSN to a permuting one. */
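/* For example (a sketch for V2DI), (set (mem:V2DI ...) (reg:V2DI 64))
   has its source rewritten as
       (vec_select:V2DI (reg:V2DI 64)
                        (parallel [(const_int 1) (const_int 0)]))
   to match the little-endian permuting store patterns.  */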
36896 static void
36897 permute_store (rtx_insn *insn)
36898 {
36899 rtx body = PATTERN (insn);
36900 rtx src_reg = SET_SRC (body);
36901 machine_mode mode = GET_MODE (src_reg);
36902 int n_elts = GET_MODE_NUNITS (mode);
36903 int half_elts = n_elts / 2;
36904 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
36905 int i, j;
36906 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
36907 XVECEXP (par, 0, i) = GEN_INT (j);
36908 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
36909 XVECEXP (par, 0, i) = GEN_INT (j);
36910 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
36911 SET_SRC (body) = sel;
36912 INSN_CODE (insn) = -1; /* Force re-recognition. */
36913 df_insn_rescan (insn);
36914
36915 if (dump_file)
36916 fprintf (dump_file, "Replacing store %d with permuted store\n",
36917 INSN_UID (insn));
36918 }
36919
36920 /* Given INSN, whose pattern contains a vector extract operation, adjust
36921 the index of the extracted lane to account for the doubleword swap. */
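/* For example (a sketch), an extract of lane 0 from a V4SI value
   must fetch lane 2 once doublewords are swapped:
       (vec_select:SI (reg:V4SI 64) (parallel [(const_int 0)]))
   becomes
       (vec_select:SI (reg:V4SI 64) (parallel [(const_int 2)])).  */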
36922 static void
36923 adjust_extract (rtx_insn *insn)
36924 {
36925 rtx pattern = PATTERN (insn);
36926 if (GET_CODE (pattern) == PARALLEL)
36927 pattern = XVECEXP (pattern, 0, 0);
36928 rtx src = SET_SRC (pattern);
36929 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
36930 account for that. */
36931 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
36932 rtx par = XEXP (sel, 1);
36933 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
36934 int lane = INTVAL (XVECEXP (par, 0, 0));
36935 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
36936 XVECEXP (par, 0, 0) = GEN_INT (lane);
36937 INSN_CODE (insn) = -1; /* Force re-recognition. */
36938 df_insn_rescan (insn);
36939
36940 if (dump_file)
36941 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
36942 }
36943
36944 /* Given INSN, whose pattern contains a vector direct-splat operation,
36945 adjust the index of the source lane to account for the doubleword swap. */
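/* For example (a sketch for a V4SI direct splat), a source lane of 1
   becomes lane 3, and a source lane of 3 becomes lane 1, mirroring
   the doubleword exchange.  */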
36946 static void
36947 adjust_splat (rtx_insn *insn)
36948 {
36949 rtx body = PATTERN (insn);
36950 rtx unspec = XEXP (body, 1);
36951 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
36952 int lane = INTVAL (XVECEXP (unspec, 0, 1));
36953 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
36954 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
36955 INSN_CODE (insn) = -1; /* Force re-recognition. */
36956 df_insn_rescan (insn);
36957
36958 if (dump_file)
36959 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
36960 }
36961
36962 /* Given INSN, whose pattern contains an XXPERMDI operation (that is not
36963 a doubleword swap), reverse the order of the source operands and adjust
36964 the indices of the source lanes to account for doubleword reversal. */
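/* For example (a sketch): selecting lanes [0, 2] of (vec_concat A B),
   i.e. A[0] and B[0], becomes a select of lanes [1, 3] of
   (vec_concat B A) once both inputs hold swapped doublewords, which
   picks up the same two values from their new positions.  */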
36965 static void
36966 adjust_xxpermdi (rtx_insn *insn)
36967 {
36968 rtx set = PATTERN (insn);
36969 rtx select = XEXP (set, 1);
36970 rtx concat = XEXP (select, 0);
36971 rtx src0 = XEXP (concat, 0);
36972 XEXP (concat, 0) = XEXP (concat, 1);
36973 XEXP (concat, 1) = src0;
36974 rtx parallel = XEXP (select, 1);
36975 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
36976 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
36977 int new_lane0 = 3 - lane1;
36978 int new_lane1 = 3 - lane0;
36979 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
36980 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
36981 INSN_CODE (insn) = -1; /* Force re-recognition. */
36982 df_insn_rescan (insn);
36983
36984 if (dump_file)
36985 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
36986 }
36987
36988 /* Given INSN, whose pattern contains a VEC_CONCAT operation of two
36989 doublewords, reverse the order of those inputs. */
36990 static void
36991 adjust_concat (rtx_insn *insn)
36992 {
36993 rtx set = PATTERN (insn);
36994 rtx concat = XEXP (set, 1);
36995 rtx src0 = XEXP (concat, 0);
36996 XEXP (concat, 0) = XEXP (concat, 1);
36997 XEXP (concat, 1) = src0;
36998 INSN_CODE (insn) = -1; /* Force re-recognition. */
36999 df_insn_rescan (insn);
37000
37001 if (dump_file)
37002 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
37003 }
37004
37005 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
37006 constant pool to reflect swapped doublewords. */
37007 static void
37008 adjust_vperm (rtx_insn *insn)
37009 {
37010 /* We previously determined that the UNSPEC_VPERM was fed by a
37011 swap of a swapping load of a TOC-relative constant pool symbol.
37012 Find the MEM in the swapping load and replace it with a MEM for
37013 the adjusted mask constant. */
37014 rtx set = PATTERN (insn);
37015 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
37016
37017 /* Find the swap. */
37018 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37019 df_ref use;
37020 rtx_insn *swap_insn = 0;
37021 FOR_EACH_INSN_INFO_USE (use, insn_info)
37022 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
37023 {
37024 struct df_link *def_link = DF_REF_CHAIN (use);
37025 gcc_assert (def_link && !def_link->next);
37026 swap_insn = DF_REF_INSN (def_link->ref);
37027 break;
37028 }
37029 gcc_assert (swap_insn);
37030
37031 /* Find the load. */
37032 insn_info = DF_INSN_INFO_GET (swap_insn);
37033 rtx_insn *load_insn = 0;
37034 FOR_EACH_INSN_INFO_USE (use, insn_info)
37035 {
37036 struct df_link *def_link = DF_REF_CHAIN (use);
37037 gcc_assert (def_link && !def_link->next);
37038 load_insn = DF_REF_INSN (def_link->ref);
37039 break;
37040 }
37041 gcc_assert (load_insn);
37042
37043 /* Find the TOC-relative symbol access. */
37044 insn_info = DF_INSN_INFO_GET (load_insn);
37045 rtx_insn *tocrel_insn = 0;
37046 FOR_EACH_INSN_INFO_USE (use, insn_info)
37047 {
37048 struct df_link *def_link = DF_REF_CHAIN (use);
37049 gcc_assert (def_link && !def_link->next);
37050 tocrel_insn = DF_REF_INSN (def_link->ref);
37051 break;
37052 }
37053 gcc_assert (tocrel_insn);
37054
37055 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
37056 to set tocrel_base; otherwise it would be unnecessary as we've
37057 already established it will return true. */
37058 rtx base, offset;
37059 if (!toc_relative_expr_p (SET_SRC (PATTERN (tocrel_insn)), false))
37060 gcc_unreachable ();
37061 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
37062 rtx const_vector = get_pool_constant (base);
37063 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
37064
37065 /* Create an adjusted mask from the initial mask. */
37066 unsigned int new_mask[16], i, val;
37067 for (i = 0; i < 16; ++i) {
37068 val = INTVAL (XVECEXP (const_vector, 0, i));
37069 if (val < 16)
37070 new_mask[i] = (val + 8) % 16;
37071 else
37072 new_mask[i] = ((val + 8) % 16) + 16;
37073 }
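/* As a sketch of the remapping: selector 0 becomes 8, 8 becomes 0,
   16 becomes 24, and 24 becomes 16; each byte selector is redirected
   to the mirrored doubleword of the same input vector.  */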
37074
37075 /* Create a new CONST_VECTOR and a MEM that references it. */
37076 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
37077 for (i = 0; i < 16; ++i)
37078 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
37079 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
37080 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
37081 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
37082 can't recognize. Force the SYMBOL_REF into a register. */
37083 if (!REG_P (XEXP (new_mem, 0))) {
37084 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
37085 XEXP (new_mem, 0) = base_reg;
37086 /* Move the newly created insn ahead of the load insn. */
37087 rtx_insn *force_insn = get_last_insn ();
37088 remove_insn (force_insn);
37089 rtx_insn *before_load_insn = PREV_INSN (load_insn);
37090 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
37091 df_insn_rescan (before_load_insn);
37092 df_insn_rescan (force_insn);
37093 }
37094
37095 /* Replace the MEM in the load instruction and rescan it. */
37096 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
37097 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
37098 df_insn_rescan (load_insn);
37099
37100 if (dump_file)
37101 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
37102 }
37103
37104 /* The insn described by INSN_ENTRY[I] can be swapped, but only
37105 with special handling. Take care of that here. */
37106 static void
37107 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
37108 {
37109 rtx_insn *insn = insn_entry[i].insn;
37110 rtx body = PATTERN (insn);
37111
37112 switch (insn_entry[i].special_handling)
37113 {
37114 default:
37115 gcc_unreachable ();
37116 case SH_CONST_VECTOR:
37117 {
37118 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
37119 gcc_assert (GET_CODE (body) == SET);
37120 rtx rhs = SET_SRC (body);
37121 swap_const_vector_halves (rhs);
37122 if (dump_file)
37123 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
37124 break;
37125 }
37126 case SH_SUBREG:
37127 /* A subreg of the same size is already safe. For subregs that
37128 select a smaller portion of a reg, adjust the index for
37129 swapped doublewords. */
37130 adjust_subreg_index (body);
37131 if (dump_file)
37132 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
37133 break;
37134 case SH_NOSWAP_LD:
37135 /* Convert a non-permuting load to a permuting one. */
37136 permute_load (insn);
37137 break;
37138 case SH_NOSWAP_ST:
37139 /* Convert a non-permuting store to a permuting one. */
37140 permute_store (insn);
37141 break;
37142 case SH_EXTRACT:
37143 /* Change the lane on an extract operation. */
37144 adjust_extract (insn);
37145 break;
37146 case SH_SPLAT:
37147 /* Change the lane on a direct-splat operation. */
37148 adjust_splat (insn);
37149 break;
37150 case SH_XXPERMDI:
37151 /* Change the lanes on an XXPERMDI operation. */
37152 adjust_xxpermdi (insn);
37153 break;
37154 case SH_CONCAT:
37155 /* Reverse the order of a concatenation operation. */
37156 adjust_concat (insn);
37157 break;
37158 case SH_VPERM:
37159 /* Change the mask loaded from the constant pool for a VPERM. */
37160 adjust_vperm (insn);
37161 break;
37162 }
37163 }
37164
37165 /* Find the insn from the Ith table entry, which is known to be a
37166 register swap Y = SWAP(X). Replace it with a copy Y = X. */
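/* For example (a sketch for V2DF),

       (set (reg:V2DF 65)
            (vec_select:V2DF (reg:V2DF 64)
                             (parallel [(const_int 1) (const_int 0)])))

   is replaced by the plain copy

       (set (reg:V2DF 65) (reg:V2DF 64)).  */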
37167 static void
37168 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
37169 {
37170 rtx_insn *insn = insn_entry[i].insn;
37171 rtx body = PATTERN (insn);
37172 rtx src_reg = XEXP (SET_SRC (body), 0);
37173 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
37174 rtx_insn *new_insn = emit_insn_before (copy, insn);
37175 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
37176 df_insn_rescan (new_insn);
37177
37178 if (dump_file)
37179 {
37180 unsigned int new_uid = INSN_UID (new_insn);
37181 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
37182 }
37183
37184 df_insn_delete (insn);
37185 remove_insn (insn);
37186 insn->set_deleted ();
37187 }
37188
37189 /* Dump the swap table to DUMP_FILE. */
37190 static void
37191 dump_swap_insn_table (swap_web_entry *insn_entry)
37192 {
37193 int e = get_max_uid ();
37194 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
37195
37196 for (int i = 0; i < e; ++i)
37197 if (insn_entry[i].is_relevant)
37198 {
37199 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
37200 fprintf (dump_file, "%6d %6d ", i,
37201 pred_entry && pred_entry->insn
37202 ? INSN_UID (pred_entry->insn) : 0);
37203 if (insn_entry[i].is_load)
37204 fputs ("load ", dump_file);
37205 if (insn_entry[i].is_store)
37206 fputs ("store ", dump_file);
37207 if (insn_entry[i].is_swap)
37208 fputs ("swap ", dump_file);
37209 if (insn_entry[i].is_live_in)
37210 fputs ("live-in ", dump_file);
37211 if (insn_entry[i].is_live_out)
37212 fputs ("live-out ", dump_file);
37213 if (insn_entry[i].contains_subreg)
37214 fputs ("subreg ", dump_file);
37215 if (insn_entry[i].is_128_int)
37216 fputs ("int128 ", dump_file);
37217 if (insn_entry[i].is_call)
37218 fputs ("call ", dump_file);
37219 if (insn_entry[i].is_swappable)
37220 {
37221 fputs ("swappable ", dump_file);
37222 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
37223 fputs ("special:constvec ", dump_file);
37224 else if (insn_entry[i].special_handling == SH_SUBREG)
37225 fputs ("special:subreg ", dump_file);
37226 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
37227 fputs ("special:load ", dump_file);
37228 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
37229 fputs ("special:store ", dump_file);
37230 else if (insn_entry[i].special_handling == SH_EXTRACT)
37231 fputs ("special:extract ", dump_file);
37232 else if (insn_entry[i].special_handling == SH_SPLAT)
37233 fputs ("special:splat ", dump_file);
37234 else if (insn_entry[i].special_handling == SH_XXPERMDI)
37235 fputs ("special:xxpermdi ", dump_file);
37236 else if (insn_entry[i].special_handling == SH_CONCAT)
37237 fputs ("special:concat ", dump_file);
37238 else if (insn_entry[i].special_handling == SH_VPERM)
37239 fputs ("special:vperm ", dump_file);
37240 }
37241 if (insn_entry[i].web_not_optimizable)
37242 fputs ("unoptimizable ", dump_file);
37243 if (insn_entry[i].will_delete)
37244 fputs ("delete ", dump_file);
37245 fputs ("\n", dump_file);
37246 }
37247 fputs ("\n", dump_file);
37248 }
37249
37250 /* Main entry point for this pass. */
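/* In outline, the pass proceeds as follows: (1) walk all insns,
   building def-use webs of vector computations via union-find;
   (2) mark any web unoptimizable if it contains an insn that is
   neither a swap nor swappable, or whose loads and stores are not
   properly paired with register swaps; (3) in each optimizable web,
   apply the special handling and flag the swaps associated with
   permuting loads and stores; (4) replace each flagged swap with a
   simple copy.  */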
37251 unsigned int
37252 rs6000_analyze_swaps (function *fun)
37253 {
37254 swap_web_entry *insn_entry;
37255 basic_block bb;
37256 rtx_insn *insn;
37257
37258 /* Dataflow analysis for use-def chains. */
37259 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
37260 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
37261 df_analyze ();
37262 df_set_flags (DF_DEFER_INSN_RESCAN);
37263
37264 /* Allocate structure to represent webs of insns. */
37265 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
37266
37267 /* Walk the insns to gather basic data. */
37268 FOR_ALL_BB_FN (bb, fun)
37269 FOR_BB_INSNS (bb, insn)
37270 {
37271 unsigned int uid = INSN_UID (insn);
37272 if (NONDEBUG_INSN_P (insn))
37273 {
37274 insn_entry[uid].insn = insn;
37275
37276 if (GET_CODE (insn) == CALL_INSN)
37277 insn_entry[uid].is_call = 1;
37278
37279 /* Walk the uses and defs to see if we mention vector regs.
37280 Record any constraints on optimization of such mentions. */
37281 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37282 df_ref mention;
37283 FOR_EACH_INSN_INFO_USE (mention, insn_info)
37284 {
37285 /* We use DF_REF_REAL_REG here to get inside any subregs. */
37286 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
37287
37288 /* If a use gets its value from a call insn, it will be
37289 a hard register and will look like (reg:V4SI 3 3).
37290 The df analysis creates two mentions for GPR3 and GPR4,
37291 both DImode. We must recognize this and treat it as a
37292 vector mention to ensure the call is unioned with this
37293 use. */
37294 if (mode == DImode && DF_REF_INSN_INFO (mention))
37295 {
37296 rtx feeder = DF_REF_INSN (mention);
37297 /* FIXME: It is pretty hard to get from the df mention
37298 to the mode of the use in the insn. We arbitrarily
37299 pick a vector mode here, even though the use might
37300 be a real DImode. We can be too conservative
37301 (create a web larger than necessary) because of
37302 this, so consider eventually fixing this. */
37303 if (GET_CODE (feeder) == CALL_INSN)
37304 mode = V4SImode;
37305 }
37306
37307 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
37308 {
37309 insn_entry[uid].is_relevant = 1;
37310 if (mode == TImode || mode == V1TImode
37311 || FLOAT128_VECTOR_P (mode))
37312 insn_entry[uid].is_128_int = 1;
37313 if (DF_REF_INSN_INFO (mention))
37314 insn_entry[uid].contains_subreg
37315 = !rtx_equal_p (DF_REF_REG (mention),
37316 DF_REF_REAL_REG (mention));
37317 union_defs (insn_entry, insn, mention);
37318 }
37319 }
37320 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
37321 {
37322 /* We use DF_REF_REAL_REG here to get inside any subregs. */
37323 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
37324
37325 /* If we're loading up a hard vector register for a call,
37326 it looks like (set (reg:V4SI 9 9) (...)). The df
37327 analysis creates two mentions for GPR9 and GPR10, both
37328 DImode. So relying on the mode from the mentions
37329 isn't sufficient to ensure we union the call into the
37330 web with the parameter setup code. */
37331 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
37332 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
37333 mode = GET_MODE (SET_DEST (PATTERN (insn)));
37334
37335 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
37336 {
37337 insn_entry[uid].is_relevant = 1;
37338 if (mode == TImode || mode == V1TImode
37339 || FLOAT128_VECTOR_P (mode))
37340 insn_entry[uid].is_128_int = 1;
37341 if (DF_REF_INSN_INFO (mention))
37342 insn_entry[uid].contains_subreg
37343 = !rtx_equal_p (DF_REF_REG (mention),
37344 DF_REF_REAL_REG (mention));
37345 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
37346 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
37347 insn_entry[uid].is_live_out = 1;
37348 union_uses (insn_entry, insn, mention);
37349 }
37350 }
37351
37352 if (insn_entry[uid].is_relevant)
37353 {
37354 /* Determine if this is a load or store. */
37355 insn_entry[uid].is_load = insn_is_load_p (insn);
37356 insn_entry[uid].is_store = insn_is_store_p (insn);
37357
37358 /* Determine if this is a doubleword swap. If not,
37359 determine whether it can legally be swapped. */
37360 if (insn_is_swap_p (insn))
37361 insn_entry[uid].is_swap = 1;
37362 else
37363 {
37364 unsigned int special = SH_NONE;
37365 insn_entry[uid].is_swappable
37366 = insn_is_swappable_p (insn_entry, insn, &special);
37367 if (special != SH_NONE && insn_entry[uid].contains_subreg)
37368 insn_entry[uid].is_swappable = 0;
37369 else if (special != SH_NONE)
37370 insn_entry[uid].special_handling = special;
37371 else if (insn_entry[uid].contains_subreg)
37372 insn_entry[uid].special_handling = SH_SUBREG;
37373 }
37374 }
37375 }
37376 }
37377
37378 if (dump_file)
37379 {
37380 fprintf (dump_file, "\nSwap insn entry table when first built\n");
37381 dump_swap_insn_table (insn_entry);
37382 }
37383
37384 /* Record unoptimizable webs. */
37385 unsigned e = get_max_uid (), i;
37386 for (i = 0; i < e; ++i)
37387 {
37388 if (!insn_entry[i].is_relevant)
37389 continue;
37390
37391 swap_web_entry *root
37392 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
37393
37394 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
37395 || (insn_entry[i].contains_subreg
37396 && insn_entry[i].special_handling != SH_SUBREG)
37397 || insn_entry[i].is_128_int || insn_entry[i].is_call
37398 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
37399 root->web_not_optimizable = 1;
37400
37401 /* If we have loads or stores that aren't permuting then the
37402 optimization isn't appropriate. */
37403 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
37404 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
37405 root->web_not_optimizable = 1;
37406
37407 /* If we have permuting loads or stores that are not accompanied
37408 by a register swap, the optimization isn't appropriate. */
37409 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
37410 {
37411 rtx insn = insn_entry[i].insn;
37412 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37413 df_ref def;
37414
37415 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37416 {
37417 struct df_link *link = DF_REF_CHAIN (def);
37418
37419 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
37420 {
37421 root->web_not_optimizable = 1;
37422 break;
37423 }
37424 }
37425 }
37426 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
37427 {
37428 rtx insn = insn_entry[i].insn;
37429 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37430 df_ref use;
37431
37432 FOR_EACH_INSN_INFO_USE (use, insn_info)
37433 {
37434 struct df_link *link = DF_REF_CHAIN (use);
37435
37436 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
37437 {
37438 root->web_not_optimizable = 1;
37439 break;
37440 }
37441 }
37442 }
37443 }
37444
37445 if (dump_file)
37446 {
37447 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
37448 dump_swap_insn_table (insn_entry);
37449 }
37450
37451 /* For each load and store in an optimizable web (which implies
37452 the loads and stores are permuting), find the associated
37453 register swaps and mark them for removal. Due to various
37454 optimizations we may mark the same swap more than once. Also
37455 perform special handling for swappable insns that require it. */
37456 for (i = 0; i < e; ++i)
37457 if ((insn_entry[i].is_load || insn_entry[i].is_store)
37458 && insn_entry[i].is_swap)
37459 {
37460 swap_web_entry* root_entry
37461 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
37462 if (!root_entry->web_not_optimizable)
37463 mark_swaps_for_removal (insn_entry, i);
37464 }
37465 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
37466 {
37467 swap_web_entry* root_entry
37468 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
37469 if (!root_entry->web_not_optimizable)
37470 handle_special_swappables (insn_entry, i);
37471 }
37472
37473 /* Now delete the swaps marked for removal. */
37474 for (i = 0; i < e; ++i)
37475 if (insn_entry[i].will_delete)
37476 replace_swap_with_copy (insn_entry, i);
37477
37478 /* Clean up. */
37479 free (insn_entry);
37480 return 0;
37481 }
37482
37483 const pass_data pass_data_analyze_swaps =
37484 {
37485 RTL_PASS, /* type */
37486 "swaps", /* name */
37487 OPTGROUP_NONE, /* optinfo_flags */
37488 TV_NONE, /* tv_id */
37489 0, /* properties_required */
37490 0, /* properties_provided */
37491 0, /* properties_destroyed */
37492 0, /* todo_flags_start */
37493 TODO_df_finish, /* todo_flags_finish */
37494 };
37495
37496 class pass_analyze_swaps : public rtl_opt_pass
37497 {
37498 public:
37499 pass_analyze_swaps (gcc::context *ctxt)
37500 : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
37501 {}
37502
37503 /* opt_pass methods: */
37504 virtual bool gate (function *)
37505 {
37506 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
37507 && rs6000_optimize_swaps);
37508 }
37509
37510 virtual unsigned int execute (function *fun)
37511 {
37512 return rs6000_analyze_swaps (fun);
37513 }
37514
37515 }; // class pass_analyze_swaps
37516
37517 rtl_opt_pass *
37518 make_pass_analyze_swaps (gcc::context *ctxt)
37519 {
37520 return new pass_analyze_swaps (ctxt);
37521 }
37522
37523 #ifdef RS6000_GLIBC_ATOMIC_FENV
37524 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
37525 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
37526 #endif
37527
37528 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
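/* A sketch of what this hook supports (the surrounding machinery is
   not part of this file): for a C11 atomic compound assignment such
   as

       _Atomic double x;
       x += 1.0;

   the middle end evaluates *HOLD before the compare-and-swap loop,
   *CLEAR inside the loop before each retry, and *UPDATE after the
   loop, so that FP exceptions raised by a failed iteration are
   discarded rather than made visible.  */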
37529
37530 static void
37531 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
37532 {
37533 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
37534 {
37535 #ifdef RS6000_GLIBC_ATOMIC_FENV
37536 if (atomic_hold_decl == NULL_TREE)
37537 {
37538 atomic_hold_decl
37539 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
37540 get_identifier ("__atomic_feholdexcept"),
37541 build_function_type_list (void_type_node,
37542 double_ptr_type_node,
37543 NULL_TREE));
37544 TREE_PUBLIC (atomic_hold_decl) = 1;
37545 DECL_EXTERNAL (atomic_hold_decl) = 1;
37546 }
37547
37548 if (atomic_clear_decl == NULL_TREE)
37549 {
37550 atomic_clear_decl
37551 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
37552 get_identifier ("__atomic_feclearexcept"),
37553 build_function_type_list (void_type_node,
37554 NULL_TREE));
37555 TREE_PUBLIC (atomic_clear_decl) = 1;
37556 DECL_EXTERNAL (atomic_clear_decl) = 1;
37557 }
37558
37559 tree const_double = build_qualified_type (double_type_node,
37560 TYPE_QUAL_CONST);
37561 tree const_double_ptr = build_pointer_type (const_double);
37562 if (atomic_update_decl == NULL_TREE)
37563 {
37564 atomic_update_decl
37565 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
37566 get_identifier ("__atomic_feupdateenv"),
37567 build_function_type_list (void_type_node,
37568 const_double_ptr,
37569 NULL_TREE));
37570 TREE_PUBLIC (atomic_update_decl) = 1;
37571 DECL_EXTERNAL (atomic_update_decl) = 1;
37572 }
37573
37574 tree fenv_var = create_tmp_var_raw (double_type_node);
37575 TREE_ADDRESSABLE (fenv_var) = 1;
37576 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
37577
37578 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
37579 *clear = build_call_expr (atomic_clear_decl, 0);
37580 *update = build_call_expr (atomic_update_decl, 1,
37581 fold_convert (const_double_ptr, fenv_addr));
37582 #endif
37583 return;
37584 }
37585
37586 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
37587 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
37588 tree call_mffs = build_call_expr (mffs, 0);
37589
37590 /* Generates the equivalent of feholdexcept (&fenv_var)
37591
37592 *fenv_var = __builtin_mffs ();
37593 double fenv_hold;
37594 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
37595 __builtin_mtfsf (0xff, fenv_hold); */
37596
37597 /* Mask to clear everything except for the rounding modes and non-IEEE
37598 arithmetic flag. */
37599 const unsigned HOST_WIDE_INT hold_exception_mask =
37600 HOST_WIDE_INT_C (0xffffffff00000007);
37601
37602 tree fenv_var = create_tmp_var_raw (double_type_node);
37603
37604 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
37605
37606 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
37607 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
37608 build_int_cst (uint64_type_node,
37609 hold_exception_mask));
37610
37611 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
37612 fenv_llu_and);
37613
37614 tree hold_mtfsf = build_call_expr (mtfsf, 2,
37615 build_int_cst (unsigned_type_node, 0xff),
37616 fenv_hold_mtfsf);
37617
37618 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
37619
37620 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
37621
37622 double fenv_clear = __builtin_mffs ();
37623 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
37624 __builtin_mtfsf (0xff, fenv_clear); */
37625
37626 /* Mask to clear the entire FPSCR image, including the exception flags
37627 and rounding modes; only the high-order word is preserved. */
37628 const unsigned HOST_WIDE_INT clear_exception_mask =
37629 HOST_WIDE_INT_C (0xffffffff00000000);
37630
37631 tree fenv_clear = create_tmp_var_raw (double_type_node);
37632
37633 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
37634
37635 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
37636 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
37637 fenv_clean_llu,
37638 build_int_cst (uint64_type_node,
37639 clear_exception_mask));
37640
37641 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
37642 fenv_clear_llu_and);
37643
37644 tree clear_mtfsf = build_call_expr (mtfsf, 2,
37645 build_int_cst (unsigned_type_node, 0xff),
37646 fenv_clear_mtfsf);
37647
37648 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
37649
37650 /* Generates the equivalent of feupdateenv (&fenv_var)
37651
37652 double old_fenv = __builtin_mffs ();
37653 double fenv_update;
37654 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
37655 (*(uint64_t*)fenv_var & 0x1ff80fff);
37656 __builtin_mtfsf (0xff, fenv_update); */
37657
37658 const unsigned HOST_WIDE_INT update_exception_mask =
37659 HOST_WIDE_INT_C (0xffffffff1fffff00);
37660 const unsigned HOST_WIDE_INT new_exception_mask =
37661 HOST_WIDE_INT_C (0x1ff80fff);
37662
37663 tree old_fenv = create_tmp_var_raw (double_type_node);
37664 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
37665
37666 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
37667 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
37668 build_int_cst (uint64_type_node,
37669 update_exception_mask));
37670
37671 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
37672 build_int_cst (uint64_type_node,
37673 new_exception_mask));
37674
37675 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
37676 old_llu_and, new_llu_and);
37677
37678 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
37679 new_llu_mask);
37680
37681 tree update_mtfsf = build_call_expr (mtfsf, 2,
37682 build_int_cst (unsigned_type_node, 0xff),
37683 fenv_update_mtfsf);
37684
37685 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
37686 }
37687
37688 \f
37689 struct gcc_target targetm = TARGET_INITIALIZER;
37690
37691 #include "gt-rs6000.h"